src/cpu/mips/vm/sharedRuntime_mips_64.cpp

changeset 1
2d8a650513c2
child 5
7e3d3484b275
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/cpu/mips/vm/sharedRuntime_mips_64.cpp	Fri Apr 29 00:06:10 2016 +0800
     1.3 @@ -0,0 +1,4483 @@
     1.4 +/*
     1.5 + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
     1.6 + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved.
     1.7 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.8 + *
     1.9 + * This code is free software; you can redistribute it and/or modify it
    1.10 + * under the terms of the GNU General Public License version 2 only, as
    1.11 + * published by the Free Software Foundation.
    1.12 + *
    1.13 + * This code is distributed in the hope that it will be useful, but WITHOUT
    1.14 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    1.15 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    1.16 + * version 2 for more details (a copy is included in the LICENSE file that
    1.17 + * accompanied this code).
    1.18 + *
    1.19 + * You should have received a copy of the GNU General Public License version
    1.20 + * 2 along with this work; if not, write to the Free Software Foundation,
    1.21 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    1.22 + *
    1.23 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    1.24 + * or visit www.oracle.com if you need additional information or have any
    1.25 + * questions.
    1.26 + *
    1.27 + */
    1.28 +
    1.29 +#include "precompiled.hpp"
    1.30 +#include "asm/macroAssembler.hpp"
    1.31 +#include "asm/macroAssembler.inline.hpp"
    1.32 +#include "code/debugInfoRec.hpp"
    1.33 +#include "code/icBuffer.hpp"
    1.34 +#include "code/vtableStubs.hpp"
    1.35 +#include "interpreter/interpreter.hpp"
    1.36 +#include "oops/compiledICHolder.hpp"
    1.37 +#include "prims/jvmtiRedefineClassesTrace.hpp"
    1.38 +#include "runtime/sharedRuntime.hpp"
    1.39 +#include "runtime/vframeArray.hpp"
    1.40 +#include "vmreg_mips.inline.hpp"
    1.41 +#ifdef COMPILER1
    1.42 +#include "c1/c1_Runtime1.hpp"
    1.43 +#endif
    1.44 +#ifdef COMPILER2
    1.45 +#include "opto/runtime.hpp"
    1.46 +#endif
    1.47 +
    1.48 +#define __ masm->
    1.49 +const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
    1.50 +
     1.51 +class RegisterSaver {
     1.52 +	enum { FPU_regs_live = 32 };	// all 32 FPU registers (F0..F31) are saved below
     1.53 +	// Save-area layout: every register occupies two 32-bit (jint) slots, via the macro below.
     1.54 +	enum layout { 
     1.55 +#define DEF_LAYOUT_OFFS(regname)  regname ## _off,  regname ## H_off,
     1.56 +		DEF_LAYOUT_OFFS(for_16_bytes_aligned)	// padding slot pair to keep the area 16-byte aligned
     1.57 +		DEF_LAYOUT_OFFS(fpr0)
     1.58 +		DEF_LAYOUT_OFFS(fpr1)
     1.59 +		DEF_LAYOUT_OFFS(fpr2)
     1.60 +		DEF_LAYOUT_OFFS(fpr3)
     1.61 +		DEF_LAYOUT_OFFS(fpr4)
     1.62 +		DEF_LAYOUT_OFFS(fpr5)
     1.63 +		DEF_LAYOUT_OFFS(fpr6)
     1.64 +		DEF_LAYOUT_OFFS(fpr7)
     1.65 +		DEF_LAYOUT_OFFS(fpr8)
     1.66 +		DEF_LAYOUT_OFFS(fpr9)
     1.67 +		DEF_LAYOUT_OFFS(fpr10)
     1.68 +		DEF_LAYOUT_OFFS(fpr11)
     1.69 +		DEF_LAYOUT_OFFS(fpr12)
     1.70 +		DEF_LAYOUT_OFFS(fpr13)
     1.71 +		DEF_LAYOUT_OFFS(fpr14)
     1.72 +		DEF_LAYOUT_OFFS(fpr15)
     1.73 +		DEF_LAYOUT_OFFS(fpr16)
     1.74 +		DEF_LAYOUT_OFFS(fpr17)
     1.75 +		DEF_LAYOUT_OFFS(fpr18)
     1.76 +		DEF_LAYOUT_OFFS(fpr19)
     1.77 +		DEF_LAYOUT_OFFS(fpr20)
     1.78 +		DEF_LAYOUT_OFFS(fpr21)
     1.79 +		DEF_LAYOUT_OFFS(fpr22)
     1.80 +		DEF_LAYOUT_OFFS(fpr23)
     1.81 +		DEF_LAYOUT_OFFS(fpr24)
     1.82 +		DEF_LAYOUT_OFFS(fpr25)
     1.83 +		DEF_LAYOUT_OFFS(fpr26)
     1.84 +		DEF_LAYOUT_OFFS(fpr27)
     1.85 +		DEF_LAYOUT_OFFS(fpr28)
     1.86 +		DEF_LAYOUT_OFFS(fpr29)
     1.87 +		DEF_LAYOUT_OFFS(fpr30)
     1.88 +		DEF_LAYOUT_OFFS(fpr31)
     1.89 +
     1.90 +		DEF_LAYOUT_OFFS(v0)
     1.91 +		DEF_LAYOUT_OFFS(v1)
     1.92 +		DEF_LAYOUT_OFFS(a0)
     1.93 +		DEF_LAYOUT_OFFS(a1)
     1.94 +		DEF_LAYOUT_OFFS(a2)
     1.95 +		DEF_LAYOUT_OFFS(a3)
     1.96 +		DEF_LAYOUT_OFFS(a4)
     1.97 +		DEF_LAYOUT_OFFS(a5)
     1.98 +		DEF_LAYOUT_OFFS(a6)
     1.99 +		DEF_LAYOUT_OFFS(a7)
    1.100 +		DEF_LAYOUT_OFFS(t0)
    1.101 +		DEF_LAYOUT_OFFS(t1)
    1.102 +		DEF_LAYOUT_OFFS(t2)
    1.103 +		DEF_LAYOUT_OFFS(t3)
    1.104 +		DEF_LAYOUT_OFFS(s0)
    1.105 +		DEF_LAYOUT_OFFS(s1)
    1.106 +		DEF_LAYOUT_OFFS(s2)
    1.107 +		DEF_LAYOUT_OFFS(s3)
    1.108 +		DEF_LAYOUT_OFFS(s4)
    1.109 +		DEF_LAYOUT_OFFS(s5)
    1.110 +		DEF_LAYOUT_OFFS(s6)
    1.111 +		DEF_LAYOUT_OFFS(s7)
    1.112 +		DEF_LAYOUT_OFFS(t8)
    1.113 +		DEF_LAYOUT_OFFS(t9)
    1.114 +
    1.115 +		DEF_LAYOUT_OFFS(gp)
    1.116 +		DEF_LAYOUT_OFFS(fp)
    1.117 +		DEF_LAYOUT_OFFS(return)	// RA slot
    1.118 +/*
    1.119 +		fpr0_off, fpr1_off,
    1.120 +		fpr2_off, fpr3_off,
    1.121 +		fpr4_off, fpr5_off,
    1.122 +		fpr6_off, fpr7_off,
    1.123 +		fpr8_off, fpr9_off,
    1.124 +		fpr10_off, fpr11_off,
    1.125 +		fpr12_off, fpr13_off,
    1.126 +		fpr14_off, fpr15_off,
    1.127 +		fpr16_off, fpr17_off,
    1.128 +		fpr18_off, fpr19_off,
    1.129 +		fpr20_off, fpr21_off,
    1.130 +		fpr22_off, fpr23_off,
    1.131 +		fpr24_off, fpr25_off,
    1.132 +		fpr26_off, fpr27_off,
    1.133 +		fpr28_off, fpr29_off,
    1.134 +		fpr30_off, fpr31_off,
    1.135 +
    1.136 +		v0_off, v1_off,
    1.137 +		a0_off, a1_off,
    1.138 +		a2_off, a3_off,
    1.139 +		a4_off, a5_off,
    1.140 +		a6_off, a7_off,
    1.141 +		t0_off, t1_off, t2_off, t3_off,
    1.142 +		s0_off, s1_off, s2_off, s3_off, s4_off, s5_off, s6_off, s7_off,
    1.143 +		t8_off, t9_off,
    1.144 +	
    1.145 +		gp_off, fp_off,
    1.146 +		return_off,
    1.147 +*/
    1.148 +		reg_save_size
    1.149 +	};
    1.150 +
    1.151 +  public:
    1.152 +
    1.153 +	static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors =false );
    1.154 +	static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false);
    1.155 +	// Offsets below are in machine words: each enum slot is a jint, so divide by 2.
    1.156 +	static int raOffset(void) { return return_off / 2; }
    1.157 +	// Rmethod (the Method* register) — presumably aliased to S3 on this port; see methodOffset.
    1.158 +	static int methodOffset(void) { return s3_off / 2; }
    1.159 +
    1.160 +	static int v0Offset(void) { return v0_off / 2; }
    1.161 +	static int v1Offset(void) { return v1_off / 2; }
    1.162 +
    1.163 +	static int fpResultOffset(void) { return fpr0_off / 2; }
    1.164 +
    1.165 +	// During deoptimization only the result registers need to be restored;
    1.166 +	// all the other values have already been extracted.
    1.167 +
    1.168 +	static void restore_result_registers(MacroAssembler* masm);
    1.169 +};
   1.170 +
   1.171 +OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors ) {
   1.172 +
   1.173 +/*
   1.174 +  int frame_words = reg_save_size + additional_frame_words;
   1.175 +  int frame_size_in_bytes =  frame_words * wordSize;
   1.176 +  *total_frame_words = frame_words;
   1.177 +  */
   1.178 +  // Always make the frame size 16-byte aligned
   1.179 +  int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
   1.180 +                                     reg_save_size*BytesPerInt, 16);
   1.181 +  // OopMap frame size is in compiler stack slots (jint's) not bytes or words
   1.182 +  int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
   1.183 +  // The caller will allocate additional_frame_words
   1.184 +  int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt;
   1.185 +  // CodeBlob frame size is in words.
   1.186 +  int frame_size_in_words = frame_size_in_bytes / wordSize;
   1.187 +  *total_frame_words = frame_size_in_words;
   1.188 +
   1.189 +  // save registers, fpu state, and flags  
   1.190 +  // We assume caller has already has return address slot on the stack
   1.191 +  // We push epb twice in this sequence because we want the real ebp
   1.192 +  // to be under the return like a normal enter and we want to use pushad
   1.193 +  // We push by hand instead of pusing push
   1.194 +
   1.195 +  __ daddiu(SP, SP, - reg_save_size * jintSize);
   1.196 +
   1.197 +  __ sdc1(F0, SP, fpr0_off * jintSize); __ sdc1(F1, SP, fpr1_off * jintSize);
   1.198 +  __ sdc1(F2, SP, fpr2_off * jintSize); __ sdc1(F3, SP, fpr3_off * jintSize);
   1.199 +  __ sdc1(F4, SP, fpr4_off * jintSize); __ sdc1(F5, SP, fpr5_off * jintSize);
   1.200 +  __ sdc1(F6, SP, fpr6_off * jintSize);	__ sdc1(F7, SP, fpr7_off * jintSize);
   1.201 +  __ sdc1(F8, SP, fpr8_off * jintSize);	__ sdc1(F9, SP, fpr9_off * jintSize);
   1.202 +  __ sdc1(F10, SP, fpr10_off * jintSize);	__ sdc1(F11, SP, fpr11_off * jintSize);
   1.203 +  __ sdc1(F12, SP, fpr12_off * jintSize);	__ sdc1(F13, SP, fpr13_off * jintSize);
   1.204 +  __ sdc1(F14, SP, fpr14_off * jintSize);	__ sdc1(F15, SP, fpr15_off * jintSize);
   1.205 +  __ sdc1(F16, SP, fpr16_off * jintSize);	__ sdc1(F17, SP, fpr17_off * jintSize);
   1.206 +  __ sdc1(F18, SP, fpr18_off * jintSize);	__ sdc1(F19, SP, fpr19_off * jintSize);
   1.207 +  __ sdc1(F20, SP, fpr20_off * jintSize);	__ sdc1(F21, SP, fpr21_off * jintSize);
   1.208 +  __ sdc1(F22, SP, fpr22_off * jintSize);	__ sdc1(F23, SP, fpr23_off * jintSize);
   1.209 +  __ sdc1(F24, SP, fpr24_off * jintSize);	__ sdc1(F25, SP, fpr25_off * jintSize);
   1.210 +  __ sdc1(F26, SP, fpr26_off * jintSize);	__ sdc1(F27, SP, fpr27_off * jintSize);
   1.211 +  __ sdc1(F28, SP, fpr28_off * jintSize);	__ sdc1(F29, SP, fpr29_off * jintSize);
   1.212 +  __ sdc1(F30, SP, fpr30_off * jintSize);	__ sdc1(F31, SP, fpr31_off * jintSize);
   1.213 +  __ sd(V0, SP, v0_off * jintSize);	__ sd(V1, SP, v1_off * jintSize);
   1.214 +  __ sd(A0, SP, a0_off * jintSize);	__ sd(A1, SP, a1_off * jintSize);
   1.215 +  __ sd(A2, SP, a2_off * jintSize);	__ sd(A3, SP, a3_off * jintSize);
   1.216 +  __ sd(A4, SP, a4_off * jintSize);	__ sd(A5, SP, a5_off * jintSize);
   1.217 +  __ sd(A6, SP, a6_off * jintSize);	__ sd(A7, SP, a7_off * jintSize);
   1.218 +  __ sd(T0, SP, t0_off * jintSize);
   1.219 +  __ sd(T1, SP, t1_off * jintSize);
   1.220 +  __ sd(T2, SP, t2_off * jintSize);
   1.221 +  __ sd(T3, SP, t3_off * jintSize);
   1.222 +  __ sd(S0, SP, s0_off * jintSize);
   1.223 +  __ sd(S1, SP, s1_off * jintSize);
   1.224 +  __ sd(S2, SP, s2_off * jintSize);
   1.225 +  __ sd(S3, SP, s3_off * jintSize);
   1.226 +  __ sd(S4, SP, s4_off * jintSize);
   1.227 +  __ sd(S5, SP, s5_off * jintSize);
   1.228 +  __ sd(S6, SP, s6_off * jintSize);
   1.229 +  __ sd(S7, SP, s7_off * jintSize);
   1.230 +
   1.231 +  __ sd(T8, SP, t8_off * jintSize);
   1.232 +  __ sd(T9, SP, t9_off * jintSize);
   1.233 +
   1.234 +  __ sd(GP, SP, gp_off * jintSize);
   1.235 +  __ sd(FP, SP, fp_off * jintSize);
   1.236 +  __ sd(RA, SP, return_off * jintSize);
   1.237 +  __ daddi(FP, SP, fp_off * jintSize);
   1.238 +
   1.239 +  OopMapSet *oop_maps = new OopMapSet();
   1.240 +  //OopMap* map =  new OopMap( frame_words, 0 );  
   1.241 +  OopMap* map =  new OopMap( frame_size_in_slots, 0 );  
   1.242 +
   1.243 +
   1.244 +//#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_words)
   1.245 +#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots)
   1.246 +  map->set_callee_saved(STACK_OFFSET( v0_off), V0->as_VMReg());
   1.247 +  map->set_callee_saved(STACK_OFFSET( v1_off), V1->as_VMReg());
   1.248 +  map->set_callee_saved(STACK_OFFSET( a0_off), A0->as_VMReg());
   1.249 +  map->set_callee_saved(STACK_OFFSET( a1_off), A1->as_VMReg());
   1.250 +  map->set_callee_saved(STACK_OFFSET( a2_off), A2->as_VMReg());
   1.251 +  map->set_callee_saved(STACK_OFFSET( a3_off), A3->as_VMReg());
   1.252 +  map->set_callee_saved(STACK_OFFSET( a4_off), A4->as_VMReg());
   1.253 +  map->set_callee_saved(STACK_OFFSET( a5_off), A5->as_VMReg());
   1.254 +  map->set_callee_saved(STACK_OFFSET( a6_off), A6->as_VMReg());
   1.255 +  map->set_callee_saved(STACK_OFFSET( a7_off), A7->as_VMReg());
   1.256 +  map->set_callee_saved(STACK_OFFSET( t0_off), T0->as_VMReg());
   1.257 +  map->set_callee_saved(STACK_OFFSET( t1_off), T1->as_VMReg());
   1.258 +  map->set_callee_saved(STACK_OFFSET( t2_off), T2->as_VMReg());
   1.259 +  map->set_callee_saved(STACK_OFFSET( t3_off), T3->as_VMReg());
   1.260 +  map->set_callee_saved(STACK_OFFSET( s0_off), S0->as_VMReg());
   1.261 +  map->set_callee_saved(STACK_OFFSET( s1_off), S1->as_VMReg());
   1.262 +  map->set_callee_saved(STACK_OFFSET( s2_off), S2->as_VMReg());
   1.263 +  map->set_callee_saved(STACK_OFFSET( s3_off), S3->as_VMReg());
   1.264 +  map->set_callee_saved(STACK_OFFSET( s4_off), S4->as_VMReg());
   1.265 +  map->set_callee_saved(STACK_OFFSET( s5_off), S5->as_VMReg());
   1.266 +  map->set_callee_saved(STACK_OFFSET( s6_off), S6->as_VMReg());
   1.267 +  map->set_callee_saved(STACK_OFFSET( s7_off), S7->as_VMReg());
   1.268 +  map->set_callee_saved(STACK_OFFSET( t8_off), T8->as_VMReg());
   1.269 +  map->set_callee_saved(STACK_OFFSET( t9_off), T9->as_VMReg());
   1.270 +  map->set_callee_saved(STACK_OFFSET( gp_off), GP->as_VMReg());
   1.271 +  map->set_callee_saved(STACK_OFFSET( fp_off), FP->as_VMReg());
   1.272 +  map->set_callee_saved(STACK_OFFSET( return_off), RA->as_VMReg());
   1.273 +
   1.274 +  map->set_callee_saved(STACK_OFFSET( fpr0_off), F0->as_VMReg());
   1.275 +  map->set_callee_saved(STACK_OFFSET( fpr1_off), F1->as_VMReg());
   1.276 +  map->set_callee_saved(STACK_OFFSET( fpr2_off), F2->as_VMReg());
   1.277 +  map->set_callee_saved(STACK_OFFSET( fpr3_off), F3->as_VMReg());
   1.278 +  map->set_callee_saved(STACK_OFFSET( fpr4_off), F4->as_VMReg());
   1.279 +  map->set_callee_saved(STACK_OFFSET( fpr5_off), F5->as_VMReg());
   1.280 +  map->set_callee_saved(STACK_OFFSET( fpr6_off), F6->as_VMReg());
   1.281 +  map->set_callee_saved(STACK_OFFSET( fpr7_off), F7->as_VMReg());
   1.282 +  map->set_callee_saved(STACK_OFFSET( fpr8_off), F8->as_VMReg());
   1.283 +  map->set_callee_saved(STACK_OFFSET( fpr9_off), F9->as_VMReg());
   1.284 +  map->set_callee_saved(STACK_OFFSET( fpr10_off), F10->as_VMReg());
   1.285 +  map->set_callee_saved(STACK_OFFSET( fpr11_off), F11->as_VMReg());
   1.286 +  map->set_callee_saved(STACK_OFFSET( fpr12_off), F12->as_VMReg());
   1.287 +  map->set_callee_saved(STACK_OFFSET( fpr13_off), F13->as_VMReg());
   1.288 +  map->set_callee_saved(STACK_OFFSET( fpr14_off), F14->as_VMReg());
   1.289 +  map->set_callee_saved(STACK_OFFSET( fpr15_off), F15->as_VMReg());
   1.290 +  map->set_callee_saved(STACK_OFFSET( fpr16_off), F16->as_VMReg());
   1.291 +  map->set_callee_saved(STACK_OFFSET( fpr17_off), F17->as_VMReg());
   1.292 +  map->set_callee_saved(STACK_OFFSET( fpr18_off), F18->as_VMReg());
   1.293 +  map->set_callee_saved(STACK_OFFSET( fpr19_off), F19->as_VMReg());
   1.294 +  map->set_callee_saved(STACK_OFFSET( fpr20_off), F20->as_VMReg());
   1.295 +  map->set_callee_saved(STACK_OFFSET( fpr21_off), F21->as_VMReg());
   1.296 +  map->set_callee_saved(STACK_OFFSET( fpr22_off), F22->as_VMReg());
   1.297 +  map->set_callee_saved(STACK_OFFSET( fpr23_off), F23->as_VMReg());
   1.298 +  map->set_callee_saved(STACK_OFFSET( fpr24_off), F24->as_VMReg());
   1.299 +  map->set_callee_saved(STACK_OFFSET( fpr25_off), F25->as_VMReg());
   1.300 +  map->set_callee_saved(STACK_OFFSET( fpr26_off), F26->as_VMReg());
   1.301 +  map->set_callee_saved(STACK_OFFSET( fpr27_off), F27->as_VMReg());
   1.302 +  map->set_callee_saved(STACK_OFFSET( fpr28_off), F28->as_VMReg());
   1.303 +  map->set_callee_saved(STACK_OFFSET( fpr29_off), F29->as_VMReg());
   1.304 +  map->set_callee_saved(STACK_OFFSET( fpr30_off), F30->as_VMReg());
   1.305 +  map->set_callee_saved(STACK_OFFSET( fpr31_off), F31->as_VMReg());
   1.306 +
   1.307 +/*
   1.308 +  if (true) {
   1.309 +    map->set_callee_saved(STACK_OFFSET( v0H_off), V0->as_VMReg()->next());
   1.310 +    map->set_callee_saved(STACK_OFFSET( v1H_off), V1->as_VMReg()->next());
   1.311 +    map->set_callee_saved(STACK_OFFSET( a0H_off), A0->as_VMReg()->next());
   1.312 +    map->set_callee_saved(STACK_OFFSET( a1H_off), A1->as_VMReg()->next());
   1.313 +    map->set_callee_saved(STACK_OFFSET( a2H_off), A2->as_VMReg()->next());
   1.314 +    map->set_callee_saved(STACK_OFFSET( a3H_off), A3->as_VMReg()->next());
   1.315 +    map->set_callee_saved(STACK_OFFSET( a4H_off), A4->as_VMReg()->next());
   1.316 +    map->set_callee_saved(STACK_OFFSET( a5H_off), A5->as_VMReg()->next());
   1.317 +    map->set_callee_saved(STACK_OFFSET( a6H_off), A6->as_VMReg()->next());
   1.318 +    map->set_callee_saved(STACK_OFFSET( a7H_off), A7->as_VMReg()->next());
   1.319 +    map->set_callee_saved(STACK_OFFSET( t0H_off), T0->as_VMReg()->next());
   1.320 +    map->set_callee_saved(STACK_OFFSET( t1H_off), T1->as_VMReg()->next());
   1.321 +    map->set_callee_saved(STACK_OFFSET( t2H_off), T2->as_VMReg()->next());
   1.322 +    map->set_callee_saved(STACK_OFFSET( t3H_off), T3->as_VMReg()->next());
   1.323 +    map->set_callee_saved(STACK_OFFSET( s0H_off), S0->as_VMReg()->next());
   1.324 +    map->set_callee_saved(STACK_OFFSET( s1H_off), S1->as_VMReg()->next());
   1.325 +    map->set_callee_saved(STACK_OFFSET( s2H_off), S2->as_VMReg()->next());
   1.326 +    map->set_callee_saved(STACK_OFFSET( s3H_off), S3->as_VMReg()->next());
   1.327 +    map->set_callee_saved(STACK_OFFSET( s4H_off), S4->as_VMReg()->next());
   1.328 +    map->set_callee_saved(STACK_OFFSET( s5H_off), S5->as_VMReg()->next());
   1.329 +    map->set_callee_saved(STACK_OFFSET( s6H_off), S6->as_VMReg()->next());
   1.330 +    map->set_callee_saved(STACK_OFFSET( s7H_off), S7->as_VMReg()->next());
   1.331 +    map->set_callee_saved(STACK_OFFSET( t8H_off), T8->as_VMReg()->next());
   1.332 +    map->set_callee_saved(STACK_OFFSET( t9H_off), T9->as_VMReg()->next());
   1.333 +    map->set_callee_saved(STACK_OFFSET( gpH_off), GP->as_VMReg()->next());
   1.334 +    map->set_callee_saved(STACK_OFFSET( fpH_off), FP->as_VMReg()->next());
   1.335 +    map->set_callee_saved(STACK_OFFSET( returnH_off), RA->as_VMReg()->next());
   1.336 +
   1.337 +    map->set_callee_saved(STACK_OFFSET( fpr0H_off), F0->as_VMReg()->next());
   1.338 +    map->set_callee_saved(STACK_OFFSET( fpr2H_off), F2->as_VMReg()->next());
   1.339 +    map->set_callee_saved(STACK_OFFSET( fpr4H_off), F4->as_VMReg()->next());
   1.340 +    map->set_callee_saved(STACK_OFFSET( fpr6H_off), F6->as_VMReg()->next());
   1.341 +    map->set_callee_saved(STACK_OFFSET( fpr8H_off), F8->as_VMReg()->next());
   1.342 +    map->set_callee_saved(STACK_OFFSET( fpr10H_off), F10->as_VMReg()->next());
   1.343 +    map->set_callee_saved(STACK_OFFSET( fpr12H_off), F12->as_VMReg()->next());
   1.344 +    map->set_callee_saved(STACK_OFFSET( fpr14H_off), F14->as_VMReg()->next());
   1.345 +    map->set_callee_saved(STACK_OFFSET( fpr16H_off), F16->as_VMReg()->next());
   1.346 +    map->set_callee_saved(STACK_OFFSET( fpr18H_off), F18->as_VMReg()->next());
   1.347 +    map->set_callee_saved(STACK_OFFSET( fpr20H_off), F20->as_VMReg()->next());
   1.348 +    map->set_callee_saved(STACK_OFFSET( fpr22H_off), F22->as_VMReg()->next());
   1.349 +    map->set_callee_saved(STACK_OFFSET( fpr24H_off), F24->as_VMReg()->next());
   1.350 +    map->set_callee_saved(STACK_OFFSET( fpr26H_off), F26->as_VMReg()->next());
   1.351 +    map->set_callee_saved(STACK_OFFSET( fpr28H_off), F28->as_VMReg()->next());
   1.352 +    map->set_callee_saved(STACK_OFFSET( fpr30H_off), F30->as_VMReg()->next());
   1.353 +  }
   1.354 +*/
   1.355 +#undef STACK_OFFSET
   1.356 +  return map;
   1.357 +}
   1.358 +
   1.359 +
   1.360 +// Pop the current frame and restore all the registers that we
   1.361 +// saved.
   1.362 +void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
   1.363 +  __ ldc1(F0, SP, fpr0_off * jintSize); __ ldc1(F1, SP, fpr1_off * jintSize);
   1.364 +  __ ldc1(F2, SP, fpr2_off * jintSize); __ ldc1(F3, SP, fpr3_off * jintSize);
   1.365 +  __ ldc1(F4, SP, fpr4_off * jintSize); __ ldc1(F5, SP, fpr5_off * jintSize);
   1.366 +  __ ldc1(F6, SP, fpr6_off * jintSize);	__ ldc1(F7, SP, fpr7_off * jintSize);
   1.367 +  __ ldc1(F8, SP, fpr8_off * jintSize);	__ ldc1(F9, SP, fpr9_off * jintSize);
   1.368 +  __ ldc1(F10, SP, fpr10_off * jintSize);	__ ldc1(F11, SP, fpr11_off * jintSize);
   1.369 +  __ ldc1(F12, SP, fpr12_off * jintSize);	__ ldc1(F13, SP, fpr13_off * jintSize);
   1.370 +  __ ldc1(F14, SP, fpr14_off * jintSize);	__ ldc1(F15, SP, fpr15_off * jintSize);
   1.371 +  __ ldc1(F16, SP, fpr16_off * jintSize);	__ ldc1(F17, SP, fpr17_off * jintSize);
   1.372 +  __ ldc1(F18, SP, fpr18_off * jintSize);	__ ldc1(F19, SP, fpr19_off * jintSize);
   1.373 +  __ ldc1(F20, SP, fpr20_off * jintSize);	__ ldc1(F21, SP, fpr21_off * jintSize);
   1.374 +  __ ldc1(F22, SP, fpr22_off * jintSize);	__ ldc1(F23, SP, fpr23_off * jintSize);
   1.375 +  __ ldc1(F24, SP, fpr24_off * jintSize);	__ ldc1(F25, SP, fpr25_off * jintSize);
   1.376 +  __ ldc1(F26, SP, fpr26_off * jintSize);	__ ldc1(F27, SP, fpr27_off * jintSize);
   1.377 +  __ ldc1(F28, SP, fpr28_off * jintSize);	__ ldc1(F29, SP, fpr29_off * jintSize);
   1.378 +  __ ldc1(F30, SP, fpr30_off * jintSize);	__ ldc1(F31, SP, fpr31_off * jintSize);
   1.379 +
   1.380 +  __ ld(V0, SP, v0_off * jintSize);	__ ld(V1, SP, v1_off * jintSize);
   1.381 +  __ ld(A0, SP, a0_off * jintSize);	__ ld(A1, SP, a1_off * jintSize);
   1.382 +  __ ld(A2, SP, a2_off * jintSize);	__ ld(A3, SP, a3_off * jintSize);
   1.383 +  __ ld(A4, SP, a4_off * jintSize);	__ ld(A5, SP, a5_off * jintSize);
   1.384 +  __ ld(A6, SP, a6_off * jintSize);	__ ld(A7, SP, a7_off * jintSize);
   1.385 +  __ ld(T0, SP, t0_off * jintSize);
   1.386 +  __ ld(T1, SP, t1_off * jintSize);
   1.387 +  __ ld(T2, SP, t2_off * jintSize);
   1.388 +  __ ld(T3, SP, t3_off * jintSize);
   1.389 +  __ ld(S0, SP, s0_off * jintSize);
   1.390 +  __ ld(S1, SP, s1_off * jintSize);
   1.391 +  __ ld(S2, SP, s2_off * jintSize);
   1.392 +  __ ld(S3, SP, s3_off * jintSize);
   1.393 +  __ ld(S4, SP, s4_off * jintSize);
   1.394 +  __ ld(S5, SP, s5_off * jintSize);
   1.395 +  __ ld(S6, SP, s6_off * jintSize);
   1.396 +  __ ld(S7, SP, s7_off * jintSize);
   1.397 +
   1.398 +  __ ld(T8, SP, t8_off * jintSize);
   1.399 +  __ ld(T9, SP, t9_off * jintSize);
   1.400 +
   1.401 +  __ ld(GP, SP, gp_off * jintSize);
   1.402 +  __ ld(FP, SP, fp_off * jintSize);
   1.403 +  __ ld(RA, SP, return_off * jintSize);
   1.404 +
   1.405 +  __ addiu(SP, SP, reg_save_size * jintSize);
   1.406 +}
   1.407 +
   1.408 +// Pop the current frame and restore the registers that might be holding
   1.409 +// a result.
   1.410 +// FIXME, if the result is float?
   1.411 +void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
   1.412 +  // Just restore result register. Only used by deoptimization. By
   1.413 +  // now any callee save register that needs to be restore to a c2
   1.414 +  // caller of the deoptee has been extracted into the vframeArray
   1.415 +  // and will be stuffed into the c2i adapter we create for later
   1.416 +  // restoration so only result registers need to be restored here.
   1.417 +  //
   1.418 +  __ ld(V0, SP, v0_off * jintSize);
   1.419 +  __ ld(V1, SP, v1_off * jintSize);
   1.420 +  __ addiu(SP, SP, return_off * jintSize); 
   1.421 +}
   1.422 +
    1.423 + // Is a vector's size (in bytes) bigger than the size saved by default?
    1.424 + // NOTE(review): the 16-byte threshold was inherited from x86 (XMM/fxsave) — confirm it is meaningful for MIPS.
    1.425 + bool SharedRuntime::is_wide_vector(int size) {
    1.426 +   return size > 16;
    1.427 + }
   1.428 +
   1.429 +// The java_calling_convention describes stack locations as ideal slots on
   1.430 +// a frame with no abi restrictions. Since we must observe abi restrictions
   1.431 +// (like the placement of the register window) the slots must be biased by
   1.432 +// the following value.
   1.433 +
    1.434 +static int reg2offset_in(VMReg r) { // byte offset of an incoming stack arg, biased past the two preserved words
    1.435 +	// Account for the saved frame pointer and return address (two words)
    1.436 +	// This should really be in_preserve_stack_slots
    1.437 +	return (r->reg2stack() + 2 * VMRegImpl::slots_per_word) * VMRegImpl::stack_slot_size;  // + 2 * VMRegImpl::stack_slot_size);
    1.438 +}
   1.439 +
    1.440 +static int reg2offset_out(VMReg r) { // byte offset of an outgoing stack arg, biased past the out-preserve area
    1.441 +	return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
    1.442 +}
   1.443 +
   1.444 +// ---------------------------------------------------------------------------
   1.445 +// Read the array of BasicTypes from a signature, and compute where the
   1.446 +// arguments should go.  Values in the VMRegPair regs array refer to 4-byte
   1.447 +// quantities.  Values less than SharedInfo::stack0 are registers, those above
   1.448 +// refer to 4-byte stack slots.  All stack slots are based off of the stack pointer
   1.449 +// as framesizes are fixed.
   1.450 +// VMRegImpl::stack0 refers to the first slot 0(sp).
   1.451 +// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher.  Register
   1.452 +// up to RegisterImpl::number_of_registers) are the 32-bit
   1.453 +// integer registers.
   1.454 +
    1.455 +// Pass oop/int/long args in registers T0 and A0 - A7.
    1.456 +// Pass float/double args in registers F12 - F19.
    1.457 +// Arguments that do not fit in a register are passed on the stack
    1.458 +// in 32-bit VMRegImpl slots (see java_calling_convention below).
   1.459 +
   1.460 +// Note: the INPUTS in sig_bt are in units of Java argument words, which are
   1.461 +// either 32-bit or 64-bit depending on the build.  The OUTPUTS are in 32-bit
   1.462 +// units regardless of build. Of course for i486 there is no 64 bit build
   1.463 +
   1.464 +
   1.465 +// ---------------------------------------------------------------------------
   1.466 +// The compiled Java calling convention.
    1.467 +// Pass oop/int/long args in registers T0 and A0 - A7 (INT_ArgReg below).
    1.468 +// Pass float/double args in registers F12 - F19 (FP_ArgReg below).
    1.469 +// Arguments that do not fit in a register go to the stack in
    1.470 +// 32-bit VMRegImpl slots, two slots per argument.
   1.471 +
    1.472 +int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
    1.473 +                                           VMRegPair *regs,
    1.474 +                                           int total_args_passed,
    1.475 +                                           int is_outgoing) {
    1.476 +//#define aoqi_test
    1.477 +#ifdef aoqi_test
    1.478 +tty->print_cr(" SharedRuntime::%s :%d, total_args_passed: %d", __func__, __LINE__, total_args_passed);
    1.479 +#endif
    1.480 +
    1.481 +  // Create the mapping between argument positions and
    1.482 +  // registers.
    1.483 +  //static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = {
    1.484 +  static const Register INT_ArgReg[Argument::n_register_parameters + 1] = { // 9 entries: T0 plus A0..A7; NOTE(review): the guard below uses n_register_parameters, so the last entry looks unreachable — confirm
    1.485 +    T0, A0, A1, A2, A3, A4, A5, A6, A7
    1.486 +  };
    1.487 +  //static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_j] = {
    1.488 +  static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { // F12..F19
    1.489 +    F12, F13, F14, F15, F16, F17, F18, F19
    1.490 +  };
    1.491 +
    1.492 +
    1.493 +  uint args = 0; // NOTE(review): a single counter is shared by int and fp args, so int and fp registers are consumed in lockstep — presumably the adapters rely on this; confirm
    1.494 +  uint stk_args = 0; // inc by 2 each time
    1.495 +
    1.496 +  for (int i = 0; i < total_args_passed; i++) {
    1.497 +    switch (sig_bt[i]) {
    1.498 +    case T_VOID:
    1.499 +      // halves of T_LONG or T_DOUBLE
    1.500 +      assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
    1.501 +      regs[i].set_bad();
    1.502 +      break;
    1.503 +    case T_BOOLEAN:
    1.504 +    case T_CHAR:
    1.505 +    case T_BYTE:
    1.506 +    case T_SHORT:
    1.507 +    case T_INT:
    1.508 +      if (args < Argument::n_register_parameters) {
    1.509 +        regs[i].set1(INT_ArgReg[args++]->as_VMReg());
    1.510 +      } else {
    1.511 +        regs[i].set1(VMRegImpl::stack2reg(stk_args));
    1.512 +        stk_args += 2;
    1.513 +      }
    1.514 +      break;
    1.515 +    case T_LONG:
    1.516 +      assert(sig_bt[i + 1] == T_VOID, "expecting half");
    1.517 +      // fall through
    1.518 +    case T_OBJECT:
    1.519 +    case T_ARRAY:
    1.520 +    case T_ADDRESS:
    1.521 +      if (args < Argument::n_register_parameters) {
    1.522 +        regs[i].set2(INT_ArgReg[args++]->as_VMReg());
    1.523 +      } else {
    1.524 +        regs[i].set2(VMRegImpl::stack2reg(stk_args));
    1.525 +        stk_args += 2;
    1.526 +      }
    1.527 +      break;
    1.528 +    case T_FLOAT:
    1.529 +      if (args < Argument::n_float_register_parameters) {
    1.530 +        regs[i].set1(FP_ArgReg[args++]->as_VMReg());
    1.531 +      } else {
    1.532 +        regs[i].set1(VMRegImpl::stack2reg(stk_args));
    1.533 +        stk_args += 2;
    1.534 +      }
    1.535 +      break;
    1.536 +    case T_DOUBLE:
    1.537 +      assert(sig_bt[i + 1] == T_VOID, "expecting half");
    1.538 +      if (args < Argument::n_float_register_parameters) {
    1.539 +        regs[i].set2(FP_ArgReg[args++]->as_VMReg());
    1.540 +      } else {
    1.541 +        regs[i].set2(VMRegImpl::stack2reg(stk_args));
    1.542 +        stk_args += 2;
    1.543 +      }
    1.544 +      break;
    1.545 +    default:
    1.546 +      ShouldNotReachHere();
    1.547 +      break;
    1.548 +    }
    1.549 +#ifdef aoqi_test
    1.550 +tty->print_cr(" SharedRuntime::%s :%d, sig_bt[%d]: %d, reg[%d]:%d|%d, stk_args:%d", __func__, __LINE__, i, sig_bt[i], i, regs[i].first(), regs[i].second(), stk_args);
    1.551 +#endif
    1.552 +  }
    1.553 +
    1.554 +  return round_to(stk_args, 2); // stack args are counted in 32-bit slots, rounded to an even count
    1.555 +/*
    1.556 +	// Starting stack position for args on stack
    1.557 +  uint    stack = 0;
    1.558 +
    1.559 +	// Pass first five oop/int args in registers T0, A0 - A3.
    1.560 +	uint reg_arg0 = 9999;
    1.561 +	uint reg_arg1 = 9999;
    1.562 +	uint reg_arg2 = 9999;
    1.563 +	uint reg_arg3 = 9999;
    1.564 +	uint reg_arg4 = 9999;
    1.565 +
    1.566 + 
    1.567 +  // Pass doubles & longs &float  ligned on the stack.  First count stack slots for doubles
    1.568 +	int i;
    1.569 +	for( i = 0; i < total_args_passed; i++) {
    1.570 +		if( sig_bt[i] == T_DOUBLE || sig_bt[i] == T_LONG ) {
    1.571 +			stack += 2;
    1.572 +		}
    1.573 +	}
    1.574 +	int dstack = 0;  // Separate counter for placing doubles
    1.575 +  for( i = 0; i < total_args_passed; i++) {
    1.576 +    // From the type and the argument number (count) compute the location
    1.577 +    switch( sig_bt[i] ) {
    1.578 +    case T_SHORT:
    1.579 +    case T_CHAR:
    1.580 +    case T_BYTE:
    1.581 +    case T_BOOLEAN:
    1.582 +    case T_INT:
    1.583 +    case T_ARRAY:
    1.584 +    case T_OBJECT:
    1.585 +    case T_ADDRESS:
    1.586 +	    if( reg_arg0 == 9999 )  {
    1.587 +		    reg_arg0 = i;
    1.588 +		    regs[i].set1(T0->as_VMReg());
    1.589 +	    } else if( reg_arg1 == 9999 ) {
    1.590 +		    reg_arg1 = i;
    1.591 +		    regs[i].set1(A0->as_VMReg());
    1.592 +	    } else if( reg_arg2 == 9999 ) {
    1.593 +		    reg_arg2 = i;
    1.594 +		    regs[i].set1(A1->as_VMReg());
    1.595 +	    }else if( reg_arg3 == 9999 ) {
    1.596 +		    reg_arg3 = i;
    1.597 +		    regs[i].set1(A2->as_VMReg());
    1.598 +	    }else if( reg_arg4 == 9999 ) {
    1.599 +		    reg_arg4 = i;
    1.600 +		    regs[i].set1(A3->as_VMReg());
    1.601 +	    } else {
    1.602 +		    regs[i].set1(VMRegImpl::stack2reg(stack++));
    1.603 +	    }
    1.604 +	    break;
    1.605 +    case T_FLOAT:
    1.606 +	    regs[i].set1(VMRegImpl::stack2reg(stack++));
    1.607 +	    break;
    1.608 +    case T_LONG:      
    1.609 +	    assert(sig_bt[i+1] == T_VOID, "missing Half" ); 
    1.610 +	    regs[i].set2(VMRegImpl::stack2reg(dstack));
    1.611 +	    dstack += 2;
    1.612 +	    break;
    1.613 +    case T_DOUBLE:
    1.614 +	    assert(sig_bt[i+1] == T_VOID, "missing Half" ); 
    1.615 +	    regs[i].set2(VMRegImpl::stack2reg(dstack));
    1.616 +	    dstack += 2;
    1.617 +	    break;
    1.618 +    case T_VOID: regs[i].set_bad(); break;
    1.619 +		 break;
    1.620 +    default:
    1.621 +		 ShouldNotReachHere();
    1.622 +		 break;
    1.623 +    }
    1.624 + }
    1.625 +  // return value can be odd number of VMRegImpl stack slots make multiple of 2
    1.626 +  return round_to(stack, 2);
    1.627 +*/
    1.628 +}
   1.629 +
// Helper class mostly to avoid passing masm everywhere, and handle store
// displacement overflow logic for LP64.
// An AdapterGenerator emits the two adapters that bridge the compiled and
// interpreted calling conventions: c2i (compiled caller -> interpreter) and
// i2c (interpreted caller -> compiled code).
class AdapterGenerator {
  MacroAssembler *masm;   // assembler all emit helpers write through
#ifdef _LP64
  // Scratch register used when an argument-slot displacement does not fit
  // in an instruction immediate (see arg_slot/next_arg_slot below).
  Register Rdisp;
  void set_Rdisp(Register r)  { Rdisp = r; }
#endif // _LP64

  // Patch the caller's callsite to jump to compiled code, if any exists.
  void patch_callers_callsite();
//  void tag_c2i_arg(frame::Tag t, Register base, int st_off, Register scratch);

  // base+st_off points to top of argument
  int arg_offset(const int st_off) { return st_off; }
  // Offset of the following (lower-addressed) interpreter stack element.
  int next_arg_offset(const int st_off) {
    return st_off - Interpreter::stackElementSize;
  }

#ifdef _LP64
  // On _LP64 argument slot values are loaded first into a register
  // because they might not fit into displacement.
  Register arg_slot(const int st_off);
  Register next_arg_slot(const int st_off);
#else
  int arg_slot(const int st_off)      { return arg_offset(st_off); }
  int next_arg_slot(const int st_off) { return next_arg_offset(st_off); }
#endif // _LP64

  // Stores long into offset pointed to by base
  void store_c2i_long(Register r, Register base,
                      const int st_off, bool is_stack);
  void store_c2i_object(Register r, Register base,
                        const int st_off);
  void store_c2i_int(Register r, Register base,
                     const int st_off);
  void store_c2i_double(VMReg r_2,
                        VMReg r_1, Register base, const int st_off);
  void store_c2i_float(FloatRegister f, Register base,
                       const int st_off);

 public:
  //void tag_stack(const BasicType sig, int st_off);
  void gen_c2i_adapter(int total_args_passed,
                              // VMReg max_arg,
                              int comp_args_on_stack, // VMRegStackSlots
                              const BasicType *sig_bt,
                              const VMRegPair *regs,
                              Label& skip_fixup);
  void gen_i2c_adapter(int total_args_passed,
                              // VMReg max_arg,
                              int comp_args_on_stack, // VMRegStackSlots
                              const BasicType *sig_bt,
                              const VMRegPair *regs);

  AdapterGenerator(MacroAssembler *_masm) : masm(_masm) {}
};
   1.686 +
   1.687 +
// Patch the caller's callsite with entry to compiled code if it exists.
// Emitted at the head of the c2i adapter: if Method::_code is non-NULL the
// caller's call instruction is redirected to the compiled entry (via a VM
// call), so the NEXT invocation goes straight to compiled code; the current
// invocation still falls through to the interpreter.
void AdapterGenerator::patch_callers_callsite() {
  Label L;
  __ verify_oop(Rmethod);
  // Is there compiled code for this method?  Skip the fixup if not.
  __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset()));
  __ beq(AT,R0,L);
  __ delayed()->nop();
  // Schedule the branch target address early.
  // Call into the VM to patch the caller, then jump to compiled callee.
  // V0 isn't live here, so capture the return address while we easily can.
  __ move(V0, RA);

  // Save all registers; the patching VM call must not clobber caller state.
  __ pushad();
#ifdef COMPILER2
  // C2 may leave the stack dirty if not in SSE2+ mode
  __ empty_FPU_stack();
#endif /* COMPILER2 */

  // VM call arguments: A0 = target Method*, A1 = caller's return pc
  // (identifies the callsite to patch).
  __ move(A0, Rmethod);
  __ move(A1, V0);
  // We must preserve the return address across the call.
  __ verify_oop(Rmethod);
  // Preserve SP in S0 and align the outgoing C stack.
  __ move(S0, SP);
  __ move(AT, -(StackAlignmentInBytes));   // align the stack
  __ andr(SP, SP, AT);
  __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite),
      relocInfo::runtime_call_type);

  __ delayed()->nop();
  // Restore the caller's SP and registers.
  __ move(SP, S0);
  __ popad();
  __ bind(L);
}
   1.742 +/*
   1.743 +void AdapterGenerator::tag_c2i_arg(frame::Tag t, Register base, int st_off,
   1.744 +                 Register scratch) {
   1.745 +	Unimplemented();
   1.746 +}*/
   1.747 +
   1.748 +#ifdef _LP64
// LP64 variant: would materialize the slot offset into a register; the
// MIPS64 port never takes this path.
Register AdapterGenerator::arg_slot(const int st_off) {
  Unimplemented();
}
   1.752 +
// LP64 variant of next_arg_offset; unused on the MIPS64 port.
Register AdapterGenerator::next_arg_slot(const int st_off){
  Unimplemented();
}
   1.756 +#endif // _LP64
   1.757 +
// Stores long into offset pointed to by base.
// Unused on MIPS: gen_c2i_adapter stores arguments inline instead of
// going through these helpers.
void AdapterGenerator::store_c2i_long(Register r, Register base,
                                      const int st_off, bool is_stack) {
  Unimplemented();
}
   1.763 +
// Stores an object reference at base+st_off; unused on MIPS (see above).
void AdapterGenerator::store_c2i_object(Register r, Register base,
                      const int st_off) {
  Unimplemented();
}
   1.768 +
// Stores an int at base+st_off; unused on MIPS (see above).
void AdapterGenerator::store_c2i_int(Register r, Register base,
                   const int st_off) {
  Unimplemented();
}
   1.773 +
// Stores a double (r_1/r_2 pair) at base+st_off; unused on MIPS (see above).
void AdapterGenerator::store_c2i_double(VMReg r_2,
                      VMReg r_1, Register base, const int st_off) {
  Unimplemented();
}
   1.779 +
// Stores a float at base+st_off; unused on MIPS (see above).
void AdapterGenerator::store_c2i_float(FloatRegister f, Register base,
                                       const int st_off) {
  Unimplemented();
}
   1.784 +/*
   1.785 +void  AdapterGenerator::tag_stack(const BasicType sig, int st_off) {
   1.786 +	if (TaggedStackInterpreter) {
   1.787 +		int tag_offset = st_off + Interpreter::expr_tag_offset_in_bytes(0);
   1.788 +		if (sig == T_OBJECT || sig == T_ARRAY) {
   1.789 +			//   __ movl(Address(esp, tag_offset), frame::TagReference);
   1.790 +			//  __ addi(AT,R0, frame::TagReference); 
   1.791 +
   1.792 +			__ move(AT, frame::TagReference);
   1.793 +			__ sw (AT, SP, tag_offset); 
   1.794 +		} else if (sig == T_LONG || sig == T_DOUBLE) {
   1.795 +			int next_tag_offset = st_off + Interpreter::expr_tag_offset_in_bytes(1);
   1.796 +			// __ movl(Address(esp, next_tag_offset), frame::TagValue);
   1.797 +			// __ addi(AT,R0, frame::TagValue); 
   1.798 +			__ move(AT, frame::TagValue); 
   1.799 +			__ sw (AT, SP, next_tag_offset); 
   1.800 +			//__ movl(Address(esp, tag_offset), frame::TagValue);
   1.801 +			//   __ addi(AT,R0, frame::TagValue); 
   1.802 +			__ move(AT, frame::TagValue); 
   1.803 +			__ sw (AT, SP, tag_offset); 
   1.804 +
   1.805 +		} else {
   1.806 +			//  __ movl(Address(esp, tag_offset), frame::TagValue);
   1.807 +			//__ addi(AT,R0, frame::TagValue); 
   1.808 +			__ move(AT, frame::TagValue); 
   1.809 +			__ sw (AT, SP, tag_offset); 
   1.810 +
   1.811 +		}
   1.812 +	}
   1.813 +}*/
   1.814 +
// Generate a C2I adapter: unpack arguments from the compiled layout
// (registers + compiled stack slots, described by regs[]) into the
// interpreter's all-on-stack layout, then jump to the method's
// interpreter entry point.
void AdapterGenerator::gen_c2i_adapter(
                            int total_args_passed,
                            // VMReg max_arg,
                            int comp_args_on_stack, // VMRegStackSlots
                            const BasicType *sig_bt,
                            const VMRegPair *regs,
                            Label& skip_fixup) {

  // Before we get into the guts of the C2I adapter, see if we should be here
  // at all.  We've come from compiled code and are attempting to jump to the
  // interpreter, which means the caller made a static call to get here
  // (vcalls always get a compiled target if there is one).  Check for a
  // compiled target.  If there is one, we need to patch the caller's call.
  // However we will run interpreted if we come thru here. The next pass
  // thru the call site will run compiled. If we ran compiled here then
  // we can (theoretically) do endless i2c->c2i->i2c transitions during
  // deopt/uncommon trap cycles. If we always go interpreted here then
  // we can have at most one and don't need to play any tricks to keep
  // from endlessly growing the stack.
  //
  // Actually if we detected that we had an i2c->c2i transition here we
  // ought to be able to reset the world back to the state of the interpreted
  // call and not bother building another interpreter arg area. We don't
  // do that at this point.

  patch_callers_callsite();

  // A c2i entry that already verified the inline cache jumps here.
  __ bind(skip_fixup);

#ifdef COMPILER2
  __ empty_FPU_stack();
#endif /* COMPILER2 */
  // Since all args are passed on the stack in the interpreter layout,
  // total_args_passed * Interpreter::stackElementSize is the space we need.
  int extraspace = total_args_passed * Interpreter::stackElementSize;

  // stack is aligned, keep it that way
  extraspace = round_to(extraspace, 2*wordSize);

  // Preserve the return address in V0 across the argument shuffle.
  __ move(V0, RA);
  // Set senderSP value.
  // Refer to interpreter_mips.cpp:generate_asm_entry.
  __ move(Rsender, SP);
  // Grow the stack by the interpreter argument area.
  __ addi(SP, SP, -extraspace);

  // Now write the args into the outgoing interpreter space
  for (int i = 0; i < total_args_passed; i++) {
    if (sig_bt[i] == T_VOID) {
      // T_VOID is the dummy second half of a long/double pair.
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE),
          "missing half");
      continue;
    }

    // st_off points to lowest address on stack.
    int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize;
#ifdef aoqi_test
    tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d, st_off:%d", __func__, __LINE__, i, sig_bt[i], total_args_passed, st_off);
#endif
    // Say 4 args:
    // i   st_off
    // 0   12 T_LONG
    // 1    8 T_VOID
    // 2    4 T_OBJECT
    // 3    0 T_BOOL
    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }

    if (r_1->is_stack()) {
      // Memory-to-memory copy through AT.  The compiled arg sits above the
      // area we just allocated, hence the extraspace bias on ld_off.
      int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
#ifdef aoqi_test
      tty->print_cr(" AdapterGenerator::%s :%d, r_1->is_stack, ld_off:%x", __func__, __LINE__, ld_off);
#endif

      if (!r_2->is_valid()) {
#ifdef aoqi_test
        tty->print_cr(" AdapterGenerator::%s :%d, !r_2->is_valid, ld_off:%x", __func__, __LINE__, ld_off);
#endif
        __ ld_ptr(AT, SP, ld_off);
        __ st_ptr(AT, SP, st_off);
      } else {
#ifdef aoqi_test
        tty->print_cr(" AdapterGenerator::%s :%d, r_2->is_valid, ld_off:%x", __func__, __LINE__, ld_off);
#endif

        // ld_off == LSW, ld_off+VMRegImpl::stack_slot_size == MSW
        // st_off == MSW, st_off-wordSize == LSW

        // NOTE(review): next_off is unused since the two 32-bit copies were
        // collapsed into one 64-bit ld_ptr/st_ptr.
        int next_off = st_off - Interpreter::stackElementSize;
        __ ld_ptr(AT, SP, ld_off);
        __ st_ptr(AT, SP, st_off);

        /* Ref to is_Register condition: a 64-bit long/double also fills the
           second (lower-addressed) interpreter slot — see the layout note in
           the register branch below. */
        if(sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE)
          __ st_ptr(AT,SP,st_off - 8);
      }
    } else if (r_1->is_Register()) {
      Register r = r_1->as_Register();
      if (!r_2->is_valid()) {
#ifdef aoqi_test
        tty->print_cr(" AdapterGenerator::%s :%d, r_1->is_Register, !r_2->is_valid, st_off: %lx", __func__, __LINE__, st_off);
#endif
        __ sd(r,SP, st_off); //aoqi_test FIXME
      } else {
#ifdef aoqi_test
        tty->print_cr(" AdapterGenerator::%s :%d, r_1->is_Register, r_2->is_valid, st_off: %lx", __func__, __LINE__, st_off);
#endif
        //FIXME, mips will not enter here
        // long/double in gpr
        __ sd(r,SP, st_off); //aoqi_test FIXME
/* Jin: In [java/util/zip/ZipFile.java]

    private static native long open(String name, int mode, long lastModified);
    private static native int getTotal(long jzfile);
 *
 * We need to transfer T_LONG parameters from a compiled method to a native
 * method.  It's a complex process:
 *
 * Caller -> lir_static_call -> gen_resolve_stub
      -> -- resolve_static_call_C
         `- gen_c2i_adapter()	[*]
             |
	     `- AdapterHandlerLibrary::get_create_apapter_index
      -> generate_native_entry
      -> InterpreterRuntime::SignatureHandlerGenerator::pass_long [**]

 * In [**], the T_LONG parameter is stored in the stack as:

   (high)
    |         |
    -----------
    | 8 bytes |
    | (void)  |
    -----------
    | 8 bytes |
    | (long)  |
    -----------
    |         |
   (low)
 *
 * However, the sequence is reversed here:
 *
   (high)
    |         |
    -----------
    | 8 bytes |
    | (long)  |
    -----------
    | 8 bytes |
    | (void)  |
    -----------
    |         |
   (low)
 *
 * So another 8 bytes are stored in the T_VOID slot.  The value can then be
 * accessed from generate_native_entry().
 */
        if (sig_bt[i] == T_LONG)
          __ sd(r,SP, st_off - 8);
      }
    } else if (r_1->is_FloatRegister()) {
      assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register");

      FloatRegister fr = r_1->as_FloatRegister();
      if (sig_bt[i] == T_FLOAT)
        __ swc1(fr,SP, st_off);
      else
      {
        __ sdc1(fr,SP, st_off);
        __ sdc1(fr,SP, st_off - 8);	/* T_DOUBLE needs two slots */
      }
    }
  }

  // Schedule the branch target address early.
  __ ld_ptr(AT, Rmethod,in_bytes(Method::interpreter_entry_offset()) );
  // And restore the original return address before jumping to the
  // interpreter entry point.
  __ move(RA, V0);
  __ jr (AT);
  __ delayed()->nop();
}
  1.1021 +
// Generate an I2C adapter: repack arguments from the interpreter's
// all-on-stack layout into the compiled layout (registers + compiled
// stack slots, described by regs[]), then jump to the compiled entry.
void AdapterGenerator::gen_i2c_adapter(
                            int total_args_passed,
                            // VMReg max_arg,
                            int comp_args_on_stack, // VMRegStackSlots
                            const BasicType *sig_bt,
                            const VMRegPair *regs) {

  // Generate an I2C adapter: adjust the I-frame to make space for the C-frame
  // layout.  Lesp was saved by the calling I-frame and will be restored on
  // return.  Meanwhile, outgoing arg space is all owned by the callee
  // C-frame, so we can mangle it at will.  After adjusting the frame size,
  // hoist register arguments and repack other args according to the compiled
  // code convention.  Finally, end in a jump to the compiled code.  The entry
  // point address is the start of the buffer.

  // We will only enter here from an interpreted frame and never from after
  // passing thru a c2i. Azul allowed this but we do not. If we lose the
  // race and use a c2i we will remain interpreted for the race loser(s).
  // This removes all sorts of headaches on the mips side and also eliminates
  // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.

  // Remember the incoming (interpreter) SP before we grow the stack.
  __ move(T9, SP);

  // Cut-out for having no stack args.  Since up to 2 int/oop args are passed
  // in registers, we will occasionally have no stack args.
  int comp_words_on_stack = 0;
  if (comp_args_on_stack) {
    // Sig words on the stack are greater-than VMRegImpl::stack0.  Those in
    // registers are below.  By subtracting stack0, we either get a negative
    // number (all values in registers) or the maximum stack slot accessed.
    // int comp_args_on_stack = VMRegImpl::reg2stack(max_arg);
    // Convert 4-byte stack slots to words.
    // did mips need round? FIXME  aoqi
    comp_words_on_stack = round_to(comp_args_on_stack*4, wordSize)>>LogBytesPerWord;
    // Round up to miminum stack alignment, in wordSize
    comp_words_on_stack = round_to(comp_words_on_stack, 2);
    __ daddi(SP, SP, -comp_words_on_stack * wordSize);
  }

  // Align the outgoing SP
  __ move(AT, -(StackAlignmentInBytes));
  __ andr(SP, SP, AT);
  // push the return address on the stack (note that pushing, rather
  // than storing it, yields the correct frame alignment for the callee)
  // Put saved SP in another register: compiled code does not need V0 as
  // an input, and a reasonable value remains there if we fall into a c2i.
  const Register saved_sp = V0;
  __ move(saved_sp, T9);


  // Will jump to the compiled code just as if compiled code was doing it.
  // Pre-load the register-jump target early, to schedule it better.
  __ ld(T9, Rmethod, in_bytes(Method::from_compiled_offset()));

  // Now generate the shuffle code.  Pick up all register args and move the
  // rest through the floating point stack top.
  for (int i = 0; i < total_args_passed; i++) {
    if (sig_bt[i] == T_VOID) {
      // Longs and doubles are passed in native word order, but misaligned
      // in the 32-bit build.
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
      continue;
    }

    // Pick up 0, 1 or 2 words from SP+offset.

    //FIXME. aoqi. just delete the assert
    //assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?");
    // Load in argument order going down.
    int ld_off = (total_args_passed -1 - i)*Interpreter::stackElementSize;
    // Point to interpreter value (vs. tag)
    int next_off = ld_off - Interpreter::stackElementSize;
    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }
#ifdef aoqi_test
    tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d, ld_off:%d, next_off: %d", __func__, __LINE__, i, sig_bt[i], total_args_passed, ld_off, next_off);
#endif
    if (r_1->is_stack()) {
      // Convert stack slot to an SP offset (+ wordSize to
      // account for return address )
      // NOTICE HERE!!!! A wordSize is subtracted here.
      int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size;
      //+ wordSize;

      if (!r_2->is_valid()) {
#ifdef aoqi_test
        tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d r_1->is_stack() !r_2->is_valid(), st_off:%d", __func__, __LINE__, i, sig_bt[i], total_args_passed, st_off);
#endif
        __ ld(AT, saved_sp, ld_off);
        __ sd(AT, SP, st_off);
      } else {
#ifdef aoqi_test
        tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d r_1->is_stack() r_2->is_valid(), st_off:%d", __func__, __LINE__, i, sig_bt[i], total_args_passed, st_off);
#endif
        // Interpreter local[n] == MSW, local[n+1] == LSW however locals
        // are accessed as negative so LSW is at LOW address

        // ld_off is MSW so get LSW
        // st_off is LSW (i.e. reg.first())

        /* 2012/4/9 Jin
         * [./org/eclipse/swt/graphics/GC.java]
         * void drawImageXRender(Image srcImage, int srcX, int srcY, int srcWidth, int srcHeight,
            int destX, int destY, int destWidth, int destHeight,
            boolean simple,
            int imgWidth, int imgHeight,
            long maskPixmap,	<-- Pass T_LONG in stack
            int maskType);
         * Before this modification, Eclipse displayed icons with a solid
         * black background.  For a long/double, the value lives 8 bytes
         * below ld_off (in the T_VOID slot) — load from there instead.
         */
        __ ld(AT, saved_sp, ld_off);
        if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE)
          __ ld(AT, saved_sp, ld_off - 8);
        __ sd(AT, SP, st_off);
      }
    } else if (r_1->is_Register()) {  // Register argument
      Register r = r_1->as_Register();
      if (r_2->is_valid()) {
#ifdef aoqi_test
        tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d r_1->is_Register() r_2->is_valid()", __func__, __LINE__, i, sig_bt[i], total_args_passed);
#endif
        // Remember r_1 is low address (and LSB on mips)
        // So r_2 gets loaded from high address regardless of the platform
        //aoqi
        assert(r_2->as_Register() == r_1->as_Register(), "");
        __ ld(r, saved_sp, ld_off);

/* Jin:
 *
 * For T_LONG type, the real layout is as below:

   (high)
    |         |
    -----------
    | 8 bytes |
    | (void)  |
    -----------
    | 8 bytes |
    | (long)  |
    -----------
    |         |
   (low)
 *
 * We should load the low-8 bytes.
 */
      if (sig_bt[i] == T_LONG)
        __ ld(r, saved_sp, ld_off - 8);
      } else {
#ifdef aoqi_test
        tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d r_1->is_Register() !r_2->is_valid()", __func__, __LINE__, i, sig_bt[i], total_args_passed);
#endif
        // 32-bit value: a sign-extending load is sufficient.
        __ lw(r, saved_sp, ld_off);
      }
    } else if (r_1->is_FloatRegister()) { // Float Register
      assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register");

      FloatRegister fr = r_1->as_FloatRegister();
      if (sig_bt[i] == T_FLOAT)
        __ lwc1(fr, saved_sp, ld_off);
      else
      {
        // Double: value lives in the lower (T_VOID) slot as well.
        __ ldc1(fr, saved_sp, ld_off);
        __ ldc1(fr, saved_sp, ld_off - 8);
      }
    }
  }

  // 6243940 We might end up in handle_wrong_method if
  // the callee is deoptimized as we race thru here. If that
  // happens we don't want to take a safepoint because the
  // caller frame will look interpreted and arguments are now
  // "compiled" so it is much better to make this transition
  // invisible to the stack walking code. Unfortunately if
  // we try and find the callee by normal means a safepoint
  // is possible. So we stash the desired callee in the thread
  // and the vm will find there should this case occur.
  __ get_thread(T8);
  __ sd(Rmethod, T8, in_bytes(JavaThread::callee_target_offset()));

  // move methodOop to V0 in case we end up in an c2i adapter.
  // the c2i adapters expect methodOop in V0 (c2) because c2's
  // resolve stubs return the result (the method) in V0.
  __ move(V0, Rmethod);
  __ jr(T9);
  __ delayed()->nop();
}
  1.1234 +
// ---------------------------------------------------------------
// Generate the paired i2c and c2i adapters for one signature fingerprint.
// Layout of the emitted buffer:
//   i2c_entry            - interpreted caller -> compiled callee shuffle
//   c2i_unverified_entry - inline-cache check, then falls into c2i_entry
//   c2i_entry            - compiled caller -> interpreter shuffle
AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
                                                            int total_args_passed,
                                                            // VMReg max_arg,
                                                            int comp_args_on_stack, // VMRegStackSlots
                                                            const BasicType *sig_bt,
                                                            const VMRegPair *regs,
                                                            AdapterFingerPrint* fingerprint) {
  address i2c_entry = __ pc();

  AdapterGenerator agen(masm);

  agen.gen_i2c_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs);


  // -------------------------------------------------------------------------
  // Generate a C2I adapter.  On entry we know Rmethod holds the Method*.  The
  // args start out packed in the compiled layout.  They need to be unpacked
  // into the interpreter layout.  This will almost always require some stack
  // space.  We grow the current (compiled) stack, then repack the args.  We
  // finally end in a jump to the generic interpreter entry point.  On exit
  // from the interpreter, the interpreter will restore our SP (lest the
  // compiled code, which relys solely on SP and not FP, get sick).

  address c2i_unverified_entry = __ pc();
  Label skip_fixup;
  {
    Register holder = T1;      // CompiledICHolder from the inline cache
    Register receiver = T0;    // receiver object
    Register temp = T8;        // receiver's klass
    address ic_miss = SharedRuntime::get_ic_miss_stub();

    Label missed;

    __ verify_oop(holder);
    // Load the receiver's klass (compressed-oops aware).
    __ load_klass(temp, receiver);
    __ verify_oop(temp);

    // Compare the receiver klass against the one cached in the ICHolder;
    // load the cached Method* in the meantime.
    __ ld_ptr(AT, holder, CompiledICHolder::holder_klass_offset());
    __ ld_ptr(Rmethod, holder, CompiledICHolder::holder_method_offset());
    __ bne(AT, temp, missed);
    __ delayed()->nop();
    // Method might have been compiled since the call site was patched to
    // interpreted if that is the case treat it as a miss so we can get
    // the call site corrected.
    __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset()));
    __ beq(AT, R0, skip_fixup);
    __ delayed()->nop();
    __ bind(missed);

    __ jmp(ic_miss, relocInfo::runtime_call_type);
    __ delayed()->nop();
  }

  address c2i_entry = __ pc();

  agen.gen_c2i_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);

  __ flush();
  return  AdapterHandlerLibrary::new_entry(fingerprint,i2c_entry, c2i_entry, c2i_unverified_entry);

}
  1.1307 +/*
  1.1308 +// Helper function for native calling conventions
  1.1309 +static VMReg int_stk_helper( int i ) {
  1.1310 +  // Bias any stack based VMReg we get by ignoring the window area
  1.1311 +  // but not the register parameter save area.
  1.1312 +  //
  1.1313 +  // This is strange for the following reasons. We'd normally expect
  1.1314 +  // the calling convention to return an VMReg for a stack slot
  1.1315 +  // completely ignoring any abi reserved area. C2 thinks of that
  1.1316 +  // abi area as only out_preserve_stack_slots. This does not include
  1.1317 +  // the area allocated by the C abi to store down integer arguments
  1.1318 +  // because the java calling convention does not use it. So
  1.1319 +  // since c2 assumes that there are only out_preserve_stack_slots
  1.1320 +  // to bias the optoregs (which impacts VMRegs) when actually referencing any actual stack
  1.1321 +  // location the c calling convention must add in this bias amount
  1.1322 +  // to make up for the fact that the out_preserve_stack_slots is
  1.1323 +  // insufficient for C calls. What a mess. I sure hope those 6
  1.1324 +  // stack words were worth it on every java call!
  1.1325 +
  1.1326 +  // Another way of cleaning this up would be for out_preserve_stack_slots
  1.1327 +  // to take a parameter to say whether it was C or java calling conventions.
  1.1328 +  // Then things might look a little better (but not much).
  1.1329 +
  1.1330 +  int mem_parm_offset = i - SPARC_ARGS_IN_REGS_NUM;
  1.1331 +  if( mem_parm_offset < 0 ) {
  1.1332 +    return as_oRegister(i)->as_VMReg();
  1.1333 +  } else {
  1.1334 +    int actual_offset = (mem_parm_offset + frame::memory_parameter_word_sp_offset) * VMRegImpl::slots_per_word;
  1.1335 +    // Now return a biased offset that will be correct when out_preserve_slots is added back in
  1.1336 +    return VMRegImpl::stack2reg(actual_offset - SharedRuntime::out_preserve_stack_slots());
  1.1337 +  }
  1.1338 +}
  1.1339 +*/
  1.1340 +
  1.1341 +
// Compute the native (C) calling convention for the given signature.
//
// Returns the number of VMRegImpl stack slots needed for the outgoing
// arguments, NOT counting out_preserve_stack_slots and rounded up to an
// even number of slots.
//
// As the code below shows, the first Argument::n_register_parameters
// integer-like arguments go in A0..A7 and the first
// Argument::n_float_register_parameters FP arguments go in F12..F19,
// with BOTH register files indexed by the single shared counter 'args':
// an FP argument consumes the matching integer argument slot and vice
// versa (the n64-style convention illustrated by the forkAndExec
// example comment below).
int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
                                         VMRegPair *regs,
                                         VMRegPair *regs2,
                                         int total_args_passed) {
    assert(regs2 == NULL, "not needed on MIPS");
#ifdef aoqi_test
tty->print_cr(" SharedRuntime::%s :%d total_args_passed:%d", __func__, __LINE__, total_args_passed);
#endif
  static const Register INT_ArgReg[Argument::n_register_parameters] = {
    A0, A1, A2, A3, A4, A5, A6, A7
  };
  static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = {
    F12, F13, F14, F15, F16, F17, F18, F19
  };
    uint args = 0;     // shared register-argument index (integer and FP)
    uint stk_args = 0; // outgoing stack slots used; inc by 2 each time

/* Example of how JNI arguments map to this convention:
---   n   java.lang.UNIXProcess::forkAndExec
    private native int forkAndExec(byte[] prog,
                                   byte[] argBlock, int argc,
                                   byte[] envBlock, int envc,
                                   byte[] dir,
                                   boolean redirectErrorStream,
                                   FileDescriptor stdin_fd,
                                   FileDescriptor stdout_fd,
                                   FileDescriptor stderr_fd)
JNIEXPORT jint JNICALL
Java_java_lang_UNIXProcess_forkAndExec(JNIEnv *env,
                                       jobject process,
                                       jbyteArray prog,
                                       jbyteArray argBlock, jint argc,
                                       jbyteArray envBlock, jint envc,
                                       jbyteArray dir,
                                       jboolean redirectErrorStream,
                                       jobject stdin_fd,
                                       jobject stdout_fd,
                                       jobject stderr_fd)

::c_calling_convention
0: 		// env		<-- a0
1: L		// klass/obj	<-- t0 => a1
2: [		// prog[]	<-- a0 => a2
3: [		// argBlock[]	<-- a1 => a3
4: I		// argc
5: [		// envBlock[]	<-- a3 => a5
6: I		// envc
7: [		// dir[]	<-- a5 => a7
8: Z		// redirectErrorStream	a6 => sp[0]
9: L		// stdin		a7 => sp[8]
10: L		// stdout		fp[16] => sp[16]
11: L		// stderr		fp[24] => sp[24]
*/
    for (int i = 0; i < total_args_passed; i++) {
      switch (sig_bt[i]) {
      case T_VOID: // Halves of longs and doubles
        assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
        regs[i].set_bad();
        break;
      case T_BOOLEAN:
      case T_CHAR:
      case T_BYTE:
      case T_SHORT:
      case T_INT:
        if (args < Argument::n_register_parameters) {
          regs[i].set1(INT_ArgReg[args++]->as_VMReg());
        } else {
          // Stack args still consume a full 64-bit (2-slot) stack word.
          regs[i].set1(VMRegImpl::stack2reg(stk_args));
          stk_args += 2;
        }
        break;
      case T_LONG:
        assert(sig_bt[i + 1] == T_VOID, "expecting half");
        // fall through
      case T_OBJECT:
      case T_ARRAY:
      case T_ADDRESS:
      case T_METADATA:
        if (args < Argument::n_register_parameters) {
          regs[i].set2(INT_ArgReg[args++]->as_VMReg());
        } else {
          regs[i].set2(VMRegImpl::stack2reg(stk_args));
          stk_args += 2;
        }
        break;
      case T_FLOAT:
        // NOTE: indexed by the shared 'args' counter, guarded by the FP
        // register count (both are 8 on this port).
        if (args < Argument::n_float_register_parameters) {
          regs[i].set1(FP_ArgReg[args++]->as_VMReg());
        } else {
          regs[i].set1(VMRegImpl::stack2reg(stk_args));
          stk_args += 2;
        }
        break;
      case T_DOUBLE:
        assert(sig_bt[i + 1] == T_VOID, "expecting half");
        if (args < Argument::n_float_register_parameters) {
          regs[i].set2(FP_ArgReg[args++]->as_VMReg());
        } else {
          regs[i].set2(VMRegImpl::stack2reg(stk_args));
          stk_args += 2;
        }
        break;
      default:
        ShouldNotReachHere();
        break;
      }
    }

  // Round up to an even number of slots.
  return round_to(stk_args, 2);
}
  1.1465 +/*
  1.1466 +int SharedRuntime::c_calling_convention_jni(const BasicType *sig_bt, 
  1.1467 +                                         VMRegPair *regs,
  1.1468 +                                         int total_args_passed) {
  1.1469 +// We return the amount of VMRegImpl stack slots we need to reserve for all
  1.1470 +// the arguments NOT counting out_preserve_stack_slots. 
  1.1471 +   bool unalign = 0;
  1.1472 +  uint    stack = 0;        // All arguments on stack
  1.1473 +#ifdef aoqi_test
  1.1474 +tty->print_cr(" SharedRuntime::%s :%d total_args_passed:%d", __func__, __LINE__, total_args_passed);
  1.1475 +#endif
  1.1476 +
  1.1477 +  for( int i = 0; i < total_args_passed; i++) {
  1.1478 +    // From the type and the argument number (count) compute the location
  1.1479 +    switch( sig_bt[i] ) {
  1.1480 +    case T_BOOLEAN:
  1.1481 +    case T_CHAR:
  1.1482 +    case T_FLOAT:
  1.1483 +    case T_BYTE:
  1.1484 +    case T_SHORT:
  1.1485 +    case T_INT:
  1.1486 +    case T_OBJECT:
  1.1487 +    case T_ARRAY:
  1.1488 +    case T_ADDRESS:
  1.1489 +      regs[i].set1(VMRegImpl::stack2reg(stack++));
  1.1490 +      unalign = !unalign;
  1.1491 +      break;
  1.1492 +    case T_LONG:
  1.1493 +    case T_DOUBLE: // The stack numbering is reversed from Java
  1.1494 +      // Since C arguments do not get reversed, the ordering for
  1.1495 +      // doubles on the stack must be opposite the Java convention
  1.1496 +      assert(sig_bt[i+1] == T_VOID, "missing Half" ); 
  1.1497 +      if(unalign){
  1.1498 +            stack += 1; 
  1.1499 +     	    unalign = ! unalign; 
  1.1500 +      } 
  1.1501 +      regs[i].set2(VMRegImpl::stack2reg(stack));
  1.1502 +      stack += 2;
  1.1503 +      break;
  1.1504 +    case T_VOID: regs[i].set_bad(); break;
  1.1505 +    default:
  1.1506 +      ShouldNotReachHere();
  1.1507 +      break;
  1.1508 +    }
  1.1509 +  }
  1.1510 +  return stack;
  1.1511 +}
  1.1512 +*/
  1.1513 +
  1.1514 +// ---------------------------------------------------------------------------
  1.1515 +void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  1.1516 +	// We always ignore the frame_slots arg and just use the space just below frame pointer
  1.1517 +	// which by this time is free to use
  1.1518 +	switch (ret_type) {
  1.1519 +		case T_FLOAT:
  1.1520 +			__ swc1(FSF, FP, -wordSize); 
  1.1521 +			break;
  1.1522 +		case T_DOUBLE:
  1.1523 +			__ sdc1(FSF, FP, -wordSize ); 
  1.1524 +			break;
  1.1525 +		case T_VOID:  break;
  1.1526 +		case T_LONG:
  1.1527 +			      __ sd(V0, FP, -wordSize);
  1.1528 +			      break;
  1.1529 +		case T_OBJECT:
  1.1530 +		case T_ARRAY:
  1.1531 +			__ sd(V0, FP, -wordSize);
  1.1532 +			break;
  1.1533 +		default: {
  1.1534 +				 __ sw(V0, FP, -wordSize);
  1.1535 +			 }
  1.1536 +	}
  1.1537 +}
  1.1538 +
  1.1539 +void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  1.1540 +	// We always ignore the frame_slots arg and just use the space just below frame pointer
  1.1541 +	// which by this time is free to use
  1.1542 +	switch (ret_type) {
  1.1543 +		case T_FLOAT:
  1.1544 +			__ lwc1(FSF, FP, -wordSize); 
  1.1545 +			break;
  1.1546 +		case T_DOUBLE:
  1.1547 +			__ ldc1(FSF, FP, -wordSize ); 
  1.1548 +			break;
  1.1549 +		case T_LONG:
  1.1550 +			__ ld(V0, FP, -wordSize);
  1.1551 +			break;
  1.1552 +		case T_VOID:  break;
  1.1553 +		case T_OBJECT:
  1.1554 +		case T_ARRAY:
  1.1555 +			__ ld(V0, FP, -wordSize);
  1.1556 +			break;
  1.1557 +		default: {
  1.1558 +				 __ lw(V0, FP, -wordSize);
  1.1559 +			 }
  1.1560 +	}
  1.1561 +}
  1.1562 +
  1.1563 +static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
  1.1564 +    for ( int i = first_arg ; i < arg_count ; i++ ) {
  1.1565 +      if (args[i].first()->is_Register()) {
  1.1566 +        __ push(args[i].first()->as_Register());
  1.1567 +      } else if (args[i].first()->is_FloatRegister()) {
  1.1568 +        __ push(args[i].first()->as_FloatRegister());
  1.1569 +      }
  1.1570 +    }
  1.1571 +}
  1.1572 +
  1.1573 +static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
  1.1574 +    for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) {
  1.1575 +      if (args[i].first()->is_Register()) {
  1.1576 +        __ pop(args[i].first()->as_Register());
  1.1577 +      } else if (args[i].first()->is_FloatRegister()) {
  1.1578 +        __ pop(args[i].first()->as_FloatRegister());
  1.1579 +      }
  1.1580 +    }
  1.1581 +}
  1.1582 +
  1.1583 +// A simple move of integer like type
  1.1584 +static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  1.1585 +  if (src.first()->is_stack()) {
  1.1586 +    if (dst.first()->is_stack()) {
  1.1587 +      // stack to stack
  1.1588 +		__ lw(AT, FP, reg2offset_in(src.first())); 
  1.1589 +		__ sd(AT,SP, reg2offset_out(dst.first())); 
  1.1590 +    } else {
  1.1591 +      // stack to reg
  1.1592 +      //__ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
  1.1593 +			__ lw(dst.first()->as_Register(),  FP, reg2offset_in(src.first())); 
  1.1594 +    }
  1.1595 +  } else if (dst.first()->is_stack()) {
  1.1596 +    // reg to stack
  1.1597 +		__ sd(src.first()->as_Register(), SP, reg2offset_out(dst.first()));
  1.1598 +  } else {
  1.1599 +    //__ mov(src.first()->as_Register(), dst.first()->as_Register());
  1.1600 +	  if (dst.first() != src.first()){ 
  1.1601 +		__ move(dst.first()->as_Register(), src.first()->as_Register()); // fujie error:dst.first()
  1.1602 +	  }
  1.1603 +  }
  1.1604 +}
  1.1605 +/*
  1.1606 +// On 64 bit we will store integer like items to the stack as
  1.1607 +// 64 bits items (sparc abi) even though java would only store
  1.1608 +// 32bits for a parameter. On 32bit it will simply be 32 bits
  1.1609 +// So this routine will do 32->32 on 32bit and 32->64 on 64bit
  1.1610 +static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  1.1611 +  if (src.first()->is_stack()) {
  1.1612 +    if (dst.first()->is_stack()) {
  1.1613 +      // stack to stack
  1.1614 +      __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
  1.1615 +      __ st_ptr(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
  1.1616 +    } else {
  1.1617 +      // stack to reg
  1.1618 +      __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
  1.1619 +    }
  1.1620 +  } else if (dst.first()->is_stack()) {
  1.1621 +    // reg to stack
  1.1622 +    __ st_ptr(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
  1.1623 +  } else {
  1.1624 +    __ mov(src.first()->as_Register(), dst.first()->as_Register());
  1.1625 +  }
  1.1626 +}
  1.1627 +*/
  1.1628 +
// An oop arg. Must pass a handle, not the oop itself: the native side
// receives either NULL (when the oop is NULL) or the address of a stack
// slot containing the oop. The slot holding the oop is recorded in 'map'
// so the GC can find and update it; for the receiver argument its frame
// offset is also returned through *receiver_offset.
static void object_move(MacroAssembler* masm,
                        OopMap* map,
                        int oop_handle_offset,
                        int framesize_in_slots,
                        VMRegPair src,
                        VMRegPair dst,
                        bool is_receiver,
                        int* receiver_offset) {

  // must pass a handle. First figure out the location we use as a handle

	//FIXME, for mips, dst can be register
	if (src.first()->is_stack()) {
		// Oop is already on the stack as an argument; the handle points at
		// that caller-frame slot (or is NULL if the oop is NULL).
		Register rHandle = V0;
		Label nil;
		// rHandle = 0 (the NULL handle), replaced below if the oop != NULL.
		__ xorr(rHandle, rHandle, rHandle);
		__ ld(AT, FP, reg2offset_in(src.first())); 
		__ beq(AT,R0, nil); 
		__ delayed()->nop(); 
		// Non-NULL oop: handle = address of the caller-frame slot.
		__ lea(rHandle, Address(FP, reg2offset_in(src.first())));
		__ bind(nil);
		// Store (or move) the handle into the outgoing C argument location.
		if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first()));
		else                       __ move( (dst.first())->as_Register(),rHandle); 
	//FIXME, do mips need out preserve stack slots?	
		// Record the oop's caller-frame slot in the oop map. The offset is
		// biased by this frame's size so it is expressed relative to SP.
		int offset_in_older_frame = src.first()->reg2stack() 
			+ SharedRuntime::out_preserve_stack_slots();
		map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
		if (is_receiver) {
			*receiver_offset = (offset_in_older_frame 
					+ framesize_in_slots) * VMRegImpl::stack_slot_size;
		}
	} else {
		// Oop is in a register; we must first store it into the space
		// reserved on the stack for oop handles, then pass that address.
		const Register rOop = src.first()->as_Register();
		assert( (rOop->encoding() >= A0->encoding()) && (rOop->encoding() <= T0->encoding()),"wrong register");
		const Register rHandle = V0;
		// Slot index derived from the argument register number; must agree
		// with java_calling_convention's register assignment.
		int oop_slot = (rOop->encoding() - A0->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset;
		int offset = oop_slot*VMRegImpl::stack_slot_size;
		Label skip;
		// Spill the oop into its handle slot and tell the GC about it.
		__ sd( rOop , SP, offset );
		map->set_oop(VMRegImpl::stack2reg(oop_slot));
		// rHandle = 0 (NULL handle) unless the oop is non-NULL.
		__ xorr( rHandle, rHandle, rHandle);
		__ beq(rOop, R0, skip); 
		__ delayed()->nop(); 
		// Non-NULL oop: handle = address of the spill slot.
		__ lea(rHandle, Address(SP, offset));
		__ bind(skip);
		// Store (or move) the handle into the outgoing C argument location.
		if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first()));
		else                       __ move((dst.first())->as_Register(), rHandle); 

		if (is_receiver) {
			*receiver_offset = offset;
		}
	}
}
  1.1702 +
  1.1703 +// A float arg may have to do float reg int reg conversion
  1.1704 +static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  1.1705 +  assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move");
  1.1706 +
  1.1707 +	if (src.first()->is_stack()) {
  1.1708 +		if(dst.first()->is_stack()){
  1.1709 +			//  __ movl(eax, Address(ebp, reg2offset_in(src.first())));
  1.1710 +			__ lwc1(F12 , FP, reg2offset_in(src.first()));
  1.1711 +			// __ movl(Address(esp, reg2offset_out(dst.first())), eax);
  1.1712 +			__ swc1(F12 ,SP, reg2offset_out(dst.first()));
  1.1713 +		}	
  1.1714 +		else
  1.1715 +			__ lwc1( dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); 
  1.1716 +	} else {
  1.1717 +		// reg to stack
  1.1718 +		// __ movss(Address(esp, reg2offset_out(dst.first())), 
  1.1719 +		// src.first()->as_XMMRegister());
  1.1720 +		// __ movl(Address(esp, reg2offset_out(dst.first())), eax);
  1.1721 +		if(dst.first()->is_stack())
  1.1722 +			__ swc1( src.first()->as_FloatRegister(),SP, reg2offset_out(dst.first()));
  1.1723 +		else
  1.1724 +			__ mov_s( dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); 
  1.1725 +	}
  1.1726 +}
  1.1727 +/*
  1.1728 +static void split_long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  1.1729 +  VMRegPair src_lo(src.first());
  1.1730 +  VMRegPair src_hi(src.second());
  1.1731 +  VMRegPair dst_lo(dst.first());
  1.1732 +  VMRegPair dst_hi(dst.second());
  1.1733 +  simple_move32(masm, src_lo, dst_lo);
  1.1734 +  simple_move32(masm, src_hi, dst_hi);
  1.1735 +}
  1.1736 +*/
  1.1737 +// A long move
  1.1738 +static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  1.1739 +
  1.1740 +	// The only legal possibility for a long_move VMRegPair is:
  1.1741 +	// 1: two stack slots (possibly unaligned)
  1.1742 +	// as neither the java  or C calling convention will use registers
  1.1743 +	// for longs.
  1.1744 +
  1.1745 +	if (src.first()->is_stack()) {
  1.1746 +		assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack");
  1.1747 +		//  __ movl(eax, Address(ebp, reg2offset_in(src.first())));
  1.1748 +		if( dst.first()->is_stack()){ 
  1.1749 +			__ ld(AT, FP, reg2offset_in(src.first()));
  1.1750 +			//  __ movl(ebx, address(ebp, reg2offset_in(src.second())));
  1.1751 +			//__ lw(V0, FP, reg2offset_in(src.second())); 
  1.1752 +			// __ movl(address(esp, reg2offset_out(dst.first())), eax);
  1.1753 +			__ sd(AT, SP, reg2offset_out(dst.first()));
  1.1754 +			// __ movl(address(esp, reg2offset_out(dst.second())), ebx);
  1.1755 +			//__ sw(V0, SP,  reg2offset_out(dst.second())); 
  1.1756 +		} else{
  1.1757 +			__ ld( (dst.first())->as_Register() , FP, reg2offset_in(src.first()));
  1.1758 +			//__ lw( (dst.second())->as_Register(), FP, reg2offset_in(src.second())); 
  1.1759 +		} 
  1.1760 +	} else {
  1.1761 +		if( dst.first()->is_stack()){ 
  1.1762 +			__ sd( (src.first())->as_Register(), SP, reg2offset_out(dst.first()));
  1.1763 +			//__ sw( (src.second())->as_Register(), SP,  reg2offset_out(dst.second())); 
  1.1764 +		} else{
  1.1765 +			__ move( (dst.first())->as_Register() , (src.first())->as_Register());
  1.1766 +			//__ move( (dst.second())->as_Register(), (src.second())->as_Register()); 
  1.1767 +		} 
  1.1768 +	}
  1.1769 +}
  1.1770 +
  1.1771 +// A double move
  1.1772 +static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  1.1773 +
  1.1774 +	// The only legal possibilities for a double_move VMRegPair are:
  1.1775 +	// The painful thing here is that like long_move a VMRegPair might be
  1.1776 +
  1.1777 +	// Because of the calling convention we know that src is either
  1.1778 +	//   1: a single physical register (xmm registers only)
  1.1779 +	//   2: two stack slots (possibly unaligned)
  1.1780 +	// dst can only be a pair of stack slots.
  1.1781 +
  1.1782 +	// assert(dst.first()->is_stack() && (src.first()->is_XMMRegister() || 
  1.1783 +	// src.first()->is_stack()), "bad args");
  1.1784 +	//  assert(dst.first()->is_stack() || src.first()->is_stack()), "bad args");
  1.1785 +
  1.1786 +	if (src.first()->is_stack()) {
  1.1787 +		// source is all stack
  1.1788 +		// __ movl(eax, Address(ebp, reg2offset_in(src.first())));
  1.1789 +		if( dst.first()->is_stack()){ 
  1.1790 +			__ ldc1(F12, FP, reg2offset_in(src.first()));
  1.1791 +			//__ movl(ebx, Address(ebp, reg2offset_in(src.second())));
  1.1792 +			//__ lwc1(F14, FP, reg2offset_in(src.second()));
  1.1793 +
  1.1794 +			//   __ movl(Address(esp, reg2offset_out(dst.first())), eax);
  1.1795 +			__ sdc1(F12, SP, reg2offset_out(dst.first())); 
  1.1796 +			//  __ movl(Address(esp, reg2offset_out(dst.second())), ebx);
  1.1797 +			//__ swc1(F14, SP, reg2offset_out(dst.second()));
  1.1798 +		} else{
  1.1799 +			__ ldc1( (dst.first())->as_FloatRegister(), FP, reg2offset_in(src.first()));
  1.1800 +			//__ lwc1( (dst.second())->as_FloatRegister(), FP, reg2offset_in(src.second()));
  1.1801 +		} 
  1.1802 +
  1.1803 +	} else {
  1.1804 +		// reg to stack
  1.1805 +		// No worries about stack alignment
  1.1806 +		// __ movsd(Address(esp, reg2offset_out(dst.first())), src.first()->as_XMMRegister());
  1.1807 +		if( dst.first()->is_stack()){ 
  1.1808 +			__ sdc1( src.first()->as_FloatRegister(),SP, reg2offset_out(dst.first()));
  1.1809 +			//__ swc1( src.second()->as_FloatRegister(),SP, reg2offset_out(dst.second()));
  1.1810 +		}
  1.1811 +		else
  1.1812 +			__ mov_d( dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
  1.1813 +			//__ mov_s( dst.second()->as_FloatRegister(), src.second()->as_FloatRegister()); 
  1.1814 +
  1.1815 +	}
  1.1816 +}
  1.1817 +
  1.1818 +static void verify_oop_args(MacroAssembler* masm,
  1.1819 +                            methodHandle method,
  1.1820 +                            const BasicType* sig_bt,
  1.1821 +                            const VMRegPair* regs) {
  1.1822 +  Register temp_reg = T9;  // not part of any compiled calling seq
  1.1823 +  if (VerifyOops) {
  1.1824 +    for (int i = 0; i < method->size_of_parameters(); i++) {
  1.1825 +      if (sig_bt[i] == T_OBJECT ||
  1.1826 +          sig_bt[i] == T_ARRAY) {
  1.1827 +        VMReg r = regs[i].first();
  1.1828 +        assert(r->is_valid(), "bad oop arg");
  1.1829 +        if (r->is_stack()) {
  1.1830 +//          __ movptr(temp_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
  1.1831 +          __ ld(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
  1.1832 +          __ verify_oop(temp_reg);
  1.1833 +        } else {
  1.1834 +          __ verify_oop(r->as_Register());
  1.1835 +        }
  1.1836 +      }
  1.1837 +    }
  1.1838 +  }
  1.1839 +}
  1.1840 +
// Generate the compiled entry for a MethodHandle intrinsic (invokeBasic or
// one of the linkTo* signature-polymorphic calls): locate the receiver
// and/or trailing MemberName argument in registers, then jump to the
// target via MethodHandles::generate_method_handle_dispatch.
static void gen_special_dispatch(MacroAssembler* masm,
                                 methodHandle method,
                                 const BasicType* sig_bt,
                                 const VMRegPair* regs) {
  verify_oop_args(masm, method, sig_bt, regs);
  vmIntrinsics::ID iid = method->intrinsic_id();

  // Now write the args into the outgoing interpreter space
  bool     has_receiver   = false;
  Register receiver_reg   = noreg;
  int      member_arg_pos = -1;
  Register member_reg     = noreg;
  int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
  if (ref_kind != 0) {
    // linkTo* intrinsic: the appended MemberName is the last argument.
    member_arg_pos = method->size_of_parameters() - 1;  // trailing MemberName argument
    member_reg = S3;  // known to be free at this point
    has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
  } else if (iid == vmIntrinsics::_invokeBasic) {
    has_receiver = true;
  } else {
    fatal(err_msg_res("unexpected intrinsic id %d", iid));
  }

  if (member_reg != noreg) {
    // Load the member_arg into register, if necessary.
    SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
    VMReg r = regs[member_arg_pos].first();
    if (r->is_stack()) {
      __ ld(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
    } else {
      // no data motion is needed
      member_reg = r->as_Register();
    }
  }

  if (has_receiver) {
    // Make sure the receiver is loaded into a register.
    assert(method->size_of_parameters() > 0, "oob");
    assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
    VMReg r = regs[0].first();
    assert(r->is_valid(), "bad receiver arg");
    if (r->is_stack()) {
      // Porting note:  This assumes that compiled calling conventions always
      // pass the receiver oop in a register.  If this is not true on some
      // platform, pick a temp and load the receiver from stack.
      fatal("receiver always in a register");
      receiver_reg = SSR;  // known to be free at this point
      __ ld(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
    } else {
      // no data motion is needed
      receiver_reg = r->as_Register();
    }
  }

  // Figure out which address we are really jumping to:
  MethodHandles::generate_method_handle_dispatch(masm, iid,
                                                 receiver_reg, member_reg, /*for_compiler_entry:*/ true);
}
  1.1903 +
  1.1904 +// ---------------------------------------------------------------------------
  1.1905 +// Generate a native wrapper for a given method.  The method takes arguments
  1.1906 +// in the Java compiled code convention, marshals them to the native
  1.1907 +// convention (handlizes oops, etc), transitions to native, makes the call,
  1.1908 +// returns to java state (possibly blocking), unhandlizes any result and
  1.1909 +// returns.
  1.1910 +nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
  1.1911 +                                                methodHandle method,
  1.1912 +                                                int compile_id,
  1.1913 +                                                BasicType *in_sig_bt,
  1.1914 +                                                VMRegPair *in_regs,
  1.1915 +                                                BasicType ret_type) {
  1.1916 +
  1.1917 +  if (method->is_method_handle_intrinsic()) {
  1.1918 +    vmIntrinsics::ID iid = method->intrinsic_id();
  1.1919 +    intptr_t start = (intptr_t)__ pc();
  1.1920 +    int vep_offset = ((intptr_t)__ pc()) - start;
  1.1921 +
  1.1922 +    gen_special_dispatch(masm,
  1.1923 +                         method,
  1.1924 +                         in_sig_bt,
  1.1925 +                         in_regs);
  1.1926 +
  1.1927 +    int frame_complete = ((intptr_t)__ pc()) - start;  // not complete, period
  1.1928 +    __ flush();
  1.1929 +    int stack_slots = SharedRuntime::out_preserve_stack_slots();  // no out slots at all, actually
  1.1930 +    return nmethod::new_native_nmethod(method,
  1.1931 +                                       compile_id,
  1.1932 +                                       masm->code(),
  1.1933 +                                       vep_offset,
  1.1934 +                                       frame_complete,
  1.1935 +                                       stack_slots / VMRegImpl::slots_per_word,
  1.1936 +                                       in_ByteSize(-1),
  1.1937 +                                       in_ByteSize(-1),
  1.1938 +                                       (OopMapSet*)NULL);
  1.1939 +  }
  1.1940 +  bool is_critical_native = true;
  1.1941 +  address native_func = method->critical_native_function();
  1.1942 +  if (native_func == NULL) {
  1.1943 +    native_func = method->native_function();
  1.1944 +    is_critical_native = false;
  1.1945 +  }
  1.1946 +  assert(native_func != NULL, "must have function");
  1.1947 +
  1.1948 +  // Native nmethod wrappers never take possesion of the oop arguments.
  1.1949 +  // So the caller will gc the arguments. The only thing we need an
  1.1950 +  // oopMap for is if the call is static
  1.1951 +  //
  1.1952 +  // An OopMap for lock (and class if static), and one for the VM call itself
  1.1953 +  OopMapSet *oop_maps = new OopMapSet();
  1.1954 +
  1.1955 +	// We have received a description of where all the java arg are located
  1.1956 +	// on entry to the wrapper. We need to convert these args to where
  1.1957 +	// the jni function will expect them. To figure out where they go
  1.1958 +	// we convert the java signature to a C signature by inserting
  1.1959 +	// the hidden arguments as arg[0] and possibly arg[1] (static method)
  1.1960 +
  1.1961 +  const int total_in_args = method->size_of_parameters();
  1.1962 +  int total_c_args = total_in_args;
  1.1963 +  if (!is_critical_native) {
  1.1964 +    total_c_args += 1;
  1.1965 +    if (method->is_static()) {
  1.1966 +      total_c_args++;
  1.1967 +    }
  1.1968 +  } else {
  1.1969 +    for (int i = 0; i < total_in_args; i++) {
  1.1970 +      if (in_sig_bt[i] == T_ARRAY) {
  1.1971 +        total_c_args++;
  1.1972 +      }
  1.1973 +    }
  1.1974 +  }
  1.1975 +
  1.1976 +	BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
  1.1977 +	VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair,   total_c_args);
  1.1978 +  BasicType* in_elem_bt = NULL;
  1.1979 +
  1.1980 +  int argc = 0;
  1.1981 +  if (!is_critical_native) {
  1.1982 +    out_sig_bt[argc++] = T_ADDRESS;
  1.1983 +    if (method->is_static()) {
  1.1984 +      out_sig_bt[argc++] = T_OBJECT;
  1.1985 +    }
  1.1986 +
  1.1987 +    for (int i = 0; i < total_in_args ; i++ ) {
  1.1988 +      out_sig_bt[argc++] = in_sig_bt[i];
  1.1989 +    }
  1.1990 +  } else {
  1.1991 +    Thread* THREAD = Thread::current();
  1.1992 +    in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args);
  1.1993 +    SignatureStream ss(method->signature());
  1.1994 +    for (int i = 0; i < total_in_args ; i++ ) {
  1.1995 +      if (in_sig_bt[i] == T_ARRAY) {
  1.1996 +        // Arrays are passed as int, elem* pair
  1.1997 +        out_sig_bt[argc++] = T_INT;
  1.1998 +        out_sig_bt[argc++] = T_ADDRESS;
  1.1999 +        Symbol* atype = ss.as_symbol(CHECK_NULL);
  1.2000 +        const char* at = atype->as_C_string();
  1.2001 +        if (strlen(at) == 2) {
  1.2002 +          assert(at[0] == '[', "must be");
  1.2003 +          switch (at[1]) {
  1.2004 +            case 'B': in_elem_bt[i]  = T_BYTE; break;
  1.2005 +            case 'C': in_elem_bt[i]  = T_CHAR; break;
  1.2006 +            case 'D': in_elem_bt[i]  = T_DOUBLE; break;
  1.2007 +            case 'F': in_elem_bt[i]  = T_FLOAT; break;
  1.2008 +            case 'I': in_elem_bt[i]  = T_INT; break;
  1.2009 +            case 'J': in_elem_bt[i]  = T_LONG; break;
  1.2010 +            case 'S': in_elem_bt[i]  = T_SHORT; break;
  1.2011 +            case 'Z': in_elem_bt[i]  = T_BOOLEAN; break;
  1.2012 +            default: ShouldNotReachHere();
  1.2013 +          }
  1.2014 +        }
  1.2015 +      } else {
  1.2016 +        out_sig_bt[argc++] = in_sig_bt[i];
  1.2017 +        in_elem_bt[i] = T_VOID;
  1.2018 +      }
  1.2019 +      if (in_sig_bt[i] != T_VOID) {
  1.2020 +        assert(in_sig_bt[i] == ss.type(), "must match");
  1.2021 +        ss.next();
  1.2022 +      }
  1.2023 +    }
  1.2024 +  }
  1.2025 +
  1.2026 +  // Now figure out where the args must be stored and how much stack space
  1.2027 +  // they require (neglecting out_preserve_stack_slots but space for storing
  1.2028 +  // the 1st six register arguments). It's weird see int_stk_helper.
  1.2029 +  //
  1.2030 +  int out_arg_slots;
  1.2031 +  //out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
  1.2032 +	out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
  1.2033 +
  1.2034 +  // Compute framesize for the wrapper.  We need to handlize all oops in
  1.2035 +  // registers. We must create space for them here that is disjoint from
  1.2036 +  // the windowed save area because we have no control over when we might
  1.2037 +  // flush the window again and overwrite values that gc has since modified.
  1.2038 +  // (The live window race)
  1.2039 +  //
  1.2040 +  // We always just allocate 6 word for storing down these object. This allow
  1.2041 +  // us to simply record the base and use the Ireg number to decide which
  1.2042 +  // slot to use. (Note that the reg number is the inbound number not the
  1.2043 +  // outbound number).
  1.2044 +  // We must shuffle args to match the native convention, and include var-args space.
  1.2045 +
  1.2046 +  // Calculate the total number of stack slots we will need.
  1.2047 +
  1.2048 +  // First count the abi requirement plus all of the outgoing args
  1.2049 +  int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
  1.2050 +
  1.2051 +  // Now the space for the inbound oop handle area
  1.2052 +  int total_save_slots = 9 * VMRegImpl::slots_per_word;  // 9 arguments passed in registers
  1.2053 +  if (is_critical_native) {
  1.2054 +    // Critical natives may have to call out so they need a save area
  1.2055 +    // for register arguments.
  1.2056 +    int double_slots = 0;
  1.2057 +    int single_slots = 0;
  1.2058 +    for ( int i = 0; i < total_in_args; i++) {
  1.2059 +      if (in_regs[i].first()->is_Register()) {
  1.2060 +        const Register reg = in_regs[i].first()->as_Register();
  1.2061 +        switch (in_sig_bt[i]) {
  1.2062 +          case T_BOOLEAN:
  1.2063 +          case T_BYTE:
  1.2064 +          case T_SHORT:
  1.2065 +          case T_CHAR:
  1.2066 +          case T_INT:  single_slots++; break;
  1.2067 +          case T_ARRAY:  // specific to LP64 (7145024)
  1.2068 +          case T_LONG: double_slots++; break;
  1.2069 +          default:  ShouldNotReachHere();
  1.2070 +        }
  1.2071 +      } else if (in_regs[i].first()->is_FloatRegister()) {
  1.2072 +        switch (in_sig_bt[i]) {
  1.2073 +          case T_FLOAT:  single_slots++; break;
  1.2074 +          case T_DOUBLE: double_slots++; break;
  1.2075 +          default:  ShouldNotReachHere();
  1.2076 +        }
  1.2077 +      }
  1.2078 +    }
  1.2079 +    total_save_slots = double_slots * 2 + single_slots;
  1.2080 +    // align the save area
  1.2081 +    if (double_slots != 0) {
  1.2082 +      stack_slots = round_to(stack_slots, 2);
  1.2083 +    }
  1.2084 +  }
  1.2085 +
  1.2086 +  int oop_handle_offset = stack_slots;
  1.2087 +//  stack_slots += 9*VMRegImpl::slots_per_word;	// T0, A0 ~ A7
  1.2088 +  stack_slots += total_save_slots;
  1.2089 +
  1.2090 +  // Now any space we need for handlizing a klass if static method
  1.2091 +
  1.2092 +	int klass_slot_offset = 0;
  1.2093 +	int klass_offset = -1;
  1.2094 +	int lock_slot_offset = 0;
  1.2095 +	bool is_static = false;
  1.2096 +	//int oop_temp_slot_offset = 0;
  1.2097 +
  1.2098 +  if (method->is_static()) {
  1.2099 +    klass_slot_offset = stack_slots;
  1.2100 +    stack_slots += VMRegImpl::slots_per_word;
  1.2101 +    klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size;
  1.2102 +    is_static = true;
  1.2103 +  }
  1.2104 +
  1.2105 +  // Plus a lock if needed
  1.2106 +
  1.2107 +  if (method->is_synchronized()) {
  1.2108 +    lock_slot_offset = stack_slots;
  1.2109 +    stack_slots += VMRegImpl::slots_per_word;
  1.2110 +  }
  1.2111 +
  1.2112 +  // Now a place to save return value or as a temporary for any gpr -> fpr moves
  1.2113 +	// + 2 for return address (which we own) and saved ebp
  1.2114 +  //stack_slots += 2;
  1.2115 +  stack_slots += 2 + 9 * VMRegImpl::slots_per_word;	// (T0, A0, A1, A2, A3, A4, A5, A6, A7)
  1.2116 +
  1.2117 +  // Ok The space we have allocated will look like:
  1.2118 +  //
  1.2119 +  //
  1.2120 +  // FP-> |                     |
  1.2121 +  //      |---------------------|
  1.2122 +  //      | 2 slots for moves   |
  1.2123 +  //      |---------------------|
  1.2124 +  //      | lock box (if sync)  |
  1.2125 +  //      |---------------------| <- lock_slot_offset
  1.2126 +  //      | klass (if static)   |
  1.2127 +  //      |---------------------| <- klass_slot_offset
  1.2128 +  //      | oopHandle area      |
  1.2129 +  //      |---------------------| <- oop_handle_offset
  1.2130 +  //      | outbound memory     |
  1.2131 +  //      | based arguments     |
  1.2132 +  //      |                     |
  1.2133 +  //      |---------------------|
  1.2134 +  //      | vararg area         |
  1.2135 +  //      |---------------------|
  1.2136 +  //      |                     |
  1.2137 +  // SP-> | out_preserved_slots |
  1.2138 +  //
  1.2139 +  //
  1.2140 +
  1.2141 +
  1.2142 +  // Now compute actual number of stack words we need rounding to make
  1.2143 +  // stack properly aligned.
  1.2144 +  stack_slots = round_to(stack_slots, StackAlignmentInSlots);
  1.2145 +
  1.2146 +  int stack_size = stack_slots * VMRegImpl::stack_slot_size;
  1.2147 +
  1.2148 +	intptr_t start = (intptr_t)__ pc();
  1.2149 +
  1.2150 +
  1.2151 +
  1.2152 +	// First thing make an ic check to see if we should even be here
  1.2153 +	address ic_miss = SharedRuntime::get_ic_miss_stub();
  1.2154 +
  1.2155 +	// We are free to use all registers as temps without saving them and
  1.2156 +	// restoring them except ebp. ebp is the only callee save register
  1.2157 +	// as far as the interpreter and the compiler(s) are concerned.
  1.2158 +
  1.2159 +  //refer to register_mips.hpp:IC_Klass
  1.2160 +	const Register ic_reg = T1;
  1.2161 +	const Register receiver = T0;
  1.2162 +	Label hit;
  1.2163 +	Label exception_pending;
  1.2164 +
  1.2165 +	__ verify_oop(receiver);
  1.2166 +	//__ lw(AT, receiver, oopDesc::klass_offset_in_bytes()); 
  1.2167 +	//add for compressedoops
  1.2168 +	__ load_klass(AT, receiver);
  1.2169 +	__ beq(AT, ic_reg, hit); 
  1.2170 +	__ delayed()->nop(); 
  1.2171 +	__ jmp(ic_miss, relocInfo::runtime_call_type);
  1.2172 +	__ delayed()->nop();
  1.2173 +	// verified entry must be aligned for code patching.
  1.2174 +	// and the first 5 bytes must be in the same cache line
  1.2175 +	// if we align at 8 then we will be sure 5 bytes are in the same line
  1.2176 +	__ align(8);
  1.2177 +
  1.2178 +	__ bind(hit);
  1.2179 +
  1.2180 +
  1.2181 +	int vep_offset = ((intptr_t)__ pc()) - start;
  1.2182 +#ifdef COMPILER1
  1.2183 +	if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) {
  1.2184 +		// Object.hashCode can pull the hashCode from the header word
  1.2185 +		// instead of doing a full VM transition once it's been computed.
  1.2186 +		// Since hashCode is usually polymorphic at call sites we can't do
  1.2187 +		// this optimization at the call site without a lot of work.
  1.2188 +		Label slowCase;
  1.2189 +		Register receiver = T0;
  1.2190 +		Register result = V0;
  1.2191 +		__ ld ( result, receiver, oopDesc::mark_offset_in_bytes()); 
  1.2192 +		// check if locked
  1.2193 +		__ andi(AT, result, markOopDesc::unlocked_value); 
  1.2194 +		__ beq(AT, R0, slowCase); 
  1.2195 +		__ delayed()->nop(); 
  1.2196 +		if (UseBiasedLocking) {
  1.2197 +			// Check if biased and fall through to runtime if so
  1.2198 +			__ andi (AT, result, markOopDesc::biased_lock_bit_in_place);	  
  1.2199 +			__ bne(AT,R0, slowCase); 
  1.2200 +			__ delayed()->nop(); 
  1.2201 +		}
  1.2202 +		// get hash
  1.2203 +		__ li(AT, markOopDesc::hash_mask_in_place);
  1.2204 +		__ andr (AT, result, AT);
  1.2205 +		// test if hashCode exists
  1.2206 +		__ beq (AT, R0, slowCase); 
  1.2207 +		__ delayed()->nop(); 
  1.2208 +		__ shr(result, markOopDesc::hash_shift);
  1.2209 +		__ jr(RA); 
  1.2210 +		__ delayed()->nop(); 
  1.2211 +		__ bind (slowCase);
  1.2212 +	}
  1.2213 +#endif // COMPILER1
  1.2214 +
  1.2215 +	// The instruction at the verified entry point must be 5 bytes or longer
  1.2216 +	// because it can be patched on the fly by make_non_entrant. The stack bang
  1.2217 +	// instruction fits that requirement. 
  1.2218 +
  1.2219 +	// Generate stack overflow check
  1.2220 +
  1.2221 +	if (UseStackBanging) {
  1.2222 +	//this function will modify the value in A0	
  1.2223 +		__ push(A0);
  1.2224 +		__ bang_stack_with_offset(StackShadowPages*os::vm_page_size());
  1.2225 +		__ pop(A0);
  1.2226 +	} else {
  1.2227 +		// need a 5 byte instruction to allow MT safe patching to non-entrant
  1.2228 +		__ nop(); 
  1.2229 +		__ nop(); 
  1.2230 +		__ nop(); 
  1.2231 +		__ nop(); 
  1.2232 +		__ nop(); 
  1.2233 +	}
  1.2234 +	// Generate a new frame for the wrapper.
  1.2235 +	// do mips need this ? 
  1.2236 +#ifndef OPT_THREAD
  1.2237 +	__ get_thread(TREG);
  1.2238 +#endif
  1.2239 +//FIXME here
  1.2240 +	__ st_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset()));
  1.2241 +	// -2 because return address is already present and so is saved ebp
  1.2242 +	__ move(AT, -(StackAlignmentInBytes));
  1.2243 +	__ andr(SP, SP, AT);
  1.2244 +
  1.2245 +	__ enter();
  1.2246 +	__ addiu(SP, SP, -1 * (stack_size - 2*wordSize));
  1.2247 +
  1.2248 +	// Frame is now completed as far a size and linkage.
  1.2249 +
  1.2250 +	int frame_complete = ((intptr_t)__ pc()) - start;
  1.2251 +
  1.2252 +	// Calculate the difference between esp and ebp. We need to know it
  1.2253 +	// after the native call because on windows Java Natives will pop
  1.2254 +	// the arguments and it is painful to do esp relative addressing
  1.2255 +	// in a platform independent way. So after the call we switch to
  1.2256 +	// ebp relative addressing.
  1.2257 +//FIXME actually , the fp_adjustment may not be the right, because andr(sp,sp,at)may change
  1.2258 +//the SP 
  1.2259 +	int fp_adjustment = stack_size - 2*wordSize;
  1.2260 +
  1.2261 +#ifdef COMPILER2
  1.2262 +	// C2 may leave the stack dirty if not in SSE2+ mode
  1.2263 +	// if (UseSSE >= 2) {
  1.2264 +	//  __ verify_FPU(0, "c2i transition should have clean FPU stack");
  1.2265 +	//} else {
  1.2266 +	__ empty_FPU_stack();
  1.2267 +	//}
  1.2268 +#endif /* COMPILER2 */
  1.2269 +
  1.2270 +	// Compute the ebp offset for any slots used after the jni call
  1.2271 +
  1.2272 +	int lock_slot_ebp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment;
  1.2273 +	// We use edi as a thread pointer because it is callee save and
  1.2274 +	// if we load it once it is usable thru the entire wrapper
  1.2275 +	// const Register thread = edi;
  1.2276 +	const Register thread = TREG;
  1.2277 +
  1.2278 +	// We use esi as the oop handle for the receiver/klass
  1.2279 +	// It is callee save so it survives the call to native
  1.2280 +
  1.2281 +	// const Register oop_handle_reg = esi;
  1.2282 +	const Register oop_handle_reg = S4;
  1.2283 +  if (is_critical_native) {
  1.2284 +     __ stop("generate_native_wrapper in sharedRuntime <2>");
  1.2285 +//TODO:Fu
  1.2286 +/*
  1.2287 +    check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args,
  1.2288 +                                       oop_handle_offset, oop_maps, in_regs, in_sig_bt);
  1.2289 +*/
  1.2290 +  }
  1.2291 +
  1.2292 +#ifndef OPT_THREAD
  1.2293 +	__ get_thread(thread);
  1.2294 +#endif
  1.2295 +
  1.2296 +  //
  1.2297 +  // We immediately shuffle the arguments so that any vm call we have to
  1.2298 +  // make from here on out (sync slow path, jvmpi, etc.) we will have
  1.2299 +  // captured the oops from our caller and have a valid oopMap for
  1.2300 +  // them.
  1.2301 +
  1.2302 +  // -----------------
  1.2303 +  // The Grand Shuffle 
  1.2304 +  //
  1.2305 +  // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
  1.2306 +  // and, if static, the class mirror instead of a receiver.  This pretty much
  1.2307 +  // guarantees that register layout will not match (and mips doesn't use reg
  1.2308 +  // parms though amd does).  Since the native abi doesn't use register args
  1.2309 +  // and the java conventions does we don't have to worry about collisions.
  1.2310 +  // All of our moved are reg->stack or stack->stack.
  1.2311 +  // We ignore the extra arguments during the shuffle and handle them at the
  1.2312 +  // last moment. The shuffle is described by the two calling convention
  1.2313 +  // vectors we have in our possession. We simply walk the java vector to
  1.2314 +  // get the source locations and the c vector to get the destinations.
  1.2315 +
  1.2316 +	int c_arg = method->is_static() ? 2 : 1 ;
  1.2317 +
  1.2318 +	// Record esp-based slot for receiver on stack for non-static methods
  1.2319 +	int receiver_offset = -1;
  1.2320 +
  1.2321 +	// This is a trick. We double the stack slots so we can claim
  1.2322 +	// the oops in the caller's frame. Since we are sure to have
  1.2323 +	// more args than the caller doubling is enough to make
  1.2324 +	// sure we can capture all the incoming oop args from the
  1.2325 +	// caller. 
  1.2326 +	//
  1.2327 +	OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
  1.2328 +
  1.2329 +  // Mark location of rbp (someday)
  1.2330 +  // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(rbp));
  1.2331 +
  1.2332 +  // Use eax, ebx as temporaries during any memory-memory moves we have to do
  1.2333 +  // All inbound args are referenced based on rbp and all outbound args via rsp.
  1.2334 +
  1.2335 +
  1.2336 +
  1.2337 +#ifdef ASSERT
  1.2338 +  bool reg_destroyed[RegisterImpl::number_of_registers];
  1.2339 +  bool freg_destroyed[FloatRegisterImpl::number_of_registers];
  1.2340 +  for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
  1.2341 +    reg_destroyed[r] = false;
  1.2342 +  }
  1.2343 +  for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) {
  1.2344 +    freg_destroyed[f] = false;
  1.2345 +  }
  1.2346 +
  1.2347 +#endif /* ASSERT */
  1.2348 +
  1.2349 +	// We know that we only have args in at most two integer registers (ecx, edx). So eax, ebx
  1.2350 +	// Are free to temporaries if we have to do  stack to steck moves.
  1.2351 +	// All inbound args are referenced based on ebp and all outbound args via esp.
  1.2352 +
  1.2353 +  // This may iterate in two different directions depending on the
  1.2354 +  // kind of native it is.  The reason is that for regular JNI natives
  1.2355 +  // the incoming and outgoing registers are offset upwards and for
  1.2356 +  // critical natives they are offset down.
  1.2357 +  GrowableArray<int> arg_order(2 * total_in_args);
  1.2358 +  VMRegPair tmp_vmreg;
  1.2359 +//  tmp_vmreg.set1(rbx->as_VMReg());
  1.2360 +  tmp_vmreg.set1(T8->as_VMReg());
  1.2361 +
  1.2362 +  if (!is_critical_native) {
  1.2363 +    for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) {
  1.2364 +      arg_order.push(i);
  1.2365 +      arg_order.push(c_arg);
  1.2366 +    }
  1.2367 +  } else {
  1.2368 +    // Compute a valid move order, using tmp_vmreg to break any cycles
  1.2369 +     __ stop("generate_native_wrapper in sharedRuntime <2>");
  1.2370 +//TODO:Fu
  1.2371 +//    ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg);
  1.2372 +  }
  1.2373 +
  1.2374 +  int temploc = -1;
  1.2375 +  for (int ai = 0; ai < arg_order.length(); ai += 2) {
  1.2376 +    int i = arg_order.at(ai);
  1.2377 +    int c_arg = arg_order.at(ai + 1);
  1.2378 +    __ block_comment(err_msg("move %d -> %d", i, c_arg));
  1.2379 +    if (c_arg == -1) {
  1.2380 +      assert(is_critical_native, "should only be required for critical natives");
  1.2381 +      // This arg needs to be moved to a temporary
  1.2382 +      __ move(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register());
  1.2383 +      in_regs[i] = tmp_vmreg;
  1.2384 +      temploc = i;
  1.2385 +      continue;
  1.2386 +    } else if (i == -1) {
  1.2387 +      assert(is_critical_native, "should only be required for critical natives");
  1.2388 +      // Read from the temporary location
  1.2389 +      assert(temploc != -1, "must be valid");
  1.2390 +      i = temploc;
  1.2391 +      temploc = -1;
  1.2392 +    }
  1.2393 +#ifdef ASSERT
  1.2394 +    if (in_regs[i].first()->is_Register()) {
  1.2395 +      assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!");
  1.2396 +    } else if (in_regs[i].first()->is_FloatRegister()) {
  1.2397 +      assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!");
  1.2398 +    }
  1.2399 +    if (out_regs[c_arg].first()->is_Register()) {
  1.2400 +      reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
  1.2401 +    } else if (out_regs[c_arg].first()->is_FloatRegister()) {
  1.2402 +      freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true;
  1.2403 +    }
  1.2404 +#endif /* ASSERT */
  1.2405 +    switch (in_sig_bt[i]) {
  1.2406 +      case T_ARRAY:
  1.2407 +        if (is_critical_native) {
  1.2408 +	  __ stop("generate_native_wrapper in sharedRuntime <2>");
  1.2409 +         //TODO:Fu
  1.2410 +         // unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]);
  1.2411 +          c_arg++;
  1.2412 +#ifdef ASSERT
  1.2413 +          if (out_regs[c_arg].first()->is_Register()) {
  1.2414 +            reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
  1.2415 +          } else if (out_regs[c_arg].first()->is_FloatRegister()) {
  1.2416 +            freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true;
  1.2417 +          }
  1.2418 +#endif
  1.2419 +          break;
  1.2420 +        }
  1.2421 +      case T_OBJECT:
  1.2422 +        assert(!is_critical_native, "no oop arguments");
  1.2423 +        object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
  1.2424 +                    ((i == 0) && (!is_static)),
  1.2425 +                    &receiver_offset);
  1.2426 +        break;
  1.2427 +      case T_VOID:
  1.2428 +        break;
  1.2429 +
  1.2430 +      case T_FLOAT:
  1.2431 +        float_move(masm, in_regs[i], out_regs[c_arg]);
  1.2432 +          break;
  1.2433 +
  1.2434 +      case T_DOUBLE:
  1.2435 +        assert( i + 1 < total_in_args &&
  1.2436 +                in_sig_bt[i + 1] == T_VOID &&
  1.2437 +                out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
  1.2438 +        double_move(masm, in_regs[i], out_regs[c_arg]);
  1.2439 +        break;
  1.2440 +
  1.2441 +      case T_LONG :
  1.2442 +        long_move(masm, in_regs[i], out_regs[c_arg]);
  1.2443 +        break;
  1.2444 +
  1.2445 +      case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
  1.2446 +
  1.2447 +      default:
  1.2448 +//        move32_64(masm, in_regs[i], out_regs[c_arg]);
  1.2449 +        simple_move32(masm, in_regs[i], out_regs[c_arg]);
  1.2450 +    }
  1.2451 +  }
  1.2452 +
  1.2453 +  // point c_arg at the first arg that is already loaded in case we
  1.2454 +  // need to spill before we call out
  1.2455 +   c_arg = total_c_args - total_in_args;
  1.2456 +	// Pre-load a static method's oop into esi.  Used both by locking code and
  1.2457 +	// the normal JNI call code.
  1.2458 +	
  1.2459 +	__ move(oop_handle_reg, A1);
  1.2460 +
  1.2461 +	if (method->is_static() && !is_critical_native) {
  1.2462 +
  1.2463 +		//  load opp into a register
  1.2464 +		int oop_index = __ oop_recorder()->find_index(JNIHandles::make_local(
  1.2465 +					(method->method_holder())->java_mirror()));
  1.2466 +
  1.2467 +		
  1.2468 +		RelocationHolder rspec = oop_Relocation::spec(oop_index);
  1.2469 +		__ relocate(rspec);
  1.2470 +		//__ lui(oop_handle_reg, Assembler::split_high((int)JNIHandles::make_local(
  1.2471 +		//	Klass::cast(method->method_holder())->java_mirror())));
  1.2472 +		//__ addiu(oop_handle_reg, oop_handle_reg, Assembler::split_low((int)
  1.2473 +		//    JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror())));
  1.2474 +		__ li48(oop_handle_reg, (long)JNIHandles::make_local((method->method_holder())->java_mirror()));
  1.2475 +	//	__ verify_oop(oop_handle_reg);
  1.2476 +		// Now handlize the static class mirror it's known not-null.
  1.2477 +		__ sd( oop_handle_reg, SP, klass_offset); 
  1.2478 +		map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
  1.2479 +		
  1.2480 +		// Now get the handle
  1.2481 +		__ lea(oop_handle_reg, Address(SP, klass_offset));
  1.2482 +		// store the klass handle as second argument
  1.2483 +		__ move(A1, oop_handle_reg);
  1.2484 +                // and protect the arg if we must spill
  1.2485 +                c_arg--;
  1.2486 +	}
  1.2487 +  // Change state to native (we save the return address in the thread, since it might not
  1.2488 +  // be pushed on the stack when we do a a stack traversal). It is enough that the pc()
  1.2489 +  // points into the right code segment. It does not have to be the correct return pc.
  1.2490 +  // We use the same pc/oopMap repeatedly when we call out
  1.2491 +
  1.2492 +	intptr_t the_pc = (intptr_t) __ pc();
  1.2493 +	
  1.2494 +	oop_maps->add_gc_map(the_pc - start, map);
  1.2495 +
  1.2496 +	//__ set_last_Java_frame(thread, esp, noreg, (address)the_pc);
  1.2497 +	__ set_last_Java_frame(SP, noreg, NULL);
  1.2498 +	__ relocate(relocInfo::internal_pc_type); 
  1.2499 +	{	
  1.2500 +		intptr_t save_pc = (intptr_t)the_pc ;
  1.2501 +		__ li48(AT, save_pc);
  1.2502 +	}
  1.2503 +	__ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
  1.2504 + 
  1.2505 +
  1.2506 +	// We have all of the arguments setup at this point. We must not touch any register
  1.2507 +	// argument registers at this point (what if we save/restore them there are no oop?
  1.2508 +	{ 
  1.2509 +		SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0);
  1.2510 +		int metadata_index = __ oop_recorder()->find_index(method());
  1.2511 +		RelocationHolder rspec = metadata_Relocation::spec(metadata_index);
  1.2512 +		__ relocate(rspec);
  1.2513 +		//__ lui(T6, Assembler::split_high((int)JNIHandles::make_local(method())));
  1.2514 +		//__ addiu(T6, T6, Assembler::split_low((int)JNIHandles::make_local(method())));
  1.2515 +		__ li48(AT, (long)(method()));
  1.2516 +
  1.2517 +		__ call_VM_leaf(
  1.2518 +				CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), 
  1.2519 +		   thread, AT); 
  1.2520 +
  1.2521 +	}
  1.2522 +
  1.2523 +  // These are register definitions we need for locking/unlocking 
  1.2524 +//  const Register swap_reg = eax;  // Must use eax for cmpxchg instruction
  1.2525 +//  const Register obj_reg  = ecx;  // Will contain the oop
  1.2526 + // const Register lock_reg = edx;  // Address of compiler lock object (BasicLock)
  1.2527 +//FIXME, I hava no idea which register to use
  1.2528 +	const Register swap_reg = T8;  // Must use eax for cmpxchg instruction
  1.2529 +	const Register obj_reg  = T9;  // Will contain the oop
  1.2530 +	//const Register lock_reg = T6;  // Address of compiler lock object (BasicLock)
  1.2531 +	const Register lock_reg = c_rarg0;  // Address of compiler lock object (BasicLock)
  1.2532 +
  1.2533 +
  1.2534 +
  1.2535 +	Label slow_path_lock;
  1.2536 +	Label lock_done;
  1.2537 +
  1.2538 +	// Lock a synchronized method
  1.2539 +	if (method->is_synchronized()) {
  1.2540 +                assert(!is_critical_native, "unhandled");
  1.2541 +
  1.2542 +		const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes();
  1.2543 +
  1.2544 +		// Get the handle (the 2nd argument)
  1.2545 +		__ move(oop_handle_reg, A1);
  1.2546 +
  1.2547 +		// Get address of the box
  1.2548 +		__ lea(lock_reg, Address(FP, lock_slot_ebp_offset));
  1.2549 +
  1.2550 +		// Load the oop from the handle 
  1.2551 +		__ ld(obj_reg, oop_handle_reg, 0);
  1.2552 +
  1.2553 +		if (UseBiasedLocking) {
  1.2554 +			// Note that oop_handle_reg is trashed during this call
  1.2555 +		__ biased_locking_enter(lock_reg, obj_reg, swap_reg, A1, 
  1.2556 +				false, lock_done, &slow_path_lock);
  1.2557 +		}
  1.2558 +
  1.2559 +		// Load immediate 1 into swap_reg %eax
  1.2560 +		__ move(swap_reg, 1);
  1.2561 +
  1.2562 +		__ ld(AT, obj_reg, 0);   
  1.2563 +		__ orr(swap_reg, swap_reg, AT); 
  1.2564 +
  1.2565 +		__ sd( swap_reg, lock_reg, mark_word_offset);
  1.2566 +		__ cmpxchg(lock_reg, Address(obj_reg, 0), swap_reg);
  1.2567 +		__ bne(AT, R0, lock_done);
  1.2568 +		__ delayed()->nop(); 
  1.2569 +		// Test if the oopMark is an obvious stack pointer, i.e.,
  1.2570 +		//  1) (mark & 3) == 0, and
  1.2571 +		//  2) esp <= mark < mark + os::pagesize()
  1.2572 +		// These 3 tests can be done by evaluating the following
  1.2573 +		// expression: ((mark - esp) & (3 - os::vm_page_size())),
  1.2574 +		// assuming both stack pointer and pagesize have their
  1.2575 +		// least significant 2 bits clear.
  1.2576 +		// NOTE: the oopMark is in swap_reg %eax as the result of cmpxchg
  1.2577 +
  1.2578 +		__ dsub(swap_reg, swap_reg,SP);
  1.2579 + 		__ move(AT, 3 - os::vm_page_size());
  1.2580 +		__ andr(swap_reg , swap_reg, AT);
  1.2581 +		// Save the test result, for recursive case, the result is zero
  1.2582 +		__ sd(swap_reg, lock_reg, mark_word_offset); 
  1.2583 +	//FIXME here, Why notEqual? 	
  1.2584 +		__ bne(swap_reg,R0, slow_path_lock);
  1.2585 +		__ delayed()->nop();  
  1.2586 +		// Slow path will re-enter here
  1.2587 +		__ bind(lock_done);
  1.2588 +
  1.2589 +		if (UseBiasedLocking) {
  1.2590 +			// Re-fetch oop_handle_reg as we trashed it above
  1.2591 +			__ move(A1, oop_handle_reg);
  1.2592 +		}
  1.2593 +	}
  1.2594 +
  1.2595 +
  1.2596 +	// Finally just about ready to make the JNI call
  1.2597 +
  1.2598 +
  1.2599 +	// get JNIEnv* which is first argument to native
  1.2600 +  if (!is_critical_native) {
  1.2601 +	__ addi(A0, thread, in_bytes(JavaThread::jni_environment_offset()));
  1.2602 +  }
  1.2603 +
  1.2604 +	// Example: Java_java_lang_ref_Finalizer_invokeFinalizeMethod(JNIEnv *env, jclass clazz, jobject ob)
  1.2605 +	/* Load the second arguments into A1 */
  1.2606 +	//__ ld(A1, SP , wordSize ); 	// klass
  1.2607 +
  1.2608 +	// Now set thread in native
  1.2609 +	__ addi(AT, R0, _thread_in_native); 
  1.2610 +	__ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); 
  1.2611 +	/* Jin: do the call */
  1.2612 +	__ call(method->native_function(), relocInfo::runtime_call_type);
  1.2613 +	__ delayed()->nop();
  1.2614 +	// WARNING - on Windows Java Natives use pascal calling convention and pop the
  1.2615 +	// arguments off of the stack. We could just re-adjust the stack pointer here
  1.2616 +	// and continue to do SP relative addressing but we instead switch to FP
  1.2617 +	// relative addressing.
  1.2618 +
  1.2619 +	// Unpack native results.  
  1.2620 +	switch (ret_type) {
  1.2621 +	case T_BOOLEAN: __ c2bool(V0);            break;
  1.2622 +	case T_CHAR   : __ andi(V0,V0, 0xFFFF);      break;
  1.2623 +	case T_BYTE   : __ sign_extend_byte (V0); break;
  1.2624 +	case T_SHORT  : __ sign_extend_short(V0); break;
  1.2625 +	case T_INT    : // nothing to do         break;
  1.2626 +	case T_DOUBLE :
  1.2627 +	case T_FLOAT  :
  1.2628 +	// Result is in st0 we'll save as needed
  1.2629 +	break;
  1.2630 +	case T_ARRAY:                 // Really a handle
  1.2631 +	case T_OBJECT:                // Really a handle
  1.2632 +	break; // can't de-handlize until after safepoint check
  1.2633 +	case T_VOID: break;
  1.2634 +	case T_LONG: break;
  1.2635 +	default       : ShouldNotReachHere();
  1.2636 +	}
  1.2637 +	// Switch thread to "native transition" state before reading the synchronization state.
  1.2638 +	// This additional state is necessary because reading and testing the synchronization
  1.2639 +	// state is not atomic w.r.t. GC, as this scenario demonstrates:
  1.2640 +	//     Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
  1.2641 +	//     VM thread changes sync state to synchronizing and suspends threads for GC.
  1.2642 +	//     Thread A is resumed to finish this native method, but doesn't block here since it
  1.2643 +	//     didn't see any synchronization is progress, and escapes.
  1.2644 +	// __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native_trans);    
  1.2645 +	//__ sw(_thread_in_native_trans, thread, JavaThread::thread_state_offset());    
  1.2646 +	//   __ move(AT, (int)_thread_in_native_trans);
  1.2647 +	__ addi(AT, R0, _thread_in_native_trans); 
  1.2648 +	__ sw(AT, thread, in_bytes(JavaThread::thread_state_offset()));    
  1.2649 +	
  1.2650 +  Label after_transition;
  1.2651 +
  1.2652 +	// check for safepoint operation in progress and/or pending suspend requests
  1.2653 +	{ Label Continue;
  1.2654 +//FIXME here, which regiser should we use?
  1.2655 +		//        SafepointSynchronize::_not_synchronized);
  1.2656 +		__ li(AT, SafepointSynchronize::address_of_state());
  1.2657 +		__ lw(A0, AT, 0);	
  1.2658 +		__ addi(AT, A0, -SafepointSynchronize::_not_synchronized); 
  1.2659 +		Label L;
  1.2660 +		__ bne(AT,R0, L); 
  1.2661 +		__ delayed()->nop();	
  1.2662 +		__ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); 
  1.2663 +		__ beq(AT, R0, Continue); 
  1.2664 +		__ delayed()->nop(); 
  1.2665 +		__ bind(L);
  1.2666 +
  1.2667 +		// Don't use call_VM as it will see a possible pending exception and forward it
  1.2668 +		// and never return here preventing us from clearing _last_native_pc down below.
  1.2669 +		// Also can't use call_VM_leaf either as it will check to see if esi & edi are
  1.2670 +		// preserved and correspond to the bcp/locals pointers. So we do a runtime call
  1.2671 +		// by hand.
  1.2672 +		//
  1.2673 +		save_native_result(masm, ret_type, stack_slots);
  1.2674 +		__ move (A0, thread); 
  1.2675 +		__ addi(SP,SP, -wordSize); 
  1.2676 +    if (!is_critical_native) {
  1.2677 +      __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::runtime_call_type);
  1.2678 +      __ delayed()->nop(); 
  1.2679 +    } else {
  1.2680 +      __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition), relocInfo::runtime_call_type);
  1.2681 +      __ delayed()->nop(); 
  1.2682 +    }
  1.2683 +//		__ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::runtime_call_type);
  1.2684 +//		__ delayed()->nop(); 
  1.2685 +		__ addi(SP,SP, wordSize); 
  1.2686 +		//add for compressedoops
  1.2687 +		__ reinit_heapbase();
  1.2688 +		// Restore any method result value
  1.2689 +		restore_native_result(masm, ret_type, stack_slots);
  1.2690 +
  1.2691 +    if (is_critical_native) {
  1.2692 +      // The call above performed the transition to thread_in_Java so
  1.2693 +      // skip the transition logic below.
  1.2694 +      __ beq(R0, R0, after_transition);
  1.2695 +      __ delayed()->nop(); 
  1.2696 +    }
  1.2697 +
  1.2698 +		__ bind(Continue);
  1.2699 +	}
  1.2700 +
  1.2701 +	// change thread state
  1.2702 +	__ addi(AT, R0, _thread_in_Java); 
  1.2703 +	__ sw(AT,  thread, in_bytes(JavaThread::thread_state_offset())); 
  1.2704 +  __ bind(after_transition);
  1.2705 +	Label reguard;
  1.2706 +	Label reguard_done;
  1.2707 +	__ ld(AT, thread, in_bytes(JavaThread::stack_guard_state_offset())); 
  1.2708 +	__ addi(AT, AT, -JavaThread::stack_guard_yellow_disabled); 
  1.2709 +	__ beq(AT, R0, reguard);
  1.2710 +	__ delayed()->nop();  
  1.2711 +	// slow path reguard  re-enters here
  1.2712 +	__ bind(reguard_done);
  1.2713 +
  1.2714 +	// Handle possible exception (will unlock if necessary)
  1.2715 +
  1.2716 +	// native result if any is live 
  1.2717 +
  1.2718 +	// Unlock
  1.2719 +	Label slow_path_unlock;
  1.2720 +	Label unlock_done;
  1.2721 +	if (method->is_synchronized()) {
  1.2722 +
  1.2723 +		Label done;
  1.2724 +
  1.2725 +		// Get locked oop from the handle we passed to jni
  1.2726 +		__ ld( obj_reg, oop_handle_reg, 0);
  1.2727 +		//FIXME 
  1.2728 +		if (UseBiasedLocking) {
  1.2729 +			__ biased_locking_exit(obj_reg, T8, done);
  1.2730 +
  1.2731 +		}
  1.2732 +
  1.2733 +		// Simple recursive lock?
  1.2734 +
  1.2735 +		__ ld(AT, FP, lock_slot_ebp_offset); 
  1.2736 +		__ beq(AT, R0, done);
  1.2737 +		__ delayed()->nop();	
   1.2738 +		// Must save eax if it is live now because cmpxchg must use it
  1.2739 +		if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
  1.2740 +			save_native_result(masm, ret_type, stack_slots);
  1.2741 +		}
  1.2742 +
  1.2743 +		//  get old displaced header
  1.2744 +		__ ld (T8, FP, lock_slot_ebp_offset);
  1.2745 +		// get address of the stack lock
  1.2746 +		//FIXME aoqi
  1.2747 +		//__ addi (T6, FP, lock_slot_ebp_offset);
  1.2748 +		__ addi (c_rarg0, FP, lock_slot_ebp_offset);
  1.2749 +		// Atomic swap old header if oop still contains the stack lock
  1.2750 +		//FIXME aoqi
  1.2751 +		//__ cmpxchg(T8, Address(obj_reg, 0),T6 );
  1.2752 +		__ cmpxchg(T8, Address(obj_reg, 0), c_rarg0);
  1.2753 +
  1.2754 +		__ beq(AT, R0, slow_path_unlock);
  1.2755 +		__ delayed()->nop(); 
  1.2756 +		// slow path re-enters here
  1.2757 +		__ bind(unlock_done);
  1.2758 +		if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
  1.2759 +			restore_native_result(masm, ret_type, stack_slots);
  1.2760 +		}
  1.2761 +
  1.2762 +		__ bind(done);
  1.2763 +
  1.2764 +	}
  1.2765 +	{ 
  1.2766 +		SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0);
  1.2767 +		// Tell dtrace about this method exit
  1.2768 +		save_native_result(masm, ret_type, stack_slots);
  1.2769 +		int metadata_index = __ oop_recorder()->find_index( (method()));
  1.2770 +		RelocationHolder rspec = metadata_Relocation::spec(metadata_index);
  1.2771 +		__ relocate(rspec);
  1.2772 +		//__ lui(T6, Assembler::split_high((int)JNIHandles::make_local(method())));
  1.2773 +		//__ addiu(T6, T6, Assembler::split_low((int)JNIHandles::make_local(method())));
  1.2774 +		__ li48(AT, (long)(method()));
  1.2775 +
  1.2776 +		__ call_VM_leaf(
  1.2777 +				CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), 
  1.2778 +				thread, AT);
  1.2779 +		restore_native_result(masm, ret_type, stack_slots);
  1.2780 +	}
  1.2781 +
  1.2782 +	// We can finally stop using that last_Java_frame we setup ages ago
  1.2783 +
  1.2784 +	__ reset_last_Java_frame(false, true);
  1.2785 +
  1.2786 +	// Unpack oop result
  1.2787 +	if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
  1.2788 +		Label L;
  1.2789 +		//  __ cmpl(eax, NULL_WORD);
  1.2790 +		//  __ jcc(Assembler::equal, L);
  1.2791 +		__ beq(V0, R0,L ); 
  1.2792 +		__ delayed()->nop(); 
  1.2793 +		//  __ movl(eax, Address(eax));
  1.2794 +		__ ld(V0, V0, 0);	
  1.2795 +		__ bind(L);
  1.2796 +		// __ verify_oop(eax);
  1.2797 +		__ verify_oop(V0);
  1.2798 +	}
  1.2799 +
  1.2800 +  if (!is_critical_native) {
  1.2801 +	// reset handle block
  1.2802 +	__ ld(AT, thread, in_bytes(JavaThread::active_handles_offset()));
  1.2803 +	__ sw(R0, AT, JNIHandleBlock::top_offset_in_bytes()); 
  1.2804 +  }
  1.2805 +
  1.2806 +  if (!is_critical_native) {
  1.2807 +	// Any exception pending?
  1.2808 +	__ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); 
  1.2809 +
  1.2810 +	__ bne(AT, R0, exception_pending);
  1.2811 +	__ delayed()->nop();
  1.2812 +  }
  1.2813 +	// no exception, we're almost done
  1.2814 +
  1.2815 +	// check that only result value is on FPU stack
  1.2816 +	__ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 1 : 0, "native_wrapper normal exit");
  1.2817 +
  1.2818 +  // Fixup floating pointer results so that result looks like a return from a compiled method
  1.2819 +/*  if (ret_type == T_FLOAT) {
  1.2820 +    if (UseSSE >= 1) {
  1.2821 +      // Pop st0 and store as float and reload into xmm register
  1.2822 +      __ fstp_s(Address(ebp, -4));
  1.2823 +      __ movss(xmm0, Address(ebp, -4));
  1.2824 +    }
  1.2825 +  } else if (ret_type == T_DOUBLE) {
  1.2826 +    if (UseSSE >= 2) {
  1.2827 +      // Pop st0 and store as double and reload into xmm register
  1.2828 +      __ fstp_d(Address(ebp, -8));
  1.2829 +      __ movsd(xmm0, Address(ebp, -8));
  1.2830 +    }
  1.2831 +  }
  1.2832 +*/
  1.2833 +  // Return
  1.2834 +#ifndef OPT_THREAD
  1.2835 +       __ get_thread(TREG);
  1.2836 +#endif
  1.2837 +	__ ld_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset()));
  1.2838 +	__ leave();
  1.2839 +
  1.2840 +	__ jr(RA);
  1.2841 +	__ delayed()->nop(); 
  1.2842 +	// Unexpected paths are out of line and go here
  1.2843 +/*
  1.2844 +  if (!is_critical_native) {
  1.2845 +    // forward the exception
  1.2846 +    __ bind(exception_pending);
  1.2847 +
  1.2848 +    // and forward the exception
  1.2849 +    __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
  1.2850 +  }
  1.2851 +*/
  1.2852 +	// Slow path locking & unlocking
  1.2853 +	if (method->is_synchronized()) {
  1.2854 +
  1.2855 +		// BEGIN Slow path lock
  1.2856 +
  1.2857 +		__ bind(slow_path_lock);
  1.2858 +
  1.2859 +                // protect the args we've loaded
  1.2860 +                save_args(masm, total_c_args, c_arg, out_regs);
  1.2861 +
  1.2862 +		// has last_Java_frame setup. No exceptions so do vanilla call not call_VM
  1.2863 +		// args are (oop obj, BasicLock* lock, JavaThread* thread)
  1.2864 +		
  1.2865 +		__ move(A0, obj_reg); 
  1.2866 +		__ move(A1, lock_reg); 
  1.2867 +		__ move(A2, thread); 
  1.2868 +		__ addi(SP, SP, - 3*wordSize); 
  1.2869 +
  1.2870 +                __ move(AT, -(StackAlignmentInBytes));
  1.2871 +                __ move(S2, SP);     // use S2 as a sender SP holder
  1.2872 +                __ andr(SP, SP, AT); // align stack as required by ABI
  1.2873 +
  1.2874 +		__ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type);
  1.2875 +		__ delayed()->nop();
  1.2876 +                __ move(SP, S2);
  1.2877 +		__ addi(SP, SP, 3*wordSize); 
  1.2878 +
  1.2879 +                restore_args(masm, total_c_args, c_arg, out_regs);
  1.2880 +
  1.2881 +#ifdef ASSERT
  1.2882 +		{ Label L;
  1.2883 +			// __ cmpl(Address(thread, in_bytes(Thread::pending_exception_offset())), (int)NULL_WORD);
  1.2884 +			__ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); 
  1.2885 +			//__ jcc(Assembler::equal, L);
  1.2886 +			__ beq(AT, R0, L); 
  1.2887 +			__ delayed()->nop(); 
  1.2888 +			__ stop("no pending exception allowed on exit from monitorenter");
  1.2889 +			__ bind(L);
  1.2890 +		}
  1.2891 +#endif
  1.2892 +		__ b(lock_done);
  1.2893 +		__ delayed()->nop();
  1.2894 +		// END Slow path lock
  1.2895 +
  1.2896 +		// BEGIN Slow path unlock
  1.2897 +		__ bind(slow_path_unlock);
  1.2898 +
  1.2899 +		// Slow path unlock
  1.2900 +
  1.2901 +		if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
  1.2902 +			save_native_result(masm, ret_type, stack_slots);
  1.2903 +		}
  1.2904 +		// Save pending exception around call to VM (which contains an EXCEPTION_MARK)
  1.2905 +
  1.2906 +		__ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); 
  1.2907 +		__ push(AT); 
  1.2908 +		__ sd(R0, thread, in_bytes(Thread::pending_exception_offset()));
  1.2909 +
  1.2910 +                __ move(AT, -(StackAlignmentInBytes));
  1.2911 +                __ move(S2, SP);     // use S2 as a sender SP holder
  1.2912 +                __ andr(SP, SP, AT); // align stack as required by ABI
  1.2913 +
  1.2914 +		// should be a peal
  1.2915 +		// +wordSize because of the push above
  1.2916 +		__ addi(A1, FP, lock_slot_ebp_offset);
  1.2917 +
  1.2918 +		__ move(A0, obj_reg); 
  1.2919 +		__ addi(SP,SP, -2*wordSize);
  1.2920 +		__ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C),
  1.2921 +				relocInfo::runtime_call_type);
  1.2922 +		__ delayed()->nop(); 
  1.2923 +		__ addi(SP,SP, 2*wordSize);
  1.2924 +                __ move(SP, S2);
  1.2925 +		//add for compressedoops
  1.2926 +		__ reinit_heapbase();
  1.2927 +#ifdef ASSERT
  1.2928 +		{
  1.2929 +			Label L;
  1.2930 +			//    __ cmpl(Address(thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD);
  1.2931 +			__ lw( AT, thread, in_bytes(Thread::pending_exception_offset())); 
  1.2932 +			//__ jcc(Assembler::equal, L);
  1.2933 +			__ beq(AT, R0, L); 
  1.2934 +			__ delayed()->nop(); 
  1.2935 +			__ stop("no pending exception allowed on exit complete_monitor_unlocking_C");
  1.2936 +			__ bind(L);
  1.2937 +		}
  1.2938 +#endif /* ASSERT */
  1.2939 +
  1.2940 +		__ pop(AT); 
  1.2941 +		__ sd(AT, thread, in_bytes(Thread::pending_exception_offset()));
  1.2942 +		if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
  1.2943 +			restore_native_result(masm, ret_type, stack_slots);
  1.2944 +		}
  1.2945 +		__ b(unlock_done);
  1.2946 +		__ delayed()->nop(); 
  1.2947 +		// END Slow path unlock
  1.2948 +
  1.2949 +	}
  1.2950 +
  1.2951 +	// SLOW PATH Reguard the stack if needed
  1.2952 +
  1.2953 +	__ bind(reguard);
  1.2954 +	save_native_result(masm, ret_type, stack_slots);
  1.2955 +	__ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), 
  1.2956 +			relocInfo::runtime_call_type);
  1.2957 +	__ delayed()->nop();	
  1.2958 +	//add for compressedoops
  1.2959 +	__ reinit_heapbase();
  1.2960 +	restore_native_result(masm, ret_type, stack_slots);
  1.2961 +	__ b(reguard_done);
  1.2962 +	__ delayed()->nop();
  1.2963 +
  1.2964 +	// BEGIN EXCEPTION PROCESSING
  1.2965 +    if (!is_critical_native) {
  1.2966 +	// Forward  the exception
  1.2967 +	__ bind(exception_pending);
  1.2968 +
  1.2969 +	// remove possible return value from FPU register stack
  1.2970 +	__ empty_FPU_stack();
  1.2971 +
  1.2972 +	// pop our frame
  1.2973 + //forward_exception_entry need return address on stack
  1.2974 +        __ addiu(SP, FP, wordSize);
  1.2975 +	__ ld(FP, SP, (-1) * wordSize);
  1.2976 +
  1.2977 +	// and forward the exception
  1.2978 +	__ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
  1.2979 +	__ delayed()->nop();
  1.2980 +    }
  1.2981 +	__ flush();
  1.2982 +
  1.2983 +	nmethod *nm = nmethod::new_native_nmethod(method,
  1.2984 +                        compile_id,
  1.2985 +			masm->code(),
  1.2986 +			vep_offset,
  1.2987 +			frame_complete,
  1.2988 +			stack_slots / VMRegImpl::slots_per_word,
  1.2989 +			(is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
  1.2990 +			in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
  1.2991 +			oop_maps);
  1.2992 +
  1.2993 +  if (is_critical_native) {
  1.2994 +    nm->set_lazy_critical_native(true);
  1.2995 +  }
  1.2996 +	return nm;
  1.2997 +
  1.2998 +
  1.2999 +}
  1.3000 +
  1.3001 +#ifdef HAVE_DTRACE_H
  1.3002 +// ---------------------------------------------------------------------------
  1.3003 +// Generate a dtrace nmethod for a given signature.  The method takes arguments
  1.3004 +// in the Java compiled code convention, marshals them to the native
  1.3005 +// abi and then leaves nops at the position you would expect to call a native
  1.3006 +// function. When the probe is enabled the nops are replaced with a trap
  1.3007 +// instruction that dtrace inserts and the trace will cause a notification
  1.3008 +// to dtrace.
  1.3009 +//
  1.3010 +// The probes are only able to take primitive types and java/lang/String as
  1.3011 +// arguments.  No other java types are allowed. Strings are converted to utf8
  1.3012 +// strings so that from dtrace point of view java strings are converted to C
  1.3013 +// strings. There is an arbitrary fixed limit on the total space that a method
  1.3014 +// can use for converting the strings. (256 chars per string in the signature).
   1.3015 +// So any java string larger than this is truncated.
  1.3016 +
   1.3017 +static int  fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 };  // per-register save offsets (presumably FP-relative -- TODO confirm against use in generate_dtrace_nmethod); zero until initialized
   1.3018 +static bool offsets_initialized = false;  // guards one-time lazy initialization of fp_offset
  1.3019 +
   1.3020 +static VMRegPair reg64_to_VMRegPair(Register r) {  // Describe a 64-bit integer value held in register r as a VMRegPair.
   1.3021 +  VMRegPair ret;
   1.3022 +  if (wordSize == 8) {
   1.3023 +    ret.set2(r->as_VMReg());  // 64-bit word: the single register holds the whole value.
   1.3024 +  } else {
   1.3025 +    ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg());  // 32-bit word: value spans r and its successor (successor passed as the second/high half -- TODO confirm ordering per VMRegPair::set_pair)
   1.3026 +  }
   1.3027 +  return ret;
   1.3028 +}
  1.3029 +
  1.3030 +
  1.3031 +nmethod *SharedRuntime::generate_dtrace_nmethod(
  1.3032 +    MacroAssembler *masm, methodHandle method) {
  1.3033 +
  1.3034 +
  1.3035 +  // generate_dtrace_nmethod is guarded by a mutex so we are sure to
  1.3036 +  // be single threaded in this method.
  1.3037 +  assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be");
  1.3038 +
  1.3039 +  // Fill in the signature array, for the calling-convention call.
  1.3040 +  int total_args_passed = method->size_of_parameters();
  1.3041 +
  1.3042 +  BasicType* in_sig_bt  = NEW_RESOURCE_ARRAY(BasicType, total_args_passed);
  1.3043 +  VMRegPair  *in_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed);
  1.3044 +
  1.3045 +  // The signature we are going to use for the trap that dtrace will see
  1.3046 +  // java/lang/String is converted. We drop "this" and any other object
  1.3047 +  // is converted to NULL.  (A one-slot java/lang/Long object reference
  1.3048 +  // is converted to a two-slot long, which is why we double the allocation).
  1.3049 +  BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2);
  1.3050 +  VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2);
  1.3051 +
  1.3052 +  int i=0;
  1.3053 +  int total_strings = 0;
  1.3054 +  int first_arg_to_pass = 0;
  1.3055 +  int total_c_args = 0;
  1.3056 +
  1.3057 +  // Skip the receiver as dtrace doesn't want to see it
  1.3058 +  if( !method->is_static() ) {
  1.3059 +    in_sig_bt[i++] = T_OBJECT;
  1.3060 +    first_arg_to_pass = 1;
  1.3061 +  }
  1.3062 +
  1.3063 +  SignatureStream ss(method->signature());
  1.3064 +  for ( ; !ss.at_return_type(); ss.next()) {
  1.3065 +    BasicType bt = ss.type();
  1.3066 +    in_sig_bt[i++] = bt;  // Collect remaining bits of signature
  1.3067 +    out_sig_bt[total_c_args++] = bt;
  1.3068 +    if( bt == T_OBJECT) {
  1.3069 +      symbolOop s = ss.as_symbol_or_null();
  1.3070 +      if (s == vmSymbols::java_lang_String()) {
  1.3071 +        total_strings++;
  1.3072 +        out_sig_bt[total_c_args-1] = T_ADDRESS;
  1.3073 +      } else if (s == vmSymbols::java_lang_Boolean() ||
  1.3074 +                 s == vmSymbols::java_lang_Byte()) {
  1.3075 +        out_sig_bt[total_c_args-1] = T_BYTE;
  1.3076 +      } else if (s == vmSymbols::java_lang_Character() ||
  1.3077 +                 s == vmSymbols::java_lang_Short()) {
  1.3078 +        out_sig_bt[total_c_args-1] = T_SHORT;
  1.3079 +      } else if (s == vmSymbols::java_lang_Integer() ||
  1.3080 +                 s == vmSymbols::java_lang_Float()) {
  1.3081 +        out_sig_bt[total_c_args-1] = T_INT;
  1.3082 +      } else if (s == vmSymbols::java_lang_Long() ||
  1.3083 +                 s == vmSymbols::java_lang_Double()) {
  1.3084 +        out_sig_bt[total_c_args-1] = T_LONG;
  1.3085 +        out_sig_bt[total_c_args++] = T_VOID;
  1.3086 +      }
  1.3087 +    } else if ( bt == T_LONG || bt == T_DOUBLE ) {
  1.3088 +      in_sig_bt[i++] = T_VOID;   // Longs & doubles take 2 Java slots
  1.3089 +      // We convert double to long
  1.3090 +      out_sig_bt[total_c_args-1] = T_LONG;
  1.3091 +      out_sig_bt[total_c_args++] = T_VOID;
  1.3092 +    } else if ( bt == T_FLOAT) {
  1.3093 +      // We convert float to int
  1.3094 +      out_sig_bt[total_c_args-1] = T_INT;
  1.3095 +    }
  1.3096 +  }
  1.3097 +
  1.3098 +  assert(i==total_args_passed, "validly parsed signature");
  1.3099 +
  1.3100 +  // Now get the compiled-Java layout as input arguments
  1.3101 +  int comp_args_on_stack;
  1.3102 +  comp_args_on_stack = SharedRuntime::java_calling_convention(
  1.3103 +      in_sig_bt, in_regs, total_args_passed, false);
  1.3104 +
  1.3105 +  // We have received a description of where all the java arg are located
  1.3106 +  // on entry to the wrapper. We need to convert these args to where
  1.3107 +  // the a  native (non-jni) function would expect them. To figure out
  1.3108 +  // where they go we convert the java signature to a C signature and remove
  1.3109 +  // T_VOID for any long/double we might have received.
  1.3110 +
  1.3111 +
  1.3112 +  // Now figure out where the args must be stored and how much stack space
  1.3113 +  // they require (neglecting out_preserve_stack_slots but space for storing
  1.3114 +  // the 1st six register arguments). It's weird see int_stk_helper.
  1.3115 +  //
  1.3116 +  int out_arg_slots;
  1.3117 +  out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
  1.3118 +
  1.3119 +  // Calculate the total number of stack slots we will need.
  1.3120 +
  1.3121 +  // First count the abi requirement plus all of the outgoing args
  1.3122 +  int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
  1.3123 +
   1.3124 +  // Plus a temp for possible conversion of float/double/long register args
  1.3125 +
  1.3126 +  int conversion_temp = stack_slots;
  1.3127 +  stack_slots += 2;
  1.3128 +
  1.3129 +
  1.3130 +  // Now space for the string(s) we must convert
  1.3131 +
  1.3132 +  int string_locs = stack_slots;
  1.3133 +  stack_slots += total_strings *
  1.3134 +                   (max_dtrace_string_size / VMRegImpl::stack_slot_size);
  1.3135 +
  1.3136 +  // Ok The space we have allocated will look like:
  1.3137 +  //
  1.3138 +  //
  1.3139 +  // FP-> |                     |
  1.3140 +  //      |---------------------|
  1.3141 +  //      | string[n]           |
  1.3142 +  //      |---------------------| <- string_locs[n]
  1.3143 +  //      | string[n-1]         |
  1.3144 +  //      |---------------------| <- string_locs[n-1]
  1.3145 +  //      | ...                 |
  1.3146 +  //      | ...                 |
  1.3147 +  //      |---------------------| <- string_locs[1]
  1.3148 +  //      | string[0]           |
  1.3149 +  //      |---------------------| <- string_locs[0]
  1.3150 +  //      | temp                |
  1.3151 +  //      |---------------------| <- conversion_temp
  1.3152 +  //      | outbound memory     |
  1.3153 +  //      | based arguments     |
  1.3154 +  //      |                     |
  1.3155 +  //      |---------------------|
  1.3156 +  //      |                     |
  1.3157 +  // SP-> | out_preserved_slots |
  1.3158 +  //
  1.3159 +  //
  1.3160 +
  1.3161 +  // Now compute actual number of stack words we need rounding to make
  1.3162 +  // stack properly aligned.
  1.3163 +  stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word);
  1.3164 +
  1.3165 +  int stack_size = stack_slots * VMRegImpl::stack_slot_size;
  1.3166 +
  1.3167 +  intptr_t start = (intptr_t)__ pc();
  1.3168 +
  1.3169 +  // First thing make an ic check to see if we should even be here
  1.3170 +
  1.3171 +  {
  1.3172 +    Label L;
  1.3173 +    const Register temp_reg = G3_scratch;
  1.3174 +    Address ic_miss(temp_reg, SharedRuntime::get_ic_miss_stub());
  1.3175 +    __ verify_oop(O0);
  1.3176 +    __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg);
  1.3177 +    __ cmp(temp_reg, G5_inline_cache_reg);
  1.3178 +    __ brx(Assembler::equal, true, Assembler::pt, L);
  1.3179 +    __ delayed()->nop();
  1.3180 +
  1.3181 +    __ jump_to(ic_miss, 0);
  1.3182 +    __ delayed()->nop();
  1.3183 +    __ align(CodeEntryAlignment);
  1.3184 +    __ bind(L);
  1.3185 +  }
  1.3186 +
  1.3187 +  int vep_offset = ((intptr_t)__ pc()) - start;
  1.3188 +
  1.3189 +
  1.3190 +  // The instruction at the verified entry point must be 5 bytes or longer
  1.3191 +  // because it can be patched on the fly by make_non_entrant. The stack bang
  1.3192 +  // instruction fits that requirement.
  1.3193 +
  1.3194 +  // Generate stack overflow check before creating frame
  1.3195 +  __ generate_stack_overflow_check(stack_size);
  1.3196 +
  1.3197 +  assert(((intptr_t)__ pc() - start - vep_offset) >= 5,
  1.3198 +         "valid size for make_non_entrant");
  1.3199 +
  1.3200 +  // Generate a new frame for the wrapper.
  1.3201 +  __ save(SP, -stack_size, SP);
  1.3202 +
  1.3203 +  // Frame is now completed as far a size and linkage.
  1.3204 +
  1.3205 +  int frame_complete = ((intptr_t)__ pc()) - start;
  1.3206 +
  1.3207 +#ifdef ASSERT
  1.3208 +  bool reg_destroyed[RegisterImpl::number_of_registers];
  1.3209 +  bool freg_destroyed[FloatRegisterImpl::number_of_registers];
  1.3210 +  for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
  1.3211 +    reg_destroyed[r] = false;
  1.3212 +  }
  1.3213 +  for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) {
  1.3214 +    freg_destroyed[f] = false;
  1.3215 +  }
  1.3216 +
  1.3217 +#endif /* ASSERT */
  1.3218 +
  1.3219 +  VMRegPair zero;
  1.3220 +  const Register g0 = G0; // without this we get a compiler warning (why??)
  1.3221 +  zero.set2(g0->as_VMReg());
  1.3222 +
  1.3223 +  int c_arg, j_arg;
  1.3224 +
  1.3225 +  Register conversion_off = noreg;
  1.3226 +
  1.3227 +  for (j_arg = first_arg_to_pass, c_arg = 0 ;
  1.3228 +       j_arg < total_args_passed ; j_arg++, c_arg++ ) {
  1.3229 +
  1.3230 +    VMRegPair src = in_regs[j_arg];
  1.3231 +    VMRegPair dst = out_regs[c_arg];
  1.3232 +
  1.3233 +#ifdef ASSERT
  1.3234 +    if (src.first()->is_Register()) {
  1.3235 +      assert(!reg_destroyed[src.first()->as_Register()->encoding()], "ack!");
  1.3236 +    } else if (src.first()->is_FloatRegister()) {
  1.3237 +      assert(!freg_destroyed[src.first()->as_FloatRegister()->encoding(
  1.3238 +                                               FloatRegisterImpl::S)], "ack!");
  1.3239 +    }
  1.3240 +    if (dst.first()->is_Register()) {
  1.3241 +      reg_destroyed[dst.first()->as_Register()->encoding()] = true;
  1.3242 +    } else if (dst.first()->is_FloatRegister()) {
  1.3243 +      freg_destroyed[dst.first()->as_FloatRegister()->encoding(
  1.3244 +                                                 FloatRegisterImpl::S)] = true;
  1.3245 +    }
  1.3246 +#endif /* ASSERT */
  1.3247 +
  1.3248 +    switch (in_sig_bt[j_arg]) {
  1.3249 +      case T_ARRAY:
  1.3250 +      case T_OBJECT:
  1.3251 +        {
  1.3252 +          if (out_sig_bt[c_arg] == T_BYTE  || out_sig_bt[c_arg] == T_SHORT ||
  1.3253 +              out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) {
  1.3254 +            // need to unbox a one-slot value
  1.3255 +            Register in_reg = L0;
  1.3256 +            Register tmp = L2;
  1.3257 +            if ( src.first()->is_reg() ) {
  1.3258 +              in_reg = src.first()->as_Register();
  1.3259 +            } else {
  1.3260 +              assert(Assembler::is_simm13(reg2offset(src.first()) + STACK_BIAS),
  1.3261 +                     "must be");
  1.3262 +              __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, in_reg);
  1.3263 +            }
  1.3264 +            // If the final destination is an acceptable register
  1.3265 +            if ( dst.first()->is_reg() ) {
  1.3266 +              if ( dst.is_single_phys_reg() || out_sig_bt[c_arg] != T_LONG ) {
  1.3267 +                tmp = dst.first()->as_Register();
  1.3268 +              }
  1.3269 +            }
  1.3270 +
  1.3271 +            Label skipUnbox;
  1.3272 +            if ( wordSize == 4 && out_sig_bt[c_arg] == T_LONG ) {
  1.3273 +              __ mov(G0, tmp->successor());
  1.3274 +            }
  1.3275 +            __ br_null(in_reg, true, Assembler::pn, skipUnbox);
  1.3276 +            __ delayed()->mov(G0, tmp);
  1.3277 +
  1.3278 +            BasicType bt = out_sig_bt[c_arg];
  1.3279 +            int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt);
  1.3280 +            switch (bt) {
  1.3281 +                case T_BYTE:
  1.3282 +                  __ ldub(in_reg, box_offset, tmp); break;
  1.3283 +                case T_SHORT:
  1.3284 +                  __ lduh(in_reg, box_offset, tmp); break;
  1.3285 +                case T_INT:
  1.3286 +                  __ ld(in_reg, box_offset, tmp); break;
  1.3287 +                case T_LONG:
  1.3288 +                  __ ld_long(in_reg, box_offset, tmp); break;
  1.3289 +                default: ShouldNotReachHere();
  1.3290 +            }
  1.3291 +
  1.3292 +            __ bind(skipUnbox);
  1.3293 +            // If tmp wasn't final destination copy to final destination
  1.3294 +            if (tmp == L2) {
  1.3295 +              VMRegPair tmp_as_VM = reg64_to_VMRegPair(L2);
  1.3296 +              if (out_sig_bt[c_arg] == T_LONG) {
  1.3297 +                long_move(masm, tmp_as_VM, dst);
  1.3298 +              } else {
  1.3299 +                move32_64(masm, tmp_as_VM, out_regs[c_arg]);
  1.3300 +              }
  1.3301 +            }
  1.3302 +            if (out_sig_bt[c_arg] == T_LONG) {
  1.3303 +              assert(out_sig_bt[c_arg+1] == T_VOID, "must be");
  1.3304 +              ++c_arg; // move over the T_VOID to keep the loop indices in sync
  1.3305 +            }
  1.3306 +          } else if (out_sig_bt[c_arg] == T_ADDRESS) {
  1.3307 +            Register s =
  1.3308 +                src.first()->is_reg() ? src.first()->as_Register() : L2;
  1.3309 +            Register d =
  1.3310 +                dst.first()->is_reg() ? dst.first()->as_Register() : L2;
  1.3311 +
  1.3312 +            // We store the oop now so that the conversion pass can reach
  1.3313 +            // while in the inner frame. This will be the only store if
  1.3314 +            // the oop is NULL.
  1.3315 +            if (s != L2) {
  1.3316 +              // src is register
  1.3317 +              if (d != L2) {
  1.3318 +                // dst is register
  1.3319 +                __ mov(s, d);
  1.3320 +              } else {
  1.3321 +                assert(Assembler::is_simm13(reg2offset(dst.first()) +
  1.3322 +                          STACK_BIAS), "must be");
  1.3323 +                __ st_ptr(s, SP, reg2offset(dst.first()) + STACK_BIAS);
  1.3324 +              }
  1.3325 +            } else {
  1.3326 +                // src not a register
  1.3327 +                assert(Assembler::is_simm13(reg2offset(src.first()) +
  1.3328 +                           STACK_BIAS), "must be");
  1.3329 +                __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, d);
  1.3330 +                if (d == L2) {
  1.3331 +                  assert(Assembler::is_simm13(reg2offset(dst.first()) +
  1.3332 +                             STACK_BIAS), "must be");
  1.3333 +                  __ st_ptr(d, SP, reg2offset(dst.first()) + STACK_BIAS);
  1.3334 +                }
  1.3335 +            }
  1.3336 +          } else if (out_sig_bt[c_arg] != T_VOID) {
  1.3337 +            // Convert the arg to NULL
  1.3338 +            if (dst.first()->is_reg()) {
  1.3339 +              __ mov(G0, dst.first()->as_Register());
  1.3340 +            } else {
  1.3341 +              assert(Assembler::is_simm13(reg2offset(dst.first()) +
  1.3342 +                         STACK_BIAS), "must be");
  1.3343 +              __ st_ptr(G0, SP, reg2offset(dst.first()) + STACK_BIAS);
  1.3344 +            }
  1.3345 +          }
  1.3346 +        }
  1.3347 +        break;
  1.3348 +      case T_VOID:
  1.3349 +        break;
  1.3350 +
  1.3351 +      case T_FLOAT:
  1.3352 +        if (src.first()->is_stack()) {
  1.3353 +          // Stack to stack/reg is simple
  1.3354 +          move32_64(masm, src, dst);
  1.3355 +        } else {
  1.3356 +          if (dst.first()->is_reg()) {
  1.3357 +            // freg -> reg
  1.3358 +            int off =
  1.3359 +              STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
  1.3360 +            Register d = dst.first()->as_Register();
  1.3361 +            if (Assembler::is_simm13(off)) {
  1.3362 +              __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
  1.3363 +                     SP, off);
  1.3364 +              __ ld(SP, off, d);
  1.3365 +            } else {
  1.3366 +              if (conversion_off == noreg) {
  1.3367 +                __ set(off, L6);
  1.3368 +                conversion_off = L6;
  1.3369 +              }
  1.3370 +              __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
  1.3371 +                     SP, conversion_off);
  1.3372 +              __ ld(SP, conversion_off , d);
  1.3373 +            }
  1.3374 +          } else {
  1.3375 +            // freg -> mem
  1.3376 +            int off = STACK_BIAS + reg2offset(dst.first());
  1.3377 +            if (Assembler::is_simm13(off)) {
  1.3378 +              __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
  1.3379 +                     SP, off);
  1.3380 +            } else {
  1.3381 +              if (conversion_off == noreg) {
  1.3382 +                __ set(off, L6);
  1.3383 +                conversion_off = L6;
  1.3384 +              }
  1.3385 +              __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
  1.3386 +                     SP, conversion_off);
  1.3387 +            }
  1.3388 +          }
  1.3389 +        }
  1.3390 +        break;
  1.3391 +
  1.3392 +      case T_DOUBLE:
  1.3393 +        assert( j_arg + 1 < total_args_passed &&
  1.3394 +                in_sig_bt[j_arg + 1] == T_VOID &&
  1.3395 +                out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
  1.3396 +        if (src.first()->is_stack()) {
  1.3397 +          // Stack to stack/reg is simple
  1.3398 +          long_move(masm, src, dst);
  1.3399 +        } else {
  1.3400 +          Register d = dst.first()->is_reg() ? dst.first()->as_Register() : L2;
  1.3401 +
  1.3402 +          // Destination could be an odd reg on 32bit in which case
  1.3403 +          // we can't load direct to the destination.
  1.3404 +
  1.3405 +          if (!d->is_even() && wordSize == 4) {
  1.3406 +            d = L2;
  1.3407 +          }
  1.3408 +          int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
  1.3409 +          if (Assembler::is_simm13(off)) {
  1.3410 +            __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(),
  1.3411 +                   SP, off);
  1.3412 +            __ ld_long(SP, off, d);
  1.3413 +          } else {
  1.3414 +            if (conversion_off == noreg) {
  1.3415 +              __ set(off, L6);
  1.3416 +              conversion_off = L6;
  1.3417 +            }
  1.3418 +            __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(),
  1.3419 +                   SP, conversion_off);
  1.3420 +            __ ld_long(SP, conversion_off, d);
  1.3421 +          }
  1.3422 +          if (d == L2) {
  1.3423 +            long_move(masm, reg64_to_VMRegPair(L2), dst);
  1.3424 +          }
  1.3425 +        }
  1.3426 +        break;
  1.3427 +
  1.3428 +      case T_LONG :
  1.3429 +        // 32bit can't do a split move of something like g1 -> O0, O1
  1.3430 +        // so use a memory temp
  1.3431 +        if (src.is_single_phys_reg() && wordSize == 4) {
  1.3432 +          Register tmp = L2;
  1.3433 +          if (dst.first()->is_reg() &&
  1.3434 +              (wordSize == 8 || dst.first()->as_Register()->is_even())) {
  1.3435 +            tmp = dst.first()->as_Register();
  1.3436 +          }
  1.3437 +
  1.3438 +          int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
  1.3439 +          if (Assembler::is_simm13(off)) {
  1.3440 +            __ stx(src.first()->as_Register(), SP, off);
  1.3441 +            __ ld_long(SP, off, tmp);
  1.3442 +          } else {
  1.3443 +            if (conversion_off == noreg) {
  1.3444 +              __ set(off, L6);
  1.3445 +              conversion_off = L6;
  1.3446 +            }
  1.3447 +            __ stx(src.first()->as_Register(), SP, conversion_off);
  1.3448 +            __ ld_long(SP, conversion_off, tmp);
  1.3449 +          }
  1.3450 +
  1.3451 +          if (tmp == L2) {
  1.3452 +            long_move(masm, reg64_to_VMRegPair(L2), dst);
  1.3453 +          }
  1.3454 +        } else {
  1.3455 +          long_move(masm, src, dst);
  1.3456 +        }
  1.3457 +        break;
  1.3458 +
  1.3459 +      case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
  1.3460 +
  1.3461 +      default:
  1.3462 +        move32_64(masm, src, dst);
  1.3463 +    }
  1.3464 +  }
  1.3465 +
  1.3466 +
  1.3467 +  // If we have any strings we must store any register based arg to the stack
  1.3468 +  // This includes any still live xmm registers too.
  1.3469 +
  1.3470 +  if (total_strings > 0 ) {
  1.3471 +
  1.3472 +    // protect all the arg registers
  1.3473 +    __ save_frame(0);
  1.3474 +    __ mov(G2_thread, L7_thread_cache);
  1.3475 +    const Register L2_string_off = L2;
  1.3476 +
  1.3477 +    // Get first string offset
  1.3478 +    __ set(string_locs * VMRegImpl::stack_slot_size, L2_string_off);
  1.3479 +
  1.3480 +    for (c_arg = 0 ; c_arg < total_c_args ; c_arg++ ) {
  1.3481 +      if (out_sig_bt[c_arg] == T_ADDRESS) {
  1.3482 +
  1.3483 +        VMRegPair dst = out_regs[c_arg];
  1.3484 +        const Register d = dst.first()->is_reg() ?
  1.3485 +            dst.first()->as_Register()->after_save() : noreg;
  1.3486 +
  1.3487 +        // It's a string the oop and it was already copied to the out arg
  1.3488 +        // position
  1.3489 +        if (d != noreg) {
  1.3490 +          __ mov(d, O0);
  1.3491 +        } else {
  1.3492 +          assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS),
  1.3493 +                 "must be");
  1.3494 +          __ ld_ptr(FP,  reg2offset(dst.first()) + STACK_BIAS, O0);
  1.3495 +        }
  1.3496 +        Label skip;
  1.3497 +
  1.3498 +        __ br_null(O0, false, Assembler::pn, skip);
  1.3499 +        __ delayed()->add(FP, L2_string_off, O1);
  1.3500 +
  1.3501 +        if (d != noreg) {
  1.3502 +          __ mov(O1, d);
  1.3503 +        } else {
  1.3504 +          assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS),
  1.3505 +                 "must be");
  1.3506 +          __ st_ptr(O1, FP,  reg2offset(dst.first()) + STACK_BIAS);
  1.3507 +        }
  1.3508 +
  1.3509 +        __ call(CAST_FROM_FN_PTR(address, SharedRuntime::get_utf),
  1.3510 +                relocInfo::runtime_call_type);
  1.3511 +        __ delayed()->add(L2_string_off, max_dtrace_string_size, L2_string_off);
  1.3512 +
  1.3513 +        __ bind(skip);
  1.3514 +
  1.3515 +      }
  1.3516 +
  1.3517 +    }
  1.3518 +    __ mov(L7_thread_cache, G2_thread);
  1.3519 +    __ restore();
  1.3520 +
  1.3521 +  }
  1.3522 +
  1.3523 +
  1.3524 +  // Ok now we are done. Need to place the nop that dtrace wants in order to
  1.3525 +  // patch in the trap
  1.3526 +
  1.3527 +  int patch_offset = ((intptr_t)__ pc()) - start;
  1.3528 +
  1.3529 +  __ nop();
  1.3530 +
  1.3531 +
  1.3532 +  // Return
  1.3533 +
  1.3534 +  __ ret();
  1.3535 +  __ delayed()->restore();
  1.3536 +
  1.3537 +  __ flush();
  1.3538 +
  1.3539 +  nmethod *nm = nmethod::new_dtrace_nmethod(
  1.3540 +      method, masm->code(), vep_offset, patch_offset, frame_complete,
  1.3541 +      stack_slots / VMRegImpl::slots_per_word);
  1.3542 +  return nm;
  1.3543 +
  1.3544 +}
  1.3545 +
  1.3546 +#endif // HAVE_DTRACE_H
  1.3547 +
  1.3548 +// this function returns the adjust size (in number of words) to a c2i adapter
  1.3549 +// activation for use during deoptimization
  1.3550 +int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
  1.3551 +	return (callee_locals - callee_parameters) * Interpreter::stackElementWords;
  1.3552 +}
  1.3553 +
  1.3554 +// "Top of Stack" slots that may be unused by the calling convention but must
  1.3555 +// otherwise be preserved.
  1.3556 +// On Intel these are not necessary and the value can be zero.
  1.3557 +// On Sparc this describes the words reserved for storing a register window
  1.3558 +// when an interrupt occurs.
  1.3559 +uint SharedRuntime::out_preserve_stack_slots() {
  1.3560 +  //return frame::register_save_words * VMRegImpl::slots_per_word;
  1.3561 +	 return 0;
  1.3562 +}
  1.3563 +/*
  1.3564 +static void gen_new_frame(MacroAssembler* masm, bool deopt) {
  1.3565 +//
  1.3566 +// Common out the new frame generation for deopt and uncommon trap
  1.3567 +//
  1.3568 +  Register        G3pcs              = G3_scratch; // Array of new pcs (input)
  1.3569 +  Register        Oreturn0           = O0;
  1.3570 +  Register        Oreturn1           = O1;
  1.3571 +  Register        O2UnrollBlock      = O2;
  1.3572 +  Register        O3array            = O3;         // Array of frame sizes (input)
  1.3573 +  Register        O4array_size       = O4;         // number of frames (input)
  1.3574 +  Register        O7frame_size       = O7;         // number of frames (input)
  1.3575 +
  1.3576 +  __ ld_ptr(O3array, 0, O7frame_size);
  1.3577 +  __ sub(G0, O7frame_size, O7frame_size);
  1.3578 +  __ save(SP, O7frame_size, SP);
  1.3579 +  __ ld_ptr(G3pcs, 0, I7);                      // load frame's new pc
  1.3580 +
  1.3581 +  #ifdef ASSERT
  1.3582 +  // make sure that the frames are aligned properly
  1.3583 +#ifndef _LP64
  1.3584 +  __ btst(wordSize*2-1, SP);
  1.3585 +  __ breakpoint_trap(Assembler::notZero);
  1.3586 +#endif
  1.3587 +  #endif
  1.3588 +
  1.3589 +  // Deopt needs to pass some extra live values from frame to frame
  1.3590 +
  1.3591 +  if (deopt) {
  1.3592 +    __ mov(Oreturn0->after_save(), Oreturn0);
  1.3593 +    __ mov(Oreturn1->after_save(), Oreturn1);
  1.3594 +  }
  1.3595 +
  1.3596 +  __ mov(O4array_size->after_save(), O4array_size);
  1.3597 +  __ sub(O4array_size, 1, O4array_size);
  1.3598 +  __ mov(O3array->after_save(), O3array);
  1.3599 +  __ mov(O2UnrollBlock->after_save(), O2UnrollBlock);
  1.3600 +  __ add(G3pcs, wordSize, G3pcs);               // point to next pc value
  1.3601 +
  1.3602 +  #ifdef ASSERT
  1.3603 +  // trash registers to show a clear pattern in backtraces
  1.3604 +  __ set(0xDEAD0000, I0);
  1.3605 +  __ add(I0,  2, I1);
  1.3606 +  __ add(I0,  4, I2);
  1.3607 +  __ add(I0,  6, I3);
  1.3608 +  __ add(I0,  8, I4);
  1.3609 +  // Don't touch I5 could have valuable savedSP
  1.3610 +  __ set(0xDEADBEEF, L0);
  1.3611 +  __ mov(L0, L1);
  1.3612 +  __ mov(L0, L2);
  1.3613 +  __ mov(L0, L3);
  1.3614 +  __ mov(L0, L4);
  1.3615 +  __ mov(L0, L5);
  1.3616 +
  1.3617 +  // trash the return value as there is nothing to return yet
  1.3618 +  __ set(0xDEAD0001, O7);
  1.3619 +  #endif
  1.3620 +
  1.3621 +  __ mov(SP, O5_savedSP);
  1.3622 +}
  1.3623 +
  1.3624 +
  1.3625 +static void make_new_frames(MacroAssembler* masm, bool deopt) {
  1.3626 +  //
  1.3627 +  // loop through the UnrollBlock info and create new frames
  1.3628 +  //
  1.3629 +  Register        G3pcs              = G3_scratch;
  1.3630 +  Register        Oreturn0           = O0;
  1.3631 +  Register        Oreturn1           = O1;
  1.3632 +  Register        O2UnrollBlock      = O2;
  1.3633 +  Register        O3array            = O3;
  1.3634 +  Register        O4array_size       = O4;
  1.3635 +  Label           loop;
  1.3636 +
  1.3637 +  // Before we make new frames, check to see if stack is available.
  1.3638 +  // Do this after the caller's return address is on top of stack
  1.3639 +  if (UseStackBanging) {
  1.3640 +    // Get total frame size for interpreted frames
  1.3641 +    __ ld(Address(O2UnrollBlock, 0,
  1.3642 +         Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()), O4);
  1.3643 +    __ bang_stack_size(O4, O3, G3_scratch);
  1.3644 +  }
  1.3645 +
  1.3646 +  __ ld(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()), O4array_size);
  1.3647 +  __ ld_ptr(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()), G3pcs);
  1.3648 +
  1.3649 +  __ ld_ptr(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()), O3array);
  1.3650 +
  1.3651 +  // Adjust old interpreter frame to make space for new frame's extra java locals
  1.3652 +  //
  1.3653 +  // We capture the original sp for the transition frame only because it is needed in
  1.3654 +  // order to properly calculate interpreter_sp_adjustment. Even though in real life
  1.3655 +  // every interpreter frame captures a savedSP it is only needed at the transition
  1.3656 +  // (fortunately). If we had to have it correct everywhere then we would need to
  1.3657 +  // be told the sp_adjustment for each frame we create. If the frame size array
  1.3658 +  // were to have twice the frame count entries then we could have pairs [sp_adjustment, frame_size]
  1.3659 +  // for each frame we create and keep up the illusion every where.
  1.3660 +  //
  1.3661 +
  1.3662 +  __ ld(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()), O7);
  1.3663 +  __ mov(SP, O5_savedSP);       // remember initial sender's original sp before adjustment
  1.3664 +  __ sub(SP, O7, SP);
  1.3665 +
  1.3666 +#ifdef ASSERT
  1.3667 +  // make sure that there is at least one entry in the array
  1.3668 +  __ tst(O4array_size);
  1.3669 +  __ breakpoint_trap(Assembler::zero);
  1.3670 +#endif
  1.3671 +
  1.3672 +  // Now push the new interpreter frames
  1.3673 +  __ bind(loop);
  1.3674 +
  1.3675 +  // allocate a new frame, filling the registers
  1.3676 +
  1.3677 +  gen_new_frame(masm, deopt);        // allocate an interpreter frame
  1.3678 +
  1.3679 +  __ tst(O4array_size);
  1.3680 +  __ br(Assembler::notZero, false, Assembler::pn, loop);
  1.3681 +  __ delayed()->add(O3array, wordSize, O3array);
  1.3682 +  __ ld_ptr(G3pcs, 0, O7);                      // load final frame new pc
  1.3683 +
  1.3684 +}
  1.3685 +*/
  1.3686 +
//------------------------------generate_deopt_blob----------------------------
// Generate the blob the runtime jumps to when a compiled frame must be
// deoptimized.  The blob has several entry points:
//   - normal deopt entry (blob start),
//   - re-execute entry (reexecute_offset),
//   - exception entry (exception_offset), and
//   - exception-in-TLS entry (exception_in_tls_offset).
// Each entry saves all live registers, calls Deoptimization::fetch_unroll_info
// to get an UnrollBlock, pops the deoptimized frame, pushes the skeletal
// interpreter frames the UnrollBlock describes, and finally calls
// Deoptimization::unpack_frames to fill them in before returning to the
// interpreter.
void SharedRuntime::generate_deopt_blob() {
  // allocate space for the code
  ResourceMark rm;
  // setup code generation tools
  //CodeBuffer     buffer ("deopt_blob", 4000, 2048);
  CodeBuffer     buffer ("deopt_blob", 8000, 2048);//aoqi FIXME for debug
  MacroAssembler* masm  = new MacroAssembler( & buffer);
  int frame_size_in_words;
  OopMap* map = NULL;
  // Account for the extra args we place on the stack
  // by the time we call fetch_unroll_info
  const int additional_words = 2; // deopt kind, thread

  OopMapSet *oop_maps = new OopMapSet();

  address start = __ pc();
  Label cont;
  // we use S3 for DeOpt reason register
  Register reason = S3;
  // use S6 for thread register
  Register thread = TREG;
  // use S7 for fetch_unroll_info returned UnrollBlock
  Register unroll = S7;
  // Prolog for non exception case!
  // Correct the return address we were given.
  // FIXME: is the return address on the tos or in RA?
  __ addi(RA, RA, - (NativeCall::return_address_offset));
  // Save everything in sight.
  map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words);
  // Normal deoptimization
  __ move(reason, Deoptimization::Unpack_deopt);
  __ b(cont);
  __ delayed()->nop();

  int reexecute_offset = __ pc() - start;

  // Reexecute case
  // return address is the pc that describes what bci to re-execute at

  // No need to update map as each call to save_live_registers will produce identical oopmap
  //__ addi(RA, RA, - (NativeCall::return_address_offset));
  (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words);
  __ move(reason, Deoptimization::Unpack_reexecute);
  __ b(cont);
  __ delayed()->nop();

  int   exception_offset = __ pc() - start;
  // Prolog for exception case

  // all registers are dead at this entry point, except for V0 and
  // V1 which contain the exception oop and exception pc
  // respectively.  Set them in TLS and fall thru to the
  // unpack_with_exception_in_tls entry point.

  __ get_thread(thread);
  __ st_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset()));
  __ st_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset()));
  int exception_in_tls_offset = __ pc() - start;
  // new implementation because exception oop is now passed in JavaThread

  // Prolog for exception case
  // All registers must be preserved because they might be used by LinearScan
  // Exception oop and throwing PC are passed in JavaThread
  // tos: stack at point of call to method that threw the exception (i.e. only
  // args are on the stack, no return address)

  // Return address will be patched later with the throwing pc. The correct value is not
  // available now because loading it from memory would destroy registers.
  // Save everything in sight.
  // No need to update map as each call to save_live_registers will produce identical oopmap
  __ addi(RA, RA, - (NativeCall::return_address_offset));
  (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words);

  // Now it is safe to overwrite any register
  // store the correct deoptimization type
  __ move(reason, Deoptimization::Unpack_exception);
  // load throwing pc from JavaThread and patch it as the return address
  // of the current frame. Then clear the field in JavaThread
  __ get_thread(thread);
  __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset()));
  __ st_ptr(V1, SP, RegisterSaver::raOffset() * wordSize); //save ra
  __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset()));


#ifdef ASSERT
  // verify that there is really an exception oop in JavaThread
  __ ld_ptr(AT, thread, in_bytes(JavaThread::exception_oop_offset()));
  __ verify_oop(AT);
  // verify that there is no pending exception
  Label no_pending_exception;
  __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset()));
  __ beq(AT, R0, no_pending_exception);
  __ delayed()->nop();
  __ stop("must not have pending exception here");
  __ bind(no_pending_exception);
#endif
  __ bind(cont);
  // Compiled code leaves the floating point stack dirty, empty it.
  __ empty_FPU_stack();


  // Call C code.  Need thread and this frame, but NOT official VM entry
  // crud.  We cannot block on this call, no GC can happen.
#ifndef OPT_THREAD
  __ get_thread(thread);
#endif

/*
 * Expected code shape for the fetch_unroll_info call (kept for reference):
 *
   0x000000555bd82aec: dadd a0, s6, zero                ; __ move(A0, thread);
   0x000000555bd82af0: daddi sp, sp, 0xfffffff0         ; __ addi(SP, SP, -additional_words  * wordSize);
   0x000000555bd82af4: sd sp, 0x1c8(s6)                 ; __ set_last_Java_frame(thread, NOREG, NOREG, NULL);
   0x000000555bd82af8: lui at, 0x0                      ; __ li64(AT, save_pc);
   0x000000555bd82afc: ori at, at, 0x55
   0x000000555bd82b00: dsll at, at, 16
   0x000000555bd82b04: ori at, at, 0x5bd8
   0x000000555bd82b08: dsll at, at, 16
   0x000000555bd82b0c: ori at, at, 0x2b34       ; save_pc = pc() +  NativeMovConstReg::instruction_size + NativeCall::return_address_offset + 4
   0x000000555bd82b10: sd at, 0x1d0(s6)
   0x000000555bd82b14: lui t9, 0x0
   0x000000555bd82b18: ori t9, t9, 0x55
   0x000000555bd82b1c: dsll t9, t9, 16
   0x000000555bd82b20: ori t9, t9, 0x5aa6
   0x000000555bd82b24: dsll t9, t9, 16
   0x000000555bd82b28: ori t9, t9, 0x4074
   0x000000555bd82b2c: jalr t9
   0x000000555bd82b30: sll zero, zero, 0

   0x000000555bd82b34: daddiu sp, sp, 0x10	; save_pc
 */
  __ move(A0, thread);
  __ addi(SP, SP, -additional_words  * wordSize);

  __ set_last_Java_frame(NOREG, NOREG, NULL);

  // Call fetch_unroll_info().  Need thread and this frame, but NOT official VM entry - cannot block on
  // this call, no GC can happen.  Call should capture return values.

  __ relocate(relocInfo::internal_pc_type);
  {
    // Record the address just past the call (see disassembly above) as the
    // frame anchor's last_Java_pc.
    intptr_t save_pc = (intptr_t)__ pc() +  NativeMovConstReg::instruction_size + NativeCall::return_address_offset + 4;
    __ li48(AT, save_pc);
  }
  __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));

  __ call((address)Deoptimization::fetch_unroll_info);
  //__ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type);
  __ delayed()->nop();
  oop_maps->add_gc_map(__ pc() - start, map);
  __ addiu(SP, SP, additional_words * wordSize);
  __ get_thread(thread);
  __ reset_last_Java_frame(false, true);

  // Load UnrollBlock into S7
  __ move(unroll, V0);


  // Move the unpack kind to a safe place in the UnrollBlock because
  // we are very short of registers

  Address unpack_kind(unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes());
  //__ pop(reason);
  __ sw(reason, unpack_kind);
  // save the unpack_kind value
  // Retrieve the possible live values (return values)
  // All callee save registers representing jvm state
  // are now in the vframeArray.

  Label noException;
  __ move(AT, Deoptimization::Unpack_exception);
  __ bne(AT, reason, noException);// Was exception pending?
  __ delayed()->nop();
  // Exception case: fetch the exception oop/pc from TLS and clear the fields.
  __ ld_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset()));
  __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset()));
  __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset()));
  __ st_ptr(R0, thread, in_bytes(JavaThread::exception_oop_offset()));

  __ verify_oop(V0);

  // Overwrite the result registers with the exception results.
  __ st_ptr(V0, SP, RegisterSaver::v0Offset()*wordSize);
  __ st_ptr(V1, SP, RegisterSaver::v1Offset()*wordSize);

  __ bind(noException);


  // Stack is back to only having register save data on the stack.
  // Now restore the result registers. Everything else is either dead or captured
  // in the vframeArray.

  RegisterSaver::restore_result_registers(masm);
  // All of the register save area has been popped of the stack. Only the
  // return address remains.
  // Pop all the frames we must move/replace.
  // Frame picture (youngest to oldest)
  // 1: self-frame (no frame link)
  // 2: deopting frame  (no frame link)
  // 3: caller of deopting frame (could be compiled/interpreted).
  //
  // Note: by leaving the return address of self-frame on the stack
  // and using the size of frame 2 to adjust the stack
  // when we are done the return to frame 3 will still be on the stack.

  // register for the sender's sp
  Register sender_sp = Rsender;
  // register for frame pcs
  Register pcs = T0;
  // register for frame sizes
  Register sizes = T1;
  // register for frame count
  Register count = T3;

  // Pop deoptimized frame
  __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes());
  __ add(SP, SP, AT);
  // sp should be pointing at the return address to the caller (3)

  // Load array of frame pcs into pcs
  __ ld_ptr(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes());
  __ addi(SP, SP, wordSize);  // trash the old pc
  // Load array of frame sizes into sizes (T1)
  __ ld_ptr(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes());



  // Load count of frames into count (T3)
  __ lw(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes());
  // Pick up the initial fp we should save
  __ ld(FP, unroll,  Deoptimization::UnrollBlock::initial_info_offset_in_bytes());
  // Now adjust the caller's stack to make up for the extra locals
  // but record the original sp so that we can save it in the skeletal interpreter
  // frame and the stack walking of interpreter_sender will get the unextended sp
  // value and not the "real" sp value.
  __ move(sender_sp, SP);
  __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes());
  __ sub(SP, SP, AT);

  // Push interpreter frames in a loop
/*
 * Expected code shape for the frame-pushing loop (kept for reference):
 *
Loop:
   0x000000555bd82d18: lw t2, 0x0(t1)           ; lw sizes[i]	<--- error lw->ld
   0x000000555bd82d1c: ld at, 0x0(t0)           ; ld pcs[i]
   0x000000555bd82d20: daddi t2, t2, 0xfffffff0 ; t2 -= 16 
   0x000000555bd82d24: daddi sp, sp, 0xfffffff0
   0x000000555bd82d28: sd fp, 0x0(sp)           ; push fp
   0x000000555bd82d2c: sd at, 0x8(sp)           ; push at
   0x000000555bd82d30: dadd fp, sp, zero        ; fp <- sp 
   0x000000555bd82d34: dsub sp, sp, t2          ; sp -= t2 
   0x000000555bd82d38: sd zero, 0xfffffff0(fp)  ; __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize);
   0x000000555bd82d3c: sd s4, 0xfffffff8(fp)    ; __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);
   0x000000555bd82d40: dadd s4, sp, zero        ; move(sender_sp, SP);
   0x000000555bd82d44: daddi t3, t3, 0xffffffff ; count --
   0x000000555bd82d48: daddi t1, t1, 0x4        ; sizes += 4
   0x000000555bd82d4c: bne t3, zero, 0x000000555bd82d18
   0x000000555bd82d50: daddi t0, t0, 0x4        ; <--- error    t0 += 8
 */

// pcs[0] = frame_pcs[0] = deopt_sender.raw_pc(); regex.split
  Label loop;
  __ bind(loop);
  __ ld(T2, sizes, 0);		// Load frame size
  __ ld_ptr(AT, pcs, 0);  	       // save return address
  __ addi(T2, T2, -2*wordSize);           // we'll push pc and rbp, by hand
  __ push2(AT, FP);
  __ move(FP, SP);
  __ sub(SP, SP, T2); 			// Prolog!
  // This value is corrected by layout_activation_impl
  __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize);
  __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable
  __ move(sender_sp, SP);	// pass to next frame
  __ addi(count, count, -1); 	// decrement counter
  __ addi(sizes, sizes, wordSize); 	// Bump array pointer (sizes)
  __ bne(count, R0, loop);
  __ delayed()->addi(pcs, pcs, wordSize); 	// Bump array pointer (pcs)
  __ ld(AT, pcs, 0);			// frame_pcs[number_of_frames] = Interpreter::deopt_entry(vtos, 0);
  // Re-push self-frame
  __ push2(AT, FP);
  __ move(FP, SP);
  __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize);
  __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);
  __ addi(SP, SP, -(frame_size_in_words - 2 - additional_words) * wordSize);

  // Restore frame locals after moving the frame
  __ sd(V0, SP, RegisterSaver::v0Offset() * wordSize);
  __ sd(V1, SP, RegisterSaver::v1Offset() * wordSize);
  __ sdc1(F0, SP, RegisterSaver::fpResultOffset()* wordSize);// Pop float stack and store in local
  __ sdc1(F1, SP, (RegisterSaver::fpResultOffset() + 1) * wordSize);


  // Call unpack_frames().  Need thread and this frame, but NOT official VM entry - cannot block on
  // this call, no GC can happen.
  __ move(A1, reason);	// exec_mode
  __ get_thread(thread);
  __ move(A0, thread);	// thread
  __ addi(SP, SP, (-additional_words) *wordSize);

  // set last_Java_sp, last_Java_fp
  __ set_last_Java_frame(NOREG, FP, NULL);

  __ move(AT, -(StackAlignmentInBytes));
  __ andr(SP, SP, AT);   // Fix stack alignment as required by ABI

  __ relocate(relocInfo::internal_pc_type);
  {
    // Record the address just past the call as the frame anchor's last_Java_pc.
    intptr_t save_pc = (intptr_t)__ pc() +  NativeMovConstReg::instruction_size + NativeCall::return_address_offset + 4;
    __ li48(AT, save_pc);
  }
  __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));

  //__ call(Deoptimization::unpack_frames);
  __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), relocInfo::runtime_call_type);
  __ delayed()->nop();
  // Revert SP alignment after call since we're going to do some SP relative addressing below
  __ ld(SP, thread, in_bytes(JavaThread::last_Java_sp_offset()));
  // Set an oopmap for the call site
  oop_maps->add_gc_map(__ offset(), new OopMap( frame_size_in_words , 0));

  __ push(V0);

  __ get_thread(thread);
  __ reset_last_Java_frame(false, false);

  // Collect return values
  __ ld(V0, SP, (RegisterSaver::v0Offset() + additional_words +1) * wordSize);
  __ ld(V1, SP, (RegisterSaver::v1Offset() + additional_words +1) * wordSize);
  __ ldc1(F0, SP, RegisterSaver::fpResultOffset()* wordSize);// Pop float stack and store in local
  __ ldc1(F1, SP, (RegisterSaver::fpResultOffset() + 1) * wordSize);
  //FIXME,
  // Clear floating point stack before returning to interpreter
  __ empty_FPU_stack();
  //FIXME, we should consider about float and double
  // Push a float or double return value if necessary.
  __ leave();

  // Jump to interpreter
  __ jr(RA);
  __ delayed()->nop();

  masm->flush();
  _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
  _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
}
  1.4033 +
  1.4034 +#ifdef COMPILER2
  1.4035 +
//------------------------------generate_uncommon_trap_blob--------------------
// Generate the blob that compiled code jumps to on an uncommon trap.
// It saves the callee-saved registers, calls Deoptimization::uncommon_trap
// to obtain an UnrollBlock describing the interpreter frames to build,
// pops the deoptimized compiled frame, pushes skeletal interpreter frames
// in a loop, and finally calls Deoptimization::unpack_frames to fill them
// in before returning to the interpreter.
// Ought to generate an ideal graph & compile, but here's some SPARC ASM
// instead.
void SharedRuntime::generate_uncommon_trap_blob() {
  // allocate space for the code
  ResourceMark rm;
  // setup code generation tools
  CodeBuffer  buffer ("uncommon_trap_blob", 512*80 , 512*40 ); 
  MacroAssembler* masm = new MacroAssembler(&buffer);   

  // Self-frame layout in units of BytesPerInt (32-bit slots).  Each 64-bit
  // register occupies two consecutive slots; the *_off2 entry names the
  // second (high) half of the preceding register's save slot.
  enum frame_layout {
	s0_off, s0_off2,
	s1_off, s1_off2,
	s2_off, s2_off2,
	s3_off, s3_off2,
	s4_off, s4_off2,
	s5_off, s5_off2,
	s6_off, s6_off2,
	s7_off, s7_off2,
	fp_off, fp_off2,
	return_off, return_off2,    // slot for return address    sp + 9
    framesize
  };
  // framesize is in 4-byte slots, so % 4 == 0 means 16-byte SP alignment.
  assert(framesize % 4 == 0, "sp not 16-byte aligned");

  address start = __ pc();

  // Push self-frame.
  __ daddiu(SP, SP, -framesize * BytesPerInt);

  __ sd(RA, SP, return_off * BytesPerInt);
  __ sd(FP, SP, fp_off * BytesPerInt);

  // Save callee saved registers.  None for UseSSE=0, 
  // floats-only for UseSSE=1, and doubles for UseSSE=2.
  __ sd(S0, SP, s0_off * BytesPerInt);
  __ sd(S1, SP, s1_off * BytesPerInt);
  __ sd(S2, SP, s2_off * BytesPerInt);
  __ sd(S3, SP, s3_off * BytesPerInt);
  __ sd(S4, SP, s4_off * BytesPerInt);
  __ sd(S5, SP, s5_off * BytesPerInt);
  __ sd(S6, SP, s6_off * BytesPerInt);
  __ sd(S7, SP, s7_off * BytesPerInt);

  // Establish FP pointing at its own save slot inside the new frame.
  __ daddi(FP, SP, fp_off * BytesPerInt);

  // Clear the floating point exception stack
  __ empty_FPU_stack();

  Register thread = TREG;

#ifndef OPT_THREAD
  __ get_thread(thread);
#endif
  // set last_Java_sp
  __ set_last_Java_frame(NOREG, FP, NULL);
  __ relocate(relocInfo::internal_pc_type); 
  assert(NativeCall::return_address_offset == 24, "in sharedRuntime return_address_offset");
  {	
    // Record the pc that will follow the upcoming call as last_Java_pc.
    // NOTE(review): the constant 28 appears to be the byte length of the
    // li48/sd sequence emitted between here and the call -- confirm against
    // the assembler's instruction sizes if this sequence is ever changed.
    long save_pc = (long)__ pc() +  28 + NativeCall::return_address_offset;
    __ li48(AT, (long)save_pc);
    __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
  }
  // Call C code.  Need thread but NOT official VM entry
  // crud.  We cannot block on this call, no GC can happen.  Call should
  // capture callee-saved registers as well as return values.
  __ move(A0, thread);
  // argument already in T0
  __ move(A1, T0);
  __ li48(T9, (long)Deoptimization::uncommon_trap);
  __ jalr(T9);
  __ delayed()->nop();

  // Set an oopmap for the call site
  OopMapSet *oop_maps = new OopMapSet();
  OopMap* map =  new OopMap( framesize, 0 );

  map->set_callee_saved( VMRegImpl::stack2reg(s0_off    ),  S0->as_VMReg() ); 
  map->set_callee_saved( VMRegImpl::stack2reg(s1_off    ),  S1->as_VMReg() );
  map->set_callee_saved( VMRegImpl::stack2reg(s2_off    ),  S2->as_VMReg() );
  map->set_callee_saved( VMRegImpl::stack2reg(s3_off    ),  S3->as_VMReg() );
  map->set_callee_saved( VMRegImpl::stack2reg(s4_off    ),  S4->as_VMReg() );
  map->set_callee_saved( VMRegImpl::stack2reg(s5_off    ),  S5->as_VMReg() );
  map->set_callee_saved( VMRegImpl::stack2reg(s6_off    ),  S6->as_VMReg() );
  map->set_callee_saved( VMRegImpl::stack2reg(s7_off    ),  S7->as_VMReg() );

  //oop_maps->add_gc_map( __ offset(), true, map);
  oop_maps->add_gc_map( __ offset(),  map); 

#ifndef OPT_THREAD
  __ get_thread(thread);
#endif
  __ reset_last_Java_frame(false,false);

  // Load UnrollBlock into S7
  // (V0 holds the UnrollBlock* returned by Deoptimization::uncommon_trap).
  Register unroll = S7;
  __ move(unroll, V0);

  // Pop all the frames we must move/replace. 
  // 
  // Frame picture (youngest to oldest)
  // 1: self-frame (no frame link)
  // 2: deopting frame  (no frame link)
  // 3: possible-i2c-adapter-frame 
  // 4: caller of deopting frame (could be compiled/interpreted. If interpreted we will create an
  //    and c2i here)

  // Pop self-frame.  We have no frame, and must rely only on EAX and ESP.
  __ daddiu(SP, SP, framesize * BytesPerInt);

  // Pop deoptimized frame
  __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes());
  __ dadd(SP, SP, AT);

  // register for frame pcs
  Register pcs = T8;
  // register for frame sizes
  Register sizes = T9;
  // register for frame count
  Register count = T3;
  // register for the sender's sp
  Register sender_sp = T1;

  // sp should be pointing at the return address to the caller (4)
  // Load array of frame pcs into ECX
  __ ld(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes());

/* 2012/9/7 Not needed in MIPS
  __ addiu(SP, SP, wordSize);
*/

  // Load array of frame sizes into ESI
  __ ld(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes());
  __ lwu(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes());

  // Pick up the initial fp we should save
  __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes());
  // Now adjust the caller's stack to make up for the extra locals
  // but record the original sp so that we can save it in the skeletal interpreter
  // frame and the stack walking of interpreter_sender will get the unextended sp
  // value and not the "real" sp value.

  __ move(sender_sp, SP);
  __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes());
  __ dsub(SP, SP, AT);
  // Push interpreter frames in a loop
  // (one skeletal frame per entry in the sizes/pcs arrays, counted by `count`).
  Label loop;
  __ bind(loop);
  __ ld(T2, sizes, 0);          // Load frame size
  __ ld(AT, pcs, 0);           // save return address
  __ daddi(T2, T2, -2*wordSize);           // we'll push pc and rbp, by hand
  __ push2(AT, FP);
  __ move(FP, SP);
  __ dsub(SP, SP, T2);                   // Prolog!
  // This value is corrected by layout_activation_impl
  __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize);
  __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable
  __ move(sender_sp, SP);       // pass to next frame
  __ daddi(count, count, -1);    // decrement counter
  __ daddi(sizes, sizes, wordSize);     // Bump array pointer (sizes)
  __ addi(pcs, pcs, wordSize);      // Bump array pointer (pcs)
  __ bne(count, R0, loop);
  __ delayed()->nop();      // Bump array pointer (pcs)

  // The pcs array has one extra entry: the pc to return to after unpacking.
  __ ld(RA, pcs, 0);

  // Re-push self-frame
  __ daddi(SP, SP, - 2 * wordSize);      // save old & set new FP
  __ sd(FP, SP, 0 * wordSize);          // save final return address
  __ sd(RA, SP, 1 * wordSize);
  __ move(FP, SP); 
  __ daddi(SP, SP, -(framesize / 2 - 2) * wordSize);

  // set last_Java_sp, last_Java_fp
  __ set_last_Java_frame(NOREG, FP, NULL);

  __ move(AT, -(StackAlignmentInBytes));
  __ andr(SP, SP, AT);   // Fix stack alignment as required by ABI

  __ relocate(relocInfo::internal_pc_type); 
  {	
    // Same pc-recording idiom as above; see the NOTE about the constant 28.
    long save_pc = (long)__ pc() +  28 + NativeCall::return_address_offset;
    __ li48(AT, (long)save_pc);
  }
  __ sd(AT, thread,in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));

  // Call C code.  Need thread but NOT official VM entry
  // crud.  We cannot block on this call, no GC can happen.  Call should
  // restore return values to their stack-slots with the new SP.
  __ move(A0, thread);
  __ move(A1, Deoptimization::Unpack_uncommon_trap);
  __ li48(T9, (long)Deoptimization::unpack_frames);
  __ jalr(T9);
  __ delayed()->nop();
  // Set an oopmap for the call site
  //oop_maps->add_gc_map( __ offset(), true, new OopMap( framesize, 0 ) ); 
  oop_maps->add_gc_map( __ offset(),  new OopMap( framesize, 0 ) );//Fu

  __ reset_last_Java_frame(true,true);

  // Pop self-frame.
  __ leave();     // Epilog!

  // Jump to interpreter
  __ jr(RA);
  __ delayed()->nop();
  // -------------
  // make sure all code is generated
  masm->flush();

  _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, framesize / 2);
}
  1.4248 +
  1.4249 +#endif // COMPILER2
  1.4250 +
//------------------------------generate_handler_blob-------------------
//
// Generate a special Compile2Runtime blob that saves all registers, and sets
// up an OopMap and calls safepoint code to stop the compiled code for
// a safepoint.
//
// This blob is jumped to (via a breakpoint and the signal handler) from a
// safepoint in compiled code. 
 
// call_ptr:  VM routine to call (e.g. SafepointSynchronize::handle_polling_page_exception).
// pool_type: which kind of poll triggered us (POLL_AT_RETURN /
//            POLL_AT_LOOP / POLL_AT_VECTOR_LOOP); selects whether RA already
//            holds the return pc and whether vector registers must be saved.
SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int pool_type) {

  // Account for thread arg in our frame
  const int additional_words = 0; 
  int frame_size_in_words;

  assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");  

  ResourceMark rm;
  OopMapSet *oop_maps = new OopMapSet();
  OopMap* map;

  // allocate space for the code
  // setup code generation tools  
  CodeBuffer  buffer ("handler_blob", 2048, 512);
  MacroAssembler* masm = new MacroAssembler( &buffer);
  
  const Register thread = TREG; 
  address start   = __ pc();  
  address call_pc = NULL;  
  bool cause_return = (pool_type == POLL_AT_RETURN);
  bool save_vectors = (pool_type == POLL_AT_VECTOR_LOOP);

  // If cause_return is true we are at a poll_return and there is
  // the return address in RA to the caller on the nmethod
  // that is safepoint. We can leave this return in RA and
  // effectively complete the return and safepoint in the caller.
  // Otherwise we load exception pc to RA.
  // Preserve the thread register around get_thread, which may clobber it.
  __ push(thread);
#ifndef OPT_THREAD
  __ get_thread(thread);
#endif

  if(!cause_return) {
    __ ld_ptr(RA, Address(thread, JavaThread::saved_exception_pc_offset()));
  }
  
  __ pop(thread);
  // Save the full register state; the returned OopMap describes where each
  // register was saved so GC/deopt can find oops and debug info.
  map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, save_vectors);

#ifndef OPT_THREAD
  __ get_thread(thread);
#endif
  // The following is basically a call_VM. However, we need the precise
  // address of the call in order to generate an oopmap. Hence, we do all the
  // work outselvs.

  __ move(A0, thread);
  __ set_last_Java_frame(NOREG, NOREG, NULL);

  //__ relocate(relocInfo::internal_pc_type); 
  if (!cause_return)
  {	
/*
    intptr_t save_pc = (intptr_t)__ pc() +  NativeMovConstReg::instruction_size + NativeCall::return_address_offset + 4;
    __ li48(AT, save_pc);
    __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset()));
*/
  }


  // do the call
  //__ lui(T9, Assembler::split_high((int)call_ptr));
  //__ addiu(T9, T9, Assembler::split_low((int)call_ptr));
  __ call(call_ptr);
  __ delayed()->nop();

  // Set an oopmap for the call site.  This oopmap will map all
  // oop-registers and debug-info registers as callee-saved.  This
  // will allow deoptimization at this safepoint to find all possible
  // debug-info recordings, as well as let GC find all oops.
  oop_maps->add_gc_map(__ offset(),  map);

  Label noException;

  // Clear last_Java_sp again
  __ reset_last_Java_frame(false, false);

  // Did the VM call leave a pending exception?
  __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset()));
  __ beq(AT, R0, noException);
  __ delayed()->nop();

  // Exception pending

  RegisterSaver::restore_live_registers(masm, save_vectors);
  //forward_exception_entry need return address on the stack
  __ push(RA);
  //__ lui(T9, Assembler::split_high((int)StubRoutines::forward_exception_entry()));
  //__ addiu(T9, T9, Assembler::split_low((int)StubRoutines::forward_exception_entry()));
  __ li(T9, StubRoutines::forward_exception_entry());
  __ jr(T9);
  __ delayed()->nop();

  // No exception case
  __ bind(noException);
  // Normal exit, register restoring and exit  
  RegisterSaver::restore_live_registers(masm, save_vectors);
  __ jr(RA);
  __ delayed()->nop();
  
  masm->flush();  

  // Fill-out other meta info
  return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words);      
}
  1.4365 +
  1.4366 +//
  1.4367 +// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss
  1.4368 +//
  1.4369 +// Generate a stub that calls into vm to find out the proper destination
  1.4370 +// of a java call. All the argument registers are live at this point
  1.4371 +// but since this is generic code we don't know what they are and the caller
  1.4372 +// must do any gc of the args.
  1.4373 +//
  1.4374 +RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
  1.4375 +  assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
  1.4376 +
  1.4377 +  // allocate space for the code
  1.4378 +  ResourceMark rm;
  1.4379 +
  1.4380 +  //CodeBuffer buffer(name, 1000, 512);
  1.4381 +  //FIXME. aoqi. code_size
  1.4382 +  CodeBuffer buffer(name, 20000, 2048);
  1.4383 +  MacroAssembler* masm  = new MacroAssembler(&buffer);
  1.4384 +
  1.4385 +  int frame_size_words;
  1.4386 +  //we put the thread in A0 
  1.4387 +
  1.4388 +  OopMapSet *oop_maps = new OopMapSet();
  1.4389 +  OopMap* map = NULL;
  1.4390 +
  1.4391 +  int start = __ offset();
  1.4392 +  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
  1.4393 +
  1.4394 +
  1.4395 +  int frame_complete = __ offset();
  1.4396 +
  1.4397 +  const Register thread = T8;
  1.4398 +  __ get_thread(thread);
  1.4399 +
  1.4400 +  __ move(A0, thread); 
  1.4401 +  __ set_last_Java_frame(noreg, FP, NULL);
  1.4402 +  //__ addi(SP, SP, -wordSize);
  1.4403 +  //align the stack before invoke native 
  1.4404 +  __ move(AT, -(StackAlignmentInBytes));
  1.4405 +  __ andr(SP, SP, AT); 
  1.4406 +  __ relocate(relocInfo::internal_pc_type); 
  1.4407 +  {	
  1.4408 +    intptr_t save_pc = (intptr_t)__ pc() +  NativeMovConstReg::instruction_size + NativeCall::return_address_offset + 1 * BytesPerInstWord;
  1.4409 +//tty->print_cr(" %s :%d, name:%s, pc: %lx, save_pc: %lx, frame_size_words: %lx", __func__, __LINE__, name, __ pc(), save_pc, frame_size_words); //aoqi_test
  1.4410 +    __ li48(AT, save_pc);
  1.4411 +  }
  1.4412 +  __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset()));
  1.4413 +
  1.4414 +  __ call(destination);
  1.4415 +  __ delayed()->nop();
  1.4416 +
  1.4417 +  // Set an oopmap for the call site.
  1.4418 +  // We need this not only for callee-saved registers, but also for volatile
  1.4419 +  // registers that the compiler might be keeping live across a safepoint.
  1.4420 +  oop_maps->add_gc_map( __ offset() - start, map);
  1.4421 +  // V0 contains the address we are going to jump to assuming no exception got installed
  1.4422 +  __ get_thread(thread);
  1.4423 +  __ ld_ptr(SP, thread, in_bytes(JavaThread::last_Java_sp_offset()));
  1.4424 +  // clear last_Java_sp
  1.4425 +  __ reset_last_Java_frame(true, true);
  1.4426 +  // check for pending exceptions
  1.4427 +  Label pending;
  1.4428 +  __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset()));
  1.4429 +  __ bne(AT, R0, pending);
  1.4430 +  __ delayed()->nop(); 
  1.4431 +  // get the returned Method* 
  1.4432 +  //FIXME, do mips need this ? 
  1.4433 +  __ get_vm_result_2(Rmethod, thread);  // Refer to OpenJDK8
  1.4434 +  __ st_ptr(Rmethod, SP, RegisterSaver::methodOffset() * wordSize);
  1.4435 +  __ st_ptr(V0, SP, RegisterSaver::v0Offset() * wordSize);
  1.4436 +  RegisterSaver::restore_live_registers(masm);
  1.4437 +
  1.4438 +  // We are back the the original state on entry and ready to go the callee method.
  1.4439 +  __ jr(V0);
  1.4440 +  __ delayed()->nop();
  1.4441 +  // Pending exception after the safepoint
  1.4442 +
  1.4443 +  __ bind(pending);
  1.4444 +
  1.4445 +  RegisterSaver::restore_live_registers(masm);
  1.4446 +
  1.4447 +  // exception pending => remove activation and forward to exception handler
  1.4448 +  //forward_exception_entry need return address on the stack 
  1.4449 +  __ push(RA);
  1.4450 +  __ get_thread(thread);
  1.4451 +  __ st_ptr(R0, thread, in_bytes(JavaThread::vm_result_offset())); 
  1.4452 +  __ ld_ptr(V0, thread, in_bytes(Thread::pending_exception_offset()));
  1.4453 +  __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
  1.4454 +  __ delayed() -> nop();
  1.4455 +  // -------------
  1.4456 +  // make sure all code is generated
  1.4457 +  masm->flush();  
  1.4458 +
  1.4459 +  RuntimeStub* tmp= RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
  1.4460 +  return tmp;
  1.4461 +}
  1.4462 +
  1.4463 +/*void SharedRuntime::generate_stubs() {
  1.4464 +	_wrong_method_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, 
  1.4465 +				SharedRuntime::handle_wrong_method),"wrong_method_stub");
  1.4466 +	_ic_miss_blob      = generate_resolve_blob(CAST_FROM_FN_PTR(address, 
  1.4467 +				SharedRuntime::handle_wrong_method_ic_miss),"ic_miss_stub");
  1.4468 +	_resolve_opt_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, 
  1.4469 +				SharedRuntime::resolve_opt_virtual_call_C),"resolve_opt_virtual_call");
  1.4470 +	_resolve_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, 
  1.4471 +				SharedRuntime::resolve_virtual_call_C),"resolve_virtual_call");
  1.4472 +	_resolve_static_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, 
  1.4473 +				SharedRuntime::resolve_static_call_C),"resolve_static_call");
  1.4474 +	_polling_page_safepoint_handler_blob =generate_handler_blob(CAST_FROM_FN_PTR(address, 
  1.4475 +				SafepointSynchronize::handle_polling_page_exception), false);
  1.4476 +	_polling_page_return_handler_blob =generate_handler_blob(CAST_FROM_FN_PTR(address,
  1.4477 +				SafepointSynchronize::handle_polling_page_exception), true);
  1.4478 +	generate_deopt_blob();
  1.4479 +#ifdef COMPILER2
  1.4480 +	generate_uncommon_trap_blob();
  1.4481 +#endif // COMPILER2
  1.4482 +}*/
  1.4483 +
  1.4484 +extern "C" int SpinPause() {return 0;}
  1.4485 +// extern "C" int SafeFetch32 (int * adr, int errValue) {return 0;} ;
  1.4486 +// extern "C" intptr_t SafeFetchN (intptr_t * adr, intptr_t errValue) {return *adr; } ;

mercurial