src/cpu/mips/vm/c1_Runtime1_mips.cpp

changeset 1
2d8a650513c2
child 373
3a34fc828b4a
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/cpu/mips/vm/c1_Runtime1_mips.cpp	Fri Apr 29 00:06:10 2016 +0800
     1.3 @@ -0,0 +1,1649 @@
     1.4 +/*
     1.5 + * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
     1.6 + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved.
     1.7 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.8 + *
     1.9 + * This code is free software; you can redistribute it and/or modify it
    1.10 + * under the terms of the GNU General Public License version 2 only, as
    1.11 + * published by the Free Software Foundation.
    1.12 + *
    1.13 + * This code is distributed in the hope that it will be useful, but WITHOUT
    1.14 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    1.15 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    1.16 + * version 2 for more details (a copy is included in the LICENSE file that
    1.17 + * accompanied this code).
    1.18 + *
    1.19 + * You should have received a copy of the GNU General Public License version
    1.20 + * 2 along with this work; if not, write to the Free Software Foundation,
    1.21 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    1.22 + *
    1.23 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    1.24 + * or visit www.oracle.com if you need additional information or have any
    1.25 + * questions.
    1.26 + *
    1.27 + */
    1.28 +
    1.29 +#include "precompiled.hpp"
    1.30 +#include "asm/assembler.hpp"
    1.31 +#include "c1/c1_Defs.hpp"
    1.32 +#include "c1/c1_MacroAssembler.hpp"
    1.33 +#include "c1/c1_Runtime1.hpp"
    1.34 +#include "interpreter/interpreter.hpp"
    1.35 +#include "nativeInst_mips.hpp"
    1.36 +#include "oops/compiledICHolder.hpp"
    1.37 +#include "oops/oop.inline.hpp"
    1.38 +#include "prims/jvmtiExport.hpp"
    1.39 +#include "register_mips.hpp"
    1.40 +#include "runtime/sharedRuntime.hpp"
    1.41 +#include "runtime/signature.hpp"
    1.42 +#include "runtime/vframeArray.hpp"
    1.43 +#include "vmreg_mips.inline.hpp"
    1.44 +
    1.45 +
    1.46 +// Implementation of StubAssembler
     1.47 +// This method preserves stack space for arguments as indicated by args_size.
     1.48 +// For stack-alignment reasons you cannot call this with arguments on the stack;
     1.49 +// if you need more than 3 arguments, you must implement the call yourself.
    1.50 +int StubAssembler::call_RT(Register oop_result1, Register oop_result2, address entry, int args_size) {
     1.51 +	// We use S7 here where the x86 version uses edi.
     1.52 +	// setup registers
     1.53 +	const Register thread = TREG; // TREG is a callee-saved register
     1.54 +	assert(!(oop_result1->is_valid() || oop_result2->is_valid()) || oop_result1 != oop_result2, "registers must be different");
    1.55 +	assert(oop_result1 != thread && oop_result2 != thread, "registers must be different");
    1.56 +	assert(args_size >= 0, "illegal args_size");
    1.57 +
    1.58 +	set_num_rt_args(1 + args_size);
    1.59 +
    1.60 +
    1.61 +	// push java thread (becomes first argument of C function)
    1.62 +#ifndef OPT_THREAD
    1.63 +	get_thread(thread);
    1.64 +#endif
    1.65 +	move(A0, thread);
    1.66 +
    1.67 +	set_last_Java_frame(thread, NOREG, FP, NULL);
    1.68 +	NOT_LP64(addi(SP, SP, - wordSize * (1+args_size)));
    1.69 +	move(AT, -(StackAlignmentInBytes));
    1.70 +	andr(SP, SP, AT);
    1.71 +
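          +	// Record the return address of the runtime call below as last_Java_pc, so
          +	// the stack walker sees a consistent anchor; the save_pc arithmetic must
          +	// match the instruction sequence emitted for the call (lui/addiu or li48,
          +	// then jalr and its delay slot).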
    1.72 +	relocate(relocInfo::internal_pc_type); 
    1.73 +	{	
    1.74 +#ifndef _LP64
    1.75 +		int save_pc = (int)pc() +  12 + NativeCall::return_address_offset;
    1.76 +		lui(AT, Assembler::split_high(save_pc));
    1.77 +		addiu(AT, AT, Assembler::split_low(save_pc));
    1.78 +#else
    1.79 +		uintptr_t save_pc = (uintptr_t)pc() + NativeMovConstReg::instruction_size + 1 * BytesPerInstWord + NativeCall::return_address_offset;
    1.80 +		li48(AT, save_pc);
    1.81 +#endif
    1.82 +	}
    1.83 +	st_ptr(AT, thread, in_bytes(JavaThread::last_Java_pc_offset())); 
    1.84 +
    1.85 +	// do the call
    1.86 +//#define aoqi_test
    1.87 +#ifdef aoqi_test
     1.88 +	tty->print_cr("StubRuntime::%s:%d entry: %lx", __func__, __LINE__, entry);
    1.89 +#endif
    1.90 +#ifndef _LP64
    1.91 +	lui(T9, Assembler::split_high((int)entry));
    1.92 +	addiu(T9, T9, Assembler::split_low((int)entry));
    1.93 +#else
    1.94 +	li48(T9, (intptr_t)entry);
    1.95 +#endif
    1.96 +	jalr(T9);
    1.97 +	delayed()->nop();
    1.98 +	int call_offset = offset();
    1.99 +
   1.100 +	// verify callee-saved register
   1.101 +#ifdef ASSERT
   1.102 +	guarantee(thread != V0, "change this code");
   1.103 +	push(V0);
   1.104 +	{ 
   1.105 +		Label L;
   1.106 +		get_thread(V0);
   1.107 +		beq(thread, V0, L);
   1.108 +		delayed()->nop();
   1.109 +		int3(); 
   1.110 +		stop("StubAssembler::call_RT: edi not callee saved?");
   1.111 +		bind(L);
   1.112 +	}
   1.113 +	super_pop(V0);
   1.114 +#endif
   1.115 +	// discard thread and arguments
    1.116 +	ld_ptr(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); // by yyq
    1.117 +	// FIXME: in the x86 version the second parameter is false; why is it true here? @jerome, 12/31/06
    1.118 +	//  reset_last_Java_frame(thread, true);
    1.119 +	reset_last_Java_frame(thread, true, false);
   1.120 +	// check for pending exceptions
   1.121 +	{ 
   1.122 +		Label L;
   1.123 +		ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset()));
   1.124 +		beq(AT, R0, L);
   1.125 +		delayed()->nop();
   1.126 +		// exception pending => remove activation and forward to exception handler
   1.127 +		// make sure that the vm_results are cleared
   1.128 +		if (oop_result1->is_valid()) {
   1.129 +			st_ptr(R0, thread, in_bytes(JavaThread::vm_result_offset()));
   1.130 +		}
   1.131 +		if (oop_result2->is_valid()) {
   1.132 +			st_ptr(R0, thread, in_bytes(JavaThread::vm_result_2_offset()));
   1.133 +		}
    1.134 +		// the leave() on x86 just pops ebp and leaves the return address on the
    1.135 +		// top of the stack;
    1.136 +		// the return address will be needed by forward_exception_entry()
   1.137 +		if (frame_size() == no_frame_size) {
   1.138 +			addiu(SP, FP, wordSize);
   1.139 +			ld_ptr(FP, SP, (-1) * wordSize);
   1.140 +			jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
   1.141 +			delayed()->nop();
   1.142 +		} else if (_stub_id == Runtime1::forward_exception_id) {
   1.143 +			should_not_reach_here();
   1.144 +		} else {
   1.145 +			jmp(Runtime1::entry_for(Runtime1::forward_exception_id), 
   1.146 +					relocInfo::runtime_call_type);
   1.147 +			delayed()->nop();
   1.148 +		}
   1.149 +		bind(L);
   1.150 +	}
   1.151 +	// get oop results if there are any and reset the values in the thread
   1.152 +	if (oop_result1->is_valid()) {
   1.153 +		ld_ptr(oop_result1, thread, in_bytes(JavaThread::vm_result_offset()));
   1.154 +		st_ptr(R0, thread, in_bytes(JavaThread::vm_result_offset()));
   1.155 +		verify_oop(oop_result1);
   1.156 +	}
   1.157 +	if (oop_result2->is_valid()) {
   1.158 +		ld_ptr(oop_result2, thread, in_bytes(JavaThread::vm_result_2_offset()));
   1.159 +		st_ptr(R0, thread, in_bytes(JavaThread::vm_result_2_offset()));
   1.160 +		verify_oop(oop_result2);
   1.161 +	}
   1.162 +	return call_offset;
   1.163 +}
   1.164 +
   1.165 +
   1.166 +int StubAssembler::call_RT(Register oop_result1, Register oop_result2, address entry, Register arg1) {
   1.167 +	if (arg1 != A1) move(A1, arg1);
   1.168 +	return call_RT(oop_result1, oop_result2, entry, 1);
   1.169 +}
   1.170 +
   1.171 +
   1.172 +int StubAssembler::call_RT(Register oop_result1, Register oop_result2, address entry, Register arg1, Register arg2) {
    1.173 +	if (arg1 != A1) move(A1, arg1);
    1.174 +	if (arg2 != A2) move(A2, arg2); assert(arg2 != A1, "smashed argument");
    1.175 +	return call_RT(oop_result1, oop_result2, entry, 2);
   1.176 +}
   1.177 +
   1.178 +
   1.179 +int StubAssembler::call_RT(Register oop_result1, Register oop_result2, address entry, Register arg1, Register arg2, Register arg3) {
    1.180 +	if (arg1 != A1) move(A1, arg1);
    1.181 +	if (arg2 != A2) move(A2, arg2); assert(arg2 != A1, "smashed argument");
    1.182 +	if (arg3 != A3) move(A3, arg3); assert(arg3 != A1 && arg3 != A2, "smashed argument");
    1.183 +	return call_RT(oop_result1, oop_result2, entry, 3);
   1.184 +}
   1.185 +
   1.186 +
   1.187 +// Implementation of StubFrame
   1.188 +
   1.189 +class StubFrame: public StackObj {
   1.190 +	private:
   1.191 +		StubAssembler* _sasm;
   1.192 +
   1.193 +	public:
   1.194 +		StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments);
   1.195 +		void load_argument(int offset_in_words, Register reg);
   1.196 +		~StubFrame();
   1.197 +};
   1.198 +
   1.199 +
   1.200 +#define __ _sasm->
   1.201 +
   1.202 +StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments) {
   1.203 +	_sasm = sasm;
   1.204 +	__ set_info(name, must_gc_arguments);
   1.205 +	__ enter();
   1.206 +}
   1.207 +
   1.208 +
    1.209 +// FIXME: I am not sure about the MIPS frame layout here.
    1.210 +// load parameters that were stored with LIR_Assembler::store_parameter
    1.211 +// Note: offsets for store_parameter and load_argument must match
   1.212 +void StubFrame::load_argument(int offset_in_words, Register reg) {
   1.213 +	//ebp + 0: link
   1.214 +	//    + 1: return address
   1.215 +	//    + 2: argument with offset 0
   1.216 +	//    + 3: argument with offset 1
   1.217 +	//    + 4: ...
   1.218 +	//__ movl(reg, Address(ebp, (offset_in_words + 2) * BytesPerWord));
   1.219 +	__ ld_ptr(reg, Address(FP, (offset_in_words + 2) * BytesPerWord));
   1.220 +}
   1.221 +StubFrame::~StubFrame() {
   1.222 +	__ leave();
   1.223 +	__ jr(RA);
   1.224 +	__ delayed()->nop();
   1.225 +}
   1.226 +
   1.227 +#undef __
   1.228 +
   1.229 +
   1.230 +// Implementation of Runtime1
   1.231 +
   1.232 +#define __ sasm->
   1.233 +
   1.234 +//static OopMap* save_live_registers(MacroAssembler* sasm, int num_rt_args);
   1.235 +//static void restore_live_registers(MacroAssembler* sasm);
   1.236 +//DeoptimizationBlob* SharedRuntime::_deopt_blob = NULL;
   1.237 +/*
   1.238 +const int fpu_stack_as_doubles_size_in_words = 16;
   1.239 +const int fpu_stack_as_doubles_size = 64;
   1.240 +*/
   1.241 +const int float_regs_as_doubles_size_in_words = 16;
   1.242 +
    1.243 +// FIXME:
    1.244 +// Stack layout for saving/restoring all the registers needed during a runtime
   1.245 +// call (this includes deoptimization)
   1.246 +// Note: note that users of this frame may well have arguments to some runtime
   1.247 +// while these values are on the stack. These positions neglect those arguments
   1.248 +// but the code in save_live_registers will take the argument count into
   1.249 +// account.
   1.250 +//
   1.251 +#ifdef _LP64
   1.252 +  #define SLOT2(x) x,
   1.253 +  #define SLOT_PER_WORD 2
   1.254 +#else
   1.255 +  #define SLOT2(x)
   1.256 +  #define SLOT_PER_WORD 1
   1.257 +#endif // _LP64
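          +
          +// Each *_off value below is an offset in VMReg slots. On LP64 a register takes
          +// two slots, so SLOT2() adds a *H_off entry for the high half and SLOT_PER_WORD
          +// converts between slots and machine words.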
   1.258 +
   1.259 +enum reg_save_layout {
   1.260 +#ifndef _LP64
   1.261 +  T0_off = 0,
   1.262 +  S0_off = T0_off + SLOT_PER_WORD * 8,
   1.263 +#else
   1.264 +  A4_off = 0,
   1.265 +  S0_off = A4_off + SLOT_PER_WORD * 8,
   1.266 +#endif
   1.267 +  FP_off = S0_off + SLOT_PER_WORD * 8, SLOT2(FPH_off)
   1.268 +  T8_off, SLOT2(T8H_off)
   1.269 +  T9_off, SLOT2(T9H_off)
   1.270 +  SP_off, SLOT2(SPH_off)
   1.271 +  V0_off, SLOT2(V0H_off)
   1.272 +  V1_off, SLOT2(V1H_off)
   1.273 +  A0_off, SLOT2(A0H_off)
   1.274 +  A1_off, SLOT2(A1H_off)
   1.275 +  A2_off, SLOT2(A2H_off)
   1.276 +  A3_off, SLOT2(A3H_off)
   1.277 +
   1.278 +  // Float registers
   1.279 +  /* FIXME: Jin: In MIPS64, F0~23 are all caller-saved registers */
   1.280 +#if 1
   1.281 +  F0_off, SLOT2( F0H_off)
   1.282 +  F1_off, SLOT2( F1H_off)
   1.283 +  F2_off, SLOT2( F2H_off)
   1.284 +  F3_off, SLOT2( F3H_off)
   1.285 +  F4_off, SLOT2( F4H_off)
   1.286 +  F5_off, SLOT2( F5H_off)
   1.287 +  F6_off, SLOT2( F6H_off)
   1.288 +  F7_off, SLOT2( F7H_off)
   1.289 +  F8_off, SLOT2( F8H_off)
   1.290 +  F9_off, SLOT2( F9H_off)
   1.291 +  F10_off, SLOT2( F10H_off)
   1.292 +  F11_off, SLOT2( F11H_off)
   1.293 +  F12_off, SLOT2( F12H_off)
   1.294 +  F13_off, SLOT2( F13H_off)
   1.295 +  F14_off, SLOT2( F14H_off)
   1.296 +  F15_off, SLOT2( F15H_off)
   1.297 +  F16_off, SLOT2( F16H_off)
   1.298 +  F17_off, SLOT2( F17H_off)
   1.299 +  F18_off, SLOT2( F18H_off)
   1.300 +  F19_off, SLOT2( F19H_off)
   1.301 +#endif
   1.302 +
   1.303 +  GP_off, SLOT2( GPH_off)
   1.304 +  //temp_2_off,
   1.305 +  temp_1_off, SLOT2(temp_1H_off)
   1.306 +  saved_fp_off, SLOT2(saved_fpH_off)
   1.307 +  return_off, SLOT2(returnH_off)
   1.308 +
   1.309 +  reg_save_frame_size,
   1.310 +
   1.311 +  // illegal instruction handler
   1.312 +  continue_dest_off = temp_1_off,
   1.313 +
   1.314 +  // deoptimization equates
   1.315 +  //deopt_type = temp_2_off,             // slot for type of deopt in progress
   1.316 +  ret_type = temp_1_off                // slot for return type
   1.317 +};
   1.318 +
   1.319 +// Save off registers which might be killed by calls into the runtime.
    1.320 +// Tries to be smart about FP registers.  In particular we separate
   1.321 +// saving and describing the FPU registers for deoptimization since we
   1.322 +// have to save the FPU registers twice if we describe them and on P4
   1.323 +// saving FPU registers which don't contain anything appears
   1.324 +// expensive.  The deopt blob is the only thing which needs to
   1.325 +// describe FPU registers.  In all other cases it should be sufficient
   1.326 +// to simply save their current value.
    1.327 +// FIXME: not sure which registers should be saved. @jerome
   1.328 +static OopMap* generate_oop_map(StubAssembler* sasm, int num_rt_args,
   1.329 +    bool save_fpu_registers = true, bool describe_fpu_registers = false) {
   1.330 +
    1.331 +  /* Jin: num_rt_args is counted in 8-byte words. */
   1.332 +  int frame_size_in_slots = reg_save_frame_size + num_rt_args * wordSize / SLOT_PER_WORD;   // args + thread
   1.333 +  sasm->set_frame_size(frame_size_in_slots / SLOT_PER_WORD);
   1.334 +
   1.335 +  // record saved value locations in an OopMap
   1.336 +  // locations are offsets from sp after runtime call; num_rt_args is number of arguments 
   1.337 +  // in call, including thread
   1.338 +  OopMap* map = new OopMap(reg_save_frame_size, 0);
   1.339 +  
   1.340 +  map->set_callee_saved(VMRegImpl::stack2reg(V0_off + num_rt_args), V0->as_VMReg());
   1.341 +  map->set_callee_saved(VMRegImpl::stack2reg(V1_off + num_rt_args), V1->as_VMReg());
   1.342 +#ifdef _LP64
   1.343 +  map->set_callee_saved(VMRegImpl::stack2reg(V0H_off + num_rt_args), V0->as_VMReg()->next());
   1.344 +  map->set_callee_saved(VMRegImpl::stack2reg(V1H_off + num_rt_args), V1->as_VMReg()->next());
   1.345 +#endif
   1.346 +
   1.347 +  int i = 0;
   1.348 +#ifndef _LP64
   1.349 +  for (Register r = T0; r != T7->successor(); r = r->successor() ) {
   1.350 +    map->set_callee_saved(VMRegImpl::stack2reg(T0_off + num_rt_args + i++), r->as_VMReg());
   1.351 +  }
   1.352 +#else
   1.353 +  for (Register r = A4; r != T3->successor(); r = r->successor() ) {
   1.354 +    map->set_callee_saved(VMRegImpl::stack2reg(A4_off + num_rt_args + i++), r->as_VMReg());
   1.355 +    map->set_callee_saved(VMRegImpl::stack2reg(A4_off + num_rt_args + i++), r->as_VMReg()->next());
   1.356 +  }
   1.357 +#endif
   1.358 +
   1.359 +  i = 0;
   1.360 +  for (Register r = S0; r != S7->successor(); r = r->successor() ) {
   1.361 +    map->set_callee_saved(VMRegImpl::stack2reg(S0_off + num_rt_args + i++), r->as_VMReg());
   1.362 +#ifdef _LP64
   1.363 +    map->set_callee_saved(VMRegImpl::stack2reg(S0_off + num_rt_args + i++), r->as_VMReg()->next());
   1.364 +#endif
   1.365 +  }
   1.366 +
   1.367 +  map->set_callee_saved(VMRegImpl::stack2reg(FP_off + num_rt_args), FP->as_VMReg());
   1.368 +  map->set_callee_saved(VMRegImpl::stack2reg(GP_off + num_rt_args), GP->as_VMReg());
   1.369 +  map->set_callee_saved(VMRegImpl::stack2reg(T8_off + num_rt_args), T8->as_VMReg());
   1.370 +  map->set_callee_saved(VMRegImpl::stack2reg(T9_off + num_rt_args), T9->as_VMReg());
   1.371 +  map->set_callee_saved(VMRegImpl::stack2reg(A0_off + num_rt_args), A0->as_VMReg());
   1.372 +  map->set_callee_saved(VMRegImpl::stack2reg(A1_off + num_rt_args), A1->as_VMReg());
   1.373 +  map->set_callee_saved(VMRegImpl::stack2reg(A2_off + num_rt_args), A2->as_VMReg());
   1.374 +  map->set_callee_saved(VMRegImpl::stack2reg(A3_off + num_rt_args), A3->as_VMReg());
   1.375 +
   1.376 +#if 1
    1.377 +  map->set_callee_saved(VMRegImpl::stack2reg(F0_off + num_rt_args), F0->as_VMReg());
    1.378 +  map->set_callee_saved(VMRegImpl::stack2reg(F1_off + num_rt_args), F1->as_VMReg());
    1.379 +  map->set_callee_saved(VMRegImpl::stack2reg(F2_off + num_rt_args), F2->as_VMReg());
    1.380 +  map->set_callee_saved(VMRegImpl::stack2reg(F3_off + num_rt_args), F3->as_VMReg());
    1.381 +  map->set_callee_saved(VMRegImpl::stack2reg(F4_off + num_rt_args), F4->as_VMReg());
    1.382 +  map->set_callee_saved(VMRegImpl::stack2reg(F5_off + num_rt_args), F5->as_VMReg());
    1.383 +  map->set_callee_saved(VMRegImpl::stack2reg(F6_off + num_rt_args), F6->as_VMReg());
    1.384 +  map->set_callee_saved(VMRegImpl::stack2reg(F7_off + num_rt_args), F7->as_VMReg());
    1.385 +  map->set_callee_saved(VMRegImpl::stack2reg(F8_off + num_rt_args), F8->as_VMReg());
    1.386 +  map->set_callee_saved(VMRegImpl::stack2reg(F9_off + num_rt_args), F9->as_VMReg());
    1.387 +  map->set_callee_saved(VMRegImpl::stack2reg(F10_off + num_rt_args), F10->as_VMReg());
    1.388 +  map->set_callee_saved(VMRegImpl::stack2reg(F11_off + num_rt_args), F11->as_VMReg());
    1.389 +  map->set_callee_saved(VMRegImpl::stack2reg(F12_off + num_rt_args), F12->as_VMReg());
    1.390 +  map->set_callee_saved(VMRegImpl::stack2reg(F13_off + num_rt_args), F13->as_VMReg());
    1.391 +  map->set_callee_saved(VMRegImpl::stack2reg(F14_off + num_rt_args), F14->as_VMReg());
    1.392 +  map->set_callee_saved(VMRegImpl::stack2reg(F15_off + num_rt_args), F15->as_VMReg());
    1.393 +  map->set_callee_saved(VMRegImpl::stack2reg(F16_off + num_rt_args), F16->as_VMReg());
    1.394 +  map->set_callee_saved(VMRegImpl::stack2reg(F17_off + num_rt_args), F17->as_VMReg());
    1.395 +  map->set_callee_saved(VMRegImpl::stack2reg(F18_off + num_rt_args), F18->as_VMReg());
    1.396 +  map->set_callee_saved(VMRegImpl::stack2reg(F19_off + num_rt_args), F19->as_VMReg());
   1.397 +#endif
   1.398 +
   1.399 +#ifdef _LP64
   1.400 +  map->set_callee_saved(VMRegImpl::stack2reg(FPH_off + num_rt_args), FP->as_VMReg()->next());
   1.401 +  map->set_callee_saved(VMRegImpl::stack2reg(GPH_off + num_rt_args), GP->as_VMReg()->next());
   1.402 +  map->set_callee_saved(VMRegImpl::stack2reg(T8H_off + num_rt_args), T8->as_VMReg()->next());
   1.403 +  map->set_callee_saved(VMRegImpl::stack2reg(T9H_off + num_rt_args), T9->as_VMReg()->next());
   1.404 +  map->set_callee_saved(VMRegImpl::stack2reg(A0H_off + num_rt_args), A0->as_VMReg()->next());
   1.405 +  map->set_callee_saved(VMRegImpl::stack2reg(A1H_off + num_rt_args), A1->as_VMReg()->next());
   1.406 +  map->set_callee_saved(VMRegImpl::stack2reg(A2H_off + num_rt_args), A2->as_VMReg()->next());
   1.407 +  map->set_callee_saved(VMRegImpl::stack2reg(A3H_off + num_rt_args), A3->as_VMReg()->next());
   1.408 +#endif
   1.409 +  return map;
   1.410 +}
   1.411 +
    1.412 +// FIXME: is it enough to save these registers? @yyq
    1.413 +static OopMap* save_live_registers(StubAssembler* sasm,
    1.414 +                                   int num_rt_args,
    1.415 +                                   bool save_fpu_registers = true,
    1.416 +                                   bool describe_fpu_registers = false) {
   1.417 +  //const int reg_save_frame_size = return_off + 1 + num_rt_args;
   1.418 +  __ block_comment("save_live_registers");
   1.419 +
   1.420 +  // save all register state - int, fpu  
    1.421 +  __ addi(SP, SP, -(reg_save_frame_size / SLOT_PER_WORD - 2) * wordSize);
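          +  // The "- 2" above apparently accounts for the two words (return address and
          +  // saved FP, i.e. the return_off and saved_fp_off slots) already pushed by
          +  // enter(). The *_off values are slot offsets, hence the
          +  // off * wordSize / SLOT_PER_WORD byte offsets below.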
   1.422 +  
   1.423 +#ifndef _LP64
   1.424 +  for (Register r = T0; r != T7->successor(); r = r->successor() ) {
   1.425 +    __ sw(r, SP, (r->encoding() - T0->encoding() + T0_off / SLOT_PER_WORD) * wordSize);
   1.426 +#else
   1.427 +  for (Register r = A4; r != T3->successor(); r = r->successor() ) {
   1.428 +    __ sd(r, SP, (r->encoding() - A4->encoding() + A4_off / SLOT_PER_WORD) * wordSize);
   1.429 +#endif
   1.430 +  }
   1.431 +  for (Register r = S0; r != S7->successor(); r = r->successor() ) {
   1.432 +    __ st_ptr(r, SP, (r->encoding() - S0->encoding() + S0_off / SLOT_PER_WORD) * wordSize);
   1.433 +  }
   1.434 +  __ st_ptr(FP, SP, FP_off * wordSize / SLOT_PER_WORD);
   1.435 +  __ st_ptr(GP, SP, GP_off * wordSize / SLOT_PER_WORD);
   1.436 +  __ st_ptr(T8, SP, T8_off * wordSize / SLOT_PER_WORD);
   1.437 +  __ st_ptr(T9, SP, T9_off * wordSize / SLOT_PER_WORD);
   1.438 +  __ st_ptr(A0, SP, A0_off * wordSize / SLOT_PER_WORD);
   1.439 +  __ st_ptr(A1, SP, A1_off * wordSize / SLOT_PER_WORD);
   1.440 +  __ st_ptr(A2, SP, A2_off * wordSize / SLOT_PER_WORD);
   1.441 +  __ st_ptr(A3, SP, A3_off * wordSize / SLOT_PER_WORD);
   1.442 +  __ st_ptr(V0, SP, V0_off * wordSize / SLOT_PER_WORD);
   1.443 +  __ st_ptr(V1, SP, V1_off * wordSize / SLOT_PER_WORD);	
   1.444 +
   1.445 +#if 1
   1.446 +  __ sdc1(F0, SP, F0_off * wordSize / SLOT_PER_WORD);	
   1.447 +  __ sdc1(F1, SP, F1_off * wordSize / SLOT_PER_WORD);	
   1.448 +  __ sdc1(F2, SP, F2_off * wordSize / SLOT_PER_WORD);	
   1.449 +  __ sdc1(F3, SP, F3_off * wordSize / SLOT_PER_WORD);	
   1.450 +  __ sdc1(F4, SP, F4_off * wordSize / SLOT_PER_WORD);	
   1.451 +  __ sdc1(F5, SP, F5_off * wordSize / SLOT_PER_WORD);	
   1.452 +  __ sdc1(F6, SP, F6_off * wordSize / SLOT_PER_WORD);	
   1.453 +  __ sdc1(F7, SP, F7_off * wordSize / SLOT_PER_WORD);	
   1.454 +  __ sdc1(F8, SP, F8_off * wordSize / SLOT_PER_WORD);	
   1.455 +  __ sdc1(F9, SP, F9_off * wordSize / SLOT_PER_WORD);	
   1.456 +  __ sdc1(F10, SP, F10_off * wordSize / SLOT_PER_WORD);	
   1.457 +  __ sdc1(F11, SP, F11_off * wordSize / SLOT_PER_WORD);	
   1.458 +  __ sdc1(F12, SP, F12_off * wordSize / SLOT_PER_WORD);	
   1.459 +  __ sdc1(F13, SP, F13_off * wordSize / SLOT_PER_WORD);	
   1.460 +  __ sdc1(F14, SP, F14_off * wordSize / SLOT_PER_WORD);	
   1.461 +  __ sdc1(F15, SP, F15_off * wordSize / SLOT_PER_WORD);	
   1.462 +  __ sdc1(F16, SP, F16_off * wordSize / SLOT_PER_WORD);	
   1.463 +  __ sdc1(F17, SP, F17_off * wordSize / SLOT_PER_WORD);	
   1.464 +  __ sdc1(F18, SP, F18_off * wordSize / SLOT_PER_WORD);	
   1.465 +  __ sdc1(F19, SP, F19_off * wordSize / SLOT_PER_WORD);	
   1.466 +#endif
   1.467 +
   1.468 +  return generate_oop_map(sasm, num_rt_args, save_fpu_registers, describe_fpu_registers);
   1.469 +}
   1.470 +
   1.471 +static void restore_fpu(StubAssembler* sasm, bool restore_fpu_registers = true) {
   1.472 +  //static void restore_live_registers(MacroAssembler* sasm) {
   1.473 +#ifndef _LP64
   1.474 +  for (Register r = T0; r != T7->successor(); r = r->successor() ) {
   1.475 +    __ lw(r, SP, (r->encoding() - T0->encoding() + T0_off / SLOT_PER_WORD) * wordSize);
   1.476 +#else
   1.477 +  for (Register r = A4; r != T3->successor(); r = r->successor() ) {
   1.478 +    __ ld(r, SP, (r->encoding() - A4->encoding() + A4_off / SLOT_PER_WORD) * wordSize);
   1.479 +#endif
   1.480 +  }
   1.481 +  for (Register r = S0; r != S7->successor(); r = r->successor() ) {
   1.482 +    __ ld_ptr(r, SP, (r->encoding() - S0->encoding() + S0_off / SLOT_PER_WORD) * wordSize);
   1.483 +  }
   1.484 +  __ ld_ptr(FP, SP, FP_off * wordSize / SLOT_PER_WORD);
   1.485 +  __ ld_ptr(GP, SP, GP_off * wordSize / SLOT_PER_WORD);
   1.486 +
   1.487 +  __ ld_ptr(T8, SP, T8_off * wordSize / SLOT_PER_WORD);
   1.488 +  __ ld_ptr(T9, SP, T9_off * wordSize / SLOT_PER_WORD);
   1.489 +  __ ld_ptr(A0, SP, A0_off * wordSize / SLOT_PER_WORD);
   1.490 +  __ ld_ptr(A1, SP, A1_off * wordSize / SLOT_PER_WORD);
   1.491 +  __ ld_ptr(A2, SP, A2_off * wordSize / SLOT_PER_WORD);
   1.492 +  __ ld_ptr(A3, SP, A3_off * wordSize / SLOT_PER_WORD);
   1.493 +
   1.494 +  __ ld_ptr(V0, SP, V0_off * wordSize / SLOT_PER_WORD);
   1.495 +  __ ld_ptr(V1, SP, V1_off * wordSize / SLOT_PER_WORD);	
   1.496 +
   1.497 +#if 1
   1.498 +  __ ldc1(F0, SP, F0_off * wordSize / SLOT_PER_WORD);
   1.499 +  __ ldc1(F1, SP, F1_off * wordSize / SLOT_PER_WORD);
   1.500 +  __ ldc1(F2, SP, F2_off * wordSize / SLOT_PER_WORD);
   1.501 +  __ ldc1(F3, SP, F3_off * wordSize / SLOT_PER_WORD);
   1.502 +  __ ldc1(F4, SP, F4_off * wordSize / SLOT_PER_WORD);
   1.503 +  __ ldc1(F5, SP, F5_off * wordSize / SLOT_PER_WORD);
   1.504 +  __ ldc1(F6, SP, F6_off * wordSize / SLOT_PER_WORD);
   1.505 +  __ ldc1(F7, SP, F7_off * wordSize / SLOT_PER_WORD);
   1.506 +  __ ldc1(F8, SP, F8_off * wordSize / SLOT_PER_WORD);
   1.507 +  __ ldc1(F9, SP, F9_off * wordSize / SLOT_PER_WORD);
   1.508 +  __ ldc1(F10, SP, F10_off * wordSize / SLOT_PER_WORD);
   1.509 +  __ ldc1(F11, SP, F11_off * wordSize / SLOT_PER_WORD);
   1.510 +  __ ldc1(F12, SP, F12_off * wordSize / SLOT_PER_WORD);
   1.511 +  __ ldc1(F13, SP, F13_off * wordSize / SLOT_PER_WORD);
   1.512 +  __ ldc1(F14, SP, F14_off * wordSize / SLOT_PER_WORD);
   1.513 +  __ ldc1(F15, SP, F15_off * wordSize / SLOT_PER_WORD);
   1.514 +  __ ldc1(F16, SP, F16_off * wordSize / SLOT_PER_WORD);
   1.515 +  __ ldc1(F17, SP, F17_off * wordSize / SLOT_PER_WORD);
   1.516 +  __ ldc1(F18, SP, F18_off * wordSize / SLOT_PER_WORD);
   1.517 +  __ ldc1(F19, SP, F19_off * wordSize / SLOT_PER_WORD);
   1.518 +#endif
   1.519 +
   1.520 +  __ addiu(SP, SP, (reg_save_frame_size / SLOT_PER_WORD - 2) * wordSize);
   1.521 +}
   1.522 +
   1.523 +static void restore_live_registers(StubAssembler* sasm, bool restore_fpu_registers = true) {
   1.524 +  __ block_comment("restore_live_registers");
   1.525 +  restore_fpu(sasm, restore_fpu_registers);
   1.526 +}
   1.527 +
   1.528 +static void restore_live_registers_except_V0(StubAssembler* sasm, bool restore_fpu_registers = true) {	
   1.529 +  //static void restore_live_registers(MacroAssembler* sasm) {
    1.530 +  // FIXME: maybe V1 needs to be saved too
   1.531 +  __ block_comment("restore_live_registers except V0");
   1.532 +#ifndef _LP64
   1.533 +  for (Register r = T0; r != T7->successor(); r = r->successor() ) {
   1.534 +  	__ lw(r, SP, (r->encoding() - T0->encoding() + T0_off / SLOT_PER_WORD) * wordSize);
   1.535 +#else
   1.536 +  for (Register r = A4; r != T3->successor(); r = r->successor() ) {
   1.537 +  	__ ld(r, SP, (r->encoding() - A4->encoding() + A4_off / SLOT_PER_WORD) * wordSize);
   1.538 +#endif
   1.539 +  }
   1.540 +  for (Register r = S0; r != S7->successor(); r = r->successor() ) {
   1.541 +  	__ ld_ptr(r, SP, (r->encoding() - S0->encoding() + S0_off / SLOT_PER_WORD) * wordSize);
   1.542 +  }
   1.543 +  __ ld_ptr(FP, SP, FP_off * wordSize / SLOT_PER_WORD);
   1.544 +  __ ld_ptr(GP, SP, GP_off * wordSize / SLOT_PER_WORD);
   1.545 +
   1.546 +  __ ld_ptr(T8, SP, T8_off * wordSize / SLOT_PER_WORD);
   1.547 +  __ ld_ptr(T9, SP, T9_off * wordSize / SLOT_PER_WORD);
   1.548 +  __ ld_ptr(A0, SP, A0_off * wordSize / SLOT_PER_WORD);
   1.549 +  __ ld_ptr(A1, SP, A1_off * wordSize / SLOT_PER_WORD);
   1.550 +  __ ld_ptr(A2, SP, A2_off * wordSize / SLOT_PER_WORD);
   1.551 +  __ ld_ptr(A3, SP, A3_off * wordSize / SLOT_PER_WORD);
   1.552 +
   1.553 +#if 1
   1.554 +  __ ldc1(F0, SP, F0_off * wordSize / SLOT_PER_WORD);
   1.555 +  __ ldc1(F1, SP, F1_off * wordSize / SLOT_PER_WORD);
   1.556 +  __ ldc1(F2, SP, F2_off * wordSize / SLOT_PER_WORD);
   1.557 +  __ ldc1(F3, SP, F3_off * wordSize / SLOT_PER_WORD);
   1.558 +  __ ldc1(F4, SP, F4_off * wordSize / SLOT_PER_WORD);
   1.559 +  __ ldc1(F5, SP, F5_off * wordSize / SLOT_PER_WORD);
   1.560 +  __ ldc1(F6, SP, F6_off * wordSize / SLOT_PER_WORD);
   1.561 +  __ ldc1(F7, SP, F7_off * wordSize / SLOT_PER_WORD);
   1.562 +  __ ldc1(F8, SP, F8_off * wordSize / SLOT_PER_WORD);
   1.563 +  __ ldc1(F9, SP, F9_off * wordSize / SLOT_PER_WORD);
   1.564 +  __ ldc1(F10, SP, F10_off * wordSize / SLOT_PER_WORD);
   1.565 +  __ ldc1(F11, SP, F11_off * wordSize / SLOT_PER_WORD);
   1.566 +  __ ldc1(F12, SP, F12_off * wordSize / SLOT_PER_WORD);
   1.567 +  __ ldc1(F13, SP, F13_off * wordSize / SLOT_PER_WORD);
   1.568 +  __ ldc1(F14, SP, F14_off * wordSize / SLOT_PER_WORD);
   1.569 +  __ ldc1(F15, SP, F15_off * wordSize / SLOT_PER_WORD);
   1.570 +  __ ldc1(F16, SP, F16_off * wordSize / SLOT_PER_WORD);
   1.571 +  __ ldc1(F17, SP, F17_off * wordSize / SLOT_PER_WORD);
   1.572 +  __ ldc1(F18, SP, F18_off * wordSize / SLOT_PER_WORD);
   1.573 +  __ ldc1(F19, SP, F19_off * wordSize / SLOT_PER_WORD);
   1.574 +#endif
   1.575 +
   1.576 +  __ ld_ptr(V1, SP, V1_off * wordSize / SLOT_PER_WORD);	
   1.577 +
   1.578 +  __ addiu(SP, SP, (reg_save_frame_size / SLOT_PER_WORD - 2) * wordSize);
   1.579 +}
   1.580 +
   1.581 +void Runtime1::initialize_pd() {
   1.582 +  // nothing to do
   1.583 +}
   1.584 +
   1.585 +// target: the entry point of the method that creates and posts the exception oop
   1.586 +// has_argument: true if the exception needs an argument (passed on stack because registers must be preserved)
   1.587 +OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address target, bool has_argument) {
   1.588 +	// preserve all registers
   1.589 +	OopMap* oop_map = save_live_registers(sasm, 0);
   1.590 +
   1.591 +	// now all registers are saved and can be used freely
   1.592 +	// verify that no old value is used accidentally
    1.593 +	// all registers are saved; I think MIPS does not need this
   1.594 +
   1.595 +	// registers used by this stub
   1.596 +	const Register temp_reg = T3; 
   1.597 +	// load argument for exception that is passed as an argument into the stub
   1.598 +	if (has_argument) {
   1.599 +		__ ld_ptr(temp_reg, Address(FP, 2*BytesPerWord));
   1.600 +	}
    1.601 +	int call_offset;
    1.602 +	if (has_argument)
    1.603 +		call_offset = __ call_RT(noreg, noreg, target, temp_reg);
    1.604 +	else
    1.605 +		call_offset = __ call_RT(noreg, noreg, target);
   1.606 +	
   1.607 +	OopMapSet* oop_maps = new OopMapSet();
   1.608 +	oop_maps->add_gc_map(call_offset, oop_map);
   1.609 +
   1.610 +	__ stop("should not reach here");
   1.611 +
   1.612 +	return oop_maps;
   1.613 +}
   1.614 +
    1.615 +// FIXME: I do not know which register to use; should T3 be used as real_return_addr? @jerome
    1.616 +OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) {
    1.617 +	__ block_comment("generate_handle_exception");
    1.618 +	// incoming parameters
   1.619 +	const Register exception_oop = V0;
   1.620 +	const Register exception_pc = V1;
   1.621 +	// other registers used in this stub
   1.622 +//	const Register real_return_addr = T3;
   1.623 +	const Register thread = T8;
   1.624 +  // Save registers, if required.
   1.625 +   OopMapSet* oop_maps = new OopMapSet();
   1.626 +   OopMap* oop_map = NULL;
   1.627 +   switch (id) {
   1.628 +   case forward_exception_id:
   1.629 +     // We're handling an exception in the context of a compiled frame.
   1.630 +     // The registers have been saved in the standard places.  Perform
   1.631 +     // an exception lookup in the caller and dispatch to the handler
   1.632 +     // if found.  Otherwise unwind and dispatch to the callers
   1.633 +     // exception handler.
   1.634 +     oop_map = generate_oop_map(sasm, 1 /*thread*/);
   1.635 + 
    1.636 +     // load and clear pending exception oop into V0
    1.637 +     __ ld(exception_oop, Address(thread, Thread::pending_exception_offset()));
    1.638 +     __ sw(R0, Address(thread, Thread::pending_exception_offset()));
    1.639 +
    1.640 +     // load issuing PC (the return address for this stub) into V1
    1.641 +     __ ld(exception_pc, Address(FP, 1*BytesPerWord));
    1.642 +
    1.643 +     // make sure that the vm_results are cleared (may be unnecessary)
    1.644 +     __ sw(R0, Address(thread, JavaThread::vm_result_offset()));
    1.645 +     __ sw(R0, Address(thread, JavaThread::vm_result_2_offset()));
   1.646 +     break;
   1.647 +   case handle_exception_nofpu_id:
   1.648 +   case handle_exception_id:
   1.649 +     // At this point all registers MAY be live.
   1.650 +     oop_map = save_live_registers(sasm, 1 /*thread*/, id == handle_exception_nofpu_id);
   1.651 +     break;
   1.652 +   case handle_exception_from_callee_id: {
    1.653 +     // At this point all registers except the exception oop (V0) and
    1.654 +     // exception pc (V1) are dead.
    1.655 +     const int frame_size = 2 /*BP, return address*/ NOT_LP64(+ 1 /*thread*/) WIN64_ONLY(+ frame::arg_reg_save_area_bytes / BytesPerWord);
   1.656 +     oop_map = new OopMap(frame_size * VMRegImpl::slots_per_word, 0);
   1.657 +     sasm->set_frame_size(frame_size);
   1.658 +     WIN64_ONLY(__ subq(rsp, frame::arg_reg_save_area_bytes));
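          +     // Note: WIN64_ONLY() expands to nothing in this port, so the Win64 lines
          +     // above are retained from the x86 version only for reference.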
   1.659 +     break;
   1.660 +   }
   1.661 +   default:  ShouldNotReachHere();
   1.662 +   }
   1.663 +
   1.664 +#ifdef TIERED
   1.665 +	// C2 can leave the fpu stack dirty
   1.666 +	__ empty_FPU_stack();
   1.667 +	//}
   1.668 +#endif // TIERED
   1.669 +
    1.670 +	// verify that only V0 and V1 are valid at this time
    1.671 +	// verify that V0 contains a valid exception
   1.672 +	__ verify_not_null_oop(exception_oop);
   1.673 +
   1.674 +	// load address of JavaThread object for thread-local data
   1.675 +	__ get_thread(thread);
   1.676 +
   1.677 +#ifdef ASSERT
   1.678 +	// check that fields in JavaThread for exception oop and issuing pc are 
   1.679 +	// empty before writing to them
   1.680 +	Label oop_empty;
   1.681 +	__ ld_ptr(AT, Address(thread, in_bytes(JavaThread::exception_oop_offset()))); 
   1.682 +	__ beq(AT, R0, oop_empty); 
   1.683 +	__ delayed()->nop(); 
   1.684 +	__ stop("exception oop already set");
   1.685 +	__ bind(oop_empty);
   1.686 +	Label pc_empty;
   1.687 +	__ ld_ptr(AT, Address(thread, in_bytes(JavaThread::exception_pc_offset()))); 
   1.688 +	__ beq(AT, R0, pc_empty); 
   1.689 +	__ delayed()->nop(); 
   1.690 +	__ stop("exception pc already set");
   1.691 +	__ bind(pc_empty);
   1.692 +#endif
   1.693 +
   1.694 +	// save exception oop and issuing pc into JavaThread
   1.695 +	// (exception handler will load it from here)
   1.696 +	__ st_ptr(exception_oop, Address(thread, in_bytes(JavaThread::exception_oop_offset())));
   1.697 +	__ st_ptr(exception_pc, Address(thread, in_bytes(JavaThread::exception_pc_offset())));
   1.698 +
   1.699 +	// save real return address (pc that called this stub)
   1.700 +//	__ ld_ptr(real_return_addr, FP, 1*BytesPerWord);   
   1.701 +//	__ st_ptr(real_return_addr, SP, temp_1_off * BytesPerWord / SLOT_PER_WORD);
   1.702 +
   1.703 +	// patch throwing pc into return address (has bci & oop map)
   1.704 +	__ st_ptr(exception_pc, FP, 1*BytesPerWord);       
   1.705 +	// compute the exception handler. 
   1.706 +	// the exception oop and the throwing pc are read from the fields in JavaThread
   1.707 +	int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, 
   1.708 +				exception_handler_for_pc));
   1.709 +	oop_maps->add_gc_map(call_offset, oop_map);
    1.710 +	// V0:  handler address, or NULL if no handler exists;
    1.711 +	//      will be the deopt blob if the nmethod was deoptimized while we looked up
    1.712 +	//      the handler, regardless of whether a handler existed in the nmethod.
   1.713 +
   1.714 +	// only V0 is valid at this time, all other registers have been destroyed by the 
   1.715 +	// runtime call
   1.716 +
   1.717 +	// Do we have an exception handler in the nmethod?
   1.718 +	/*Label no_handler;
   1.719 +	Label done;
   1.720 +	__ beq(V0, R0, no_handler);
   1.721 +	__ delayed()->nop(); */
   1.722 +	// exception handler found
   1.723 +	// patch the return address -> the stub will directly return to the exception handler
   1.724 +	__ st_ptr(V0, FP, 1 * BytesPerWord); 
   1.725 +
   1.726 +	// restore registers
   1.727 +//	restore_live_registers(sasm, save_fpu_registers);
   1.728 +
   1.729 +	// return to exception handler
   1.730 +//	__ leave();
   1.731 +//	__ jr(RA);
   1.732 +//	__ delayed()->nop(); 
   1.733 +//	__ bind(no_handler);
   1.734 +	// no exception handler found in this method, so the exception is 
   1.735 +	// forwarded to the caller (using the unwind code of the nmethod)
   1.736 +	// there is no need to restore the registers
   1.737 +
   1.738 +	// restore the real return address that was saved before the RT-call
   1.739 +//	__ ld_ptr(real_return_addr, SP, temp_1_off * BytesPerWord / SLOT_PER_WORD);
   1.740 +//	__ st_ptr(real_return_addr, FP, 1 * BytesPerWord); 
   1.741 +	// load address of JavaThread object for thread-local data
   1.742 +//	__ get_thread(thread);
   1.743 +	// restore exception oop into eax (convention for unwind code)
   1.744 +//	__ ld_ptr(exception_oop, thread, in_bytes(JavaThread::exception_oop_offset()));
   1.745 +
   1.746 +	// clear exception fields in JavaThread because they are no longer needed
   1.747 +	// (fields must be cleared because they are processed by GC otherwise)
   1.748 +//	__ st_ptr(R0, thread, in_bytes(JavaThread::exception_oop_offset()));
   1.749 +//	__ st_ptr(R0,thread, in_bytes(JavaThread::exception_pc_offset())); 
   1.750 +	// pop the stub frame off
   1.751 +//	__ leave();
   1.752 +//	generate_unwind_exception(sasm);
   1.753 +//	__ stop("should not reach here");
   1.754 +//}
   1.755 +   switch (id) {
   1.756 +   case forward_exception_id:
   1.757 +   case handle_exception_nofpu_id:
   1.758 +   case handle_exception_id:
   1.759 +     // Restore the registers that were saved at the beginning.
   1.760 +     restore_live_registers(sasm, id == handle_exception_nofpu_id);
   1.761 +     break;
   1.762 +   case handle_exception_from_callee_id:
   1.763 +     // WIN64_ONLY: No need to add frame::arg_reg_save_area_bytes to SP
   1.764 +     // since we do a leave anyway.
   1.765 + 
   1.766 +     // Pop the return address since we are possibly changing SP (restoring from BP).
   1.767 +     __ leave();
   1.768 +     // Restore SP from BP if the exception PC is a method handle call site.
   1.769 +     NOT_LP64(__ get_thread(thread);)
   1.770 +     /*__ ld(AT, Address(thread, JavaThread::is_method_handle_return_offset()));
   1.771 +     __ beq(AT, R0, done);
   1.772 +     __ move(SP, rbp_mh_SP_save);
   1.773 +     __ bind(done);
   1.774 +     __ jr(RA);  // jump to exception handler
   1.775 +     __ delayed()->nop();*/
   1.776 +// 759     __ cmpl(Address(thread, JavaThread::is_method_handle_return_offset()), 0);
   1.777 +// 760     __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save);
   1.778 +// 761     __ jmp(rcx);  // jump to exception handler
   1.779 + 
   1.780 +     break;
   1.781 +   default:  ShouldNotReachHere();
   1.782 +   }
   1.783 +  
   1.784 +   return oop_maps;
   1.785 + }
   1.786 +
   1.787 +
   1.788 +
   1.789 +
   1.790 +
   1.791 +void Runtime1::generate_unwind_exception(StubAssembler *sasm) {
   1.792 +	// incoming parameters
   1.793 +	const Register exception_oop = V0;
   1.794 +	// other registers used in this stub
   1.795 +	const Register exception_pc = V1;
   1.796 +	const Register handler_addr = T3;
   1.797 +	const Register thread = T8;
   1.798 +
    1.799 +	// verify that only V0 is valid at this time
   1.800 +	//  __ invalidate_registers(false, true, true, true, true, true);
   1.801 +
   1.802 +#ifdef ASSERT
   1.803 +	// check that fields in JavaThread for exception oop and issuing pc are empty
   1.804 +	__ get_thread(thread);
   1.805 +	Label oop_empty;
   1.806 +	__ ld_ptr(AT, thread, in_bytes(JavaThread::exception_oop_offset())); 
   1.807 +	__ beq(AT, R0, oop_empty); 
   1.808 +	__ delayed()->nop(); 
   1.809 +	__ stop("exception oop must be empty");
   1.810 +	__ bind(oop_empty);
   1.811 +
   1.812 +	Label pc_empty;
   1.813 +	__ ld_ptr(AT, thread, in_bytes(JavaThread::exception_pc_offset())); 
   1.814 +	__ beq(AT,R0, pc_empty); 
   1.815 +	__ delayed()->nop(); 
   1.816 +	__ stop("exception pc must be empty");
   1.817 +	__ bind(pc_empty);
   1.818 +#endif
   1.819 +	// clear the FPU stack in case any FPU results are left behind
   1.820 +	__ empty_FPU_stack();
   1.821 +
   1.822 +	// leave activation of nmethod
   1.823 +	__ addi(SP, FP, wordSize);	
   1.824 +	__ ld_ptr(FP, SP, - wordSize);
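          +	// the two instructions above are the manual equivalent of leave(): restore
          +	// SP from FP and reload the saved FP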
    1.825 +	// load the return address (on top of the stack after the leave)
   1.826 +	__ ld_ptr(exception_pc, SP, 0);
   1.827 +	__ verify_oop(exception_oop);
   1.828 +
    1.829 +	// save exception oop from V0 to the stack before the call
   1.830 +	__ push(exception_oop);
   1.831 +	// search the exception handler address of the caller (using the return address)
   1.832 +	__ call_VM_leaf(CAST_FROM_FN_PTR(address, 
   1.833 +			SharedRuntime::exception_handler_for_return_address), exception_pc);
    1.834 +	// V0: exception handler address of the caller
    1.835 +
    1.836 +	// only V0 is valid at this time; all other registers have been destroyed by the call
   1.837 +
   1.838 +	// move result of call into correct register
   1.839 +	__ move(handler_addr, V0);
    1.840 +	// restore exception oop into V0 (required convention of exception handler)
   1.841 +	__ super_pop(exception_oop);
   1.842 +
   1.843 +	__ verify_oop(exception_oop);
   1.844 +
    1.845 +	// get throwing pc (= return address).
    1.846 +	// V1 has been destroyed by the call, so it must be set again;
    1.847 +	// the pop is also necessary to simulate the effect of a ret(0)
    1.848 +	__ super_pop(exception_pc);
    1.849 +	// verify that there is really a valid exception in V0
    1.850 +	__ verify_not_null_oop(exception_oop);
   1.851 +
   1.852 +	// continue at exception handler (return address removed)
   1.853 +	// note: do *not* remove arguments when unwinding the
   1.854 +	//       activation since the caller assumes having
   1.855 +	//       all arguments on the stack when entering the
   1.856 +	//       runtime to determine the exception handler
   1.857 +	//       (GC happens at call site with arguments!)
    1.858 +	// V0: exception oop
    1.859 +	// V1: throwing pc
    1.860 +	// T3: exception handler
   1.861 +	__ jr(handler_addr);
   1.862 +	__ delayed()->nop();
   1.863 +}
   1.864 +
   1.865 +
   1.866 +
   1.867 +
   1.868 +//static address deopt_with_exception_entry_for_patch = NULL;
   1.869 +
   1.870 +OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) {
   1.871 + 
   1.872 +	// use the maximum number of runtime-arguments here because it is difficult to 
   1.873 +	// distinguish each RT-Call.
   1.874 +	// Note: This number affects also the RT-Call in generate_handle_exception because
   1.875 +	//       the oop-map is shared for all calls.
   1.876 +
   1.877 +
   1.878 +
   1.879 +
   1.880 +	DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob();
   1.881 +	assert(deopt_blob != NULL, "deoptimization blob must have been created");
   1.882 +	// assert(deopt_with_exception_entry_for_patch != NULL, 
   1.883 +	// "deoptimization blob must have been created");
   1.884 +
   1.885 +	//OopMap* oop_map = save_live_registers(sasm, num_rt_args);
   1.886 +	OopMap* oop_map = save_live_registers(sasm, 0);
   1.887 +#ifndef OPT_THREAD
   1.888 +	const Register thread = T8; 
   1.889 +	// push java thread (becomes first argument of C function)
   1.890 +	__ get_thread(thread);
   1.891 +#else
   1.892 +	const Register thread = TREG;
   1.893 +#endif
   1.894 +	__ move(A0, thread);
   1.895 +
   1.896 +
    1.897 +/*
    1.898 + *	NOTE: this frame should be a compiled frame, but at this point the pc in the frame
    1.899 + *	anchor still points into the interpreter, which is wrong and should be cleared but is not.
    1.900 + *	Even if we cleared it, the default way of getting the caller pc in the frame class
    1.901 + *	would still be wrong here: it assumes the caller pc is stored at *(sp - 1), which is not the case.
    1.902 + */
   1.903 +	__ set_last_Java_frame(thread, NOREG, FP, NULL);
   1.904 +	NOT_LP64(__ addiu(SP, SP, (-1) * wordSize));
   1.905 +	__ move(AT, -(StackAlignmentInBytes));
   1.906 +	__ andr(SP, SP, AT);
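          +	// same last_Java_pc bookkeeping as in StubAssembler::call_RT: save_pc must
          +	// point at the return address of the jalr emitted below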
   1.907 +	__ relocate(relocInfo::internal_pc_type); 
   1.908 +	{	
   1.909 +#ifndef _LP64
   1.910 +		int save_pc = (int)__ pc() +  12 + NativeCall::return_address_offset;
   1.911 +		__ lui(AT, Assembler::split_high(save_pc));
   1.912 +		__ addiu(AT, AT, Assembler::split_low(save_pc));
   1.913 +#else
   1.914 +		uintptr_t save_pc = (uintptr_t)__ pc() + NativeMovConstReg::instruction_size + 1 * BytesPerInstWord + NativeCall::return_address_offset;
   1.915 +		__ li48(AT, save_pc);
   1.916 +#endif
   1.917 +	}
   1.918 +	__ st_ptr(AT, thread, in_bytes(JavaThread::last_Java_pc_offset())); 
   1.919 +
   1.920 +	// do the call
   1.921 +#ifndef _LP64
   1.922 +	__ lui(T9, Assembler::split_high((int)target));	
   1.923 +	__ addiu(T9, T9, Assembler::split_low((int)target));
   1.924 +#else
   1.925 +	__ li48(T9, (intptr_t)target);	
   1.926 +#endif
   1.927 +	__ jalr(T9);
   1.928 +	__ delayed()->nop();
   1.929 +	OopMapSet*  oop_maps = new OopMapSet();
   1.930 +	oop_maps->add_gc_map(__ offset(),  oop_map);
   1.931 +
   1.932 +#ifndef OPT_THREAD
   1.933 +	__ get_thread(thread);
   1.934 +#endif
   1.935 +    
    1.936 +	__ ld_ptr(SP, thread, in_bytes(JavaThread::last_Java_sp_offset()));
    1.937 +	__ reset_last_Java_frame(thread, true, true);
   1.938 +	// discard thread arg
   1.939 +	// check for pending exceptions
   1.940 +	{ 
   1.941 +		Label L, skip;
   1.942 +		//Label no_deopt;
   1.943 +		__ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset()));
   1.944 +		__ beq(AT, R0, L);
   1.945 +		__ delayed()->nop();
   1.946 +		// exception pending => remove activation and forward to exception handler
   1.947 +
    1.948 +		__ bne(V0, R0, skip);
    1.949 +		__ delayed()->nop();
    1.951 +		__ jmp(Runtime1::entry_for(Runtime1::forward_exception_id),
    1.952 +				relocInfo::runtime_call_type);
    1.953 +		__ delayed()->nop();
    1.954 +		__ bind(skip);
   1.955 +
   1.956 +		// the deopt blob expects exceptions in the special fields of
   1.957 +		// JavaThread, so copy and clear pending exception.
   1.958 +
   1.959 +		// load and clear pending exception
   1.960 +		__ ld_ptr(V0, Address(thread,in_bytes(Thread::pending_exception_offset())));
   1.961 +		__ st_ptr(R0, Address(thread, in_bytes(Thread::pending_exception_offset())));
   1.962 +
   1.963 +		// check that there is really a valid exception 
   1.964 +		__ verify_not_null_oop(V0);
   1.965 +
   1.966 +		// load throwing pc: this is the return address of the stub
   1.967 +		__ ld_ptr(V1, Address(SP, return_off * BytesPerWord));
   1.968 +
   1.969 +
   1.970 +#ifdef ASSERT
   1.971 +		// check that fields in JavaThread for exception oop and issuing pc are empty
   1.972 +		Label oop_empty;
   1.973 +		__ ld_ptr(AT, Address(thread, in_bytes(JavaThread::exception_oop_offset()))); 
   1.974 +		__ beq(AT,R0,oop_empty); 
   1.975 +		__ delayed()->nop(); 
   1.976 +		__ stop("exception oop must be empty");
   1.977 +		__ bind(oop_empty);
   1.978 +
   1.979 +		Label pc_empty;
   1.980 +		__ ld_ptr(AT, Address(thread, in_bytes(JavaThread::exception_pc_offset()))); 
   1.981 +		__ beq(AT,R0,pc_empty); 
   1.982 +		__ delayed()->nop(); 
   1.983 +		__ stop("exception pc must be empty");
   1.984 +		__ bind(pc_empty);
   1.985 +#endif
   1.986 +
   1.987 +		// store exception oop and throwing pc to JavaThread
   1.988 +		__ st_ptr(V0,Address(thread, in_bytes(JavaThread::exception_oop_offset())));
   1.989 +		__ st_ptr(V1,Address(thread, in_bytes(JavaThread::exception_pc_offset())));
   1.990 +
   1.991 +		restore_live_registers(sasm);
   1.992 +
   1.993 +		__ leave();
   1.994 +
    1.995 +		// Forward the exception directly to the deopt blob. We can blow no
    1.996 +		// registers and must leave the throwing pc on the stack. A patch may
    1.997 +		// have values live in registers, so we use the entry point that takes
    1.998 +		// the exception in TLS.
   1.999 +		__ jmp(deopt_blob->unpack_with_exception_in_tls(), relocInfo::runtime_call_type);
  1.1000 +		__ delayed()->nop();
  1.1001 +		  
  1.1002 +		__ bind(L);
  1.1003 +	}
  1.1004 +
  1.1005 +	// Runtime will return true if the nmethod has been deoptimized during
  1.1006 +	// the patching process. In that case we must do a deopt reexecute instead.
  1.1007 +
  1.1008 +	Label reexecuteEntry, cont;
  1.1009 +
  1.1010 +	__ beq(V0, R0, cont);                              // have we deoptimized?
  1.1011 +	__ delayed()->nop();
  1.1012 +
   1.1013 +	// Will reexecute. The proper return address is already on the stack; we just restore
   1.1014 +	// registers, pop all of our frame but the return address, and jump to the deopt blob.
  1.1015 +	restore_live_registers(sasm);
  1.1016 +
  1.1017 +	__ leave();
  1.1018 +	__ jmp(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type);
  1.1019 +	__ delayed()->nop();
  1.1020 +
  1.1021 +	__ bind(cont);
  1.1022 +	restore_live_registers(sasm);
  1.1023 +
  1.1024 +	__ leave();
  1.1025 +	__ jr(RA);
  1.1026 +	__ delayed()->nop();
  1.1027 +
  1.1028 +	return oop_maps;
  1.1029 +}
  1.1030 +
  1.1031 +
  1.1032 +OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
  1.1033 +	// for better readability
  1.1034 +	const bool must_gc_arguments = true;
  1.1035 +	const bool dont_gc_arguments = false;
  1.1036 +
  1.1037 +
  1.1038 +	// default value; overwritten for some optimized stubs that are called 
  1.1039 +	// from methods that do not use the fpu
  1.1040 +	bool save_fpu_registers = true;
  1.1041 +
  1.1042 +
  1.1043 +	// stub code & info for the different stubs
  1.1044 +	OopMapSet* oop_maps = NULL;
  1.1045 +
  1.1046 +  switch (id) {
  1.1047 +    case forward_exception_id:
  1.1048 +      {
  1.1049 +        // we're handling an exception in the context of a compiled
  1.1050 +        // frame.  The registers have been saved in the standard
  1.1051 +        // places.  Perform an exception lookup in the caller and
  1.1052 +        // dispatch to the handler if found.  Otherwise unwind and
  1.1053 +        // dispatch to the callers exception handler.
  1.1054 +
  1.1055 +        const Register exception_oop = V0;
  1.1056 +        const Register exception_pc = V1;
   1.1057 +#ifndef OPT_THREAD
   1.1058 +        const Register thread = T8;
   1.1059 +        __ get_thread(thread);
   1.1060 +#else
   1.1061 +        const Register thread = TREG;
   1.1062 +#endif
   1.1063 +        // load pending exception oop into V0
  1.1064 +        __ ld_ptr(exception_oop, thread, in_bytes(Thread::pending_exception_offset()));
  1.1065 +        // clear pending exception
  1.1066 +        __ st_ptr(R0, thread, in_bytes(Thread::pending_exception_offset()));
  1.1067 +
  1.1068 +        // load issuing PC (the return address for this stub) into V1
  1.1069 +        __ ld_ptr(exception_pc, FP, 1*BytesPerWord);
  1.1070 +
  1.1071 +        // make sure that the vm_results are cleared (may be unnecessary)
  1.1072 +        __ st_ptr(R0, Address(thread, in_bytes(JavaThread::vm_result_offset())));
  1.1073 +        __ st_ptr(R0, Address(thread, in_bytes(JavaThread::vm_result_2_offset())));
  1.1074 +
   1.1075 +        // verify that there is really a valid exception in V0
  1.1076 +        __ verify_not_null_oop(exception_oop);
  1.1077 +
  1.1078 +
  1.1079 +        oop_maps = new OopMapSet();
  1.1080 +        OopMap* oop_map = generate_oop_map(sasm, 0);
  1.1081 +        generate_handle_exception(id, sasm);
  1.1082 +        __ stop("should not reach here");
  1.1083 +      }
  1.1084 +      break;
  1.1085 +
  1.1086 +    case new_instance_id:
  1.1087 +    case fast_new_instance_id:
  1.1088 +    case fast_new_instance_init_check_id:
  1.1089 +      {
   1.1090 +        // We use T4 as the klass register and V0 as the result register; MUST match NewInstanceStub::emit_code
  1.1091 +#ifndef _LP64
  1.1092 +        Register klass = T4; // Incoming
  1.1093 +#else
  1.1094 +        Register klass = A4; // Incoming
  1.1095 +#endif
  1.1096 +        Register obj   = V0; // Result
  1.1097 +
  1.1098 +        if (id == new_instance_id) {
  1.1099 +          __ set_info("new_instance", dont_gc_arguments);
  1.1100 +        } else if (id == fast_new_instance_id) {
  1.1101 +          __ set_info("fast new_instance", dont_gc_arguments);
  1.1102 +        } else {
  1.1103 +          assert(id == fast_new_instance_init_check_id, "bad StubID");
  1.1104 +          __ set_info("fast new_instance init check", dont_gc_arguments);
  1.1105 +        }
  1.1106 +
  1.1107 +        if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) 
  1.1108 +             && UseTLAB && FastTLABRefill) {
  1.1109 +          Label slow_path;
  1.1110 +          Register obj_size = T0;
  1.1111 +          Register t1       = T2;
  1.1112 +          Register t2       = T3;
  1.1113 +          assert_different_registers(klass, obj, obj_size, t1, t2);
  1.1114 +          if (id == fast_new_instance_init_check_id) {
  1.1115 +            // make sure the klass is initialized
  1.1116 +            __ lw(AT, klass, in_bytes(InstanceKlass::init_state_offset()));
  1.1117 +            __ move(t1, InstanceKlass::fully_initialized);
  1.1118 +            __ bne(AT, t1, slow_path);
  1.1119 +            __ delayed()->nop();
  1.1120 +          }
  1.1121 +#ifdef ASSERT
  1.1122 +          // assert object can be fast path allocated
  1.1123 +          {
  1.1124 +            Label ok, not_ok;
  1.1125 +            __ lw(obj_size, klass, in_bytes(Klass::layout_helper_offset()));
  1.1126 +            __ blez(obj_size, not_ok);
  1.1127 +            __ delayed()->nop();
  1.1128 +            __ andi(t1 , obj_size, Klass::_lh_instance_slow_path_bit);
  1.1129 +            __ beq(t1, R0, ok);
  1.1130 +            __ delayed()->nop();
  1.1131 +            __ bind(not_ok);
  1.1132 +            __ stop("assert(can be fast path allocated)");
  1.1133 +            __ should_not_reach_here();
  1.1134 +            __ bind(ok);
  1.1135 +          }
  1.1136 +#endif // ASSERT
  1.1137 +          // if we got here then the TLAB allocation failed, so try
  1.1138 +          // refilling the TLAB or allocating directly from eden.
  1.1139 +          
  1.1140 +          Label retry_tlab, try_eden;
   1.1141 +          __ tlab_refill(retry_tlab, try_eden, slow_path); // does not destroy the klass register
  1.1142 +          
  1.1143 +          __ bind(retry_tlab);
  1.1144 +          
  1.1145 +          // get the instance size
  1.1146 +          __ lw(obj_size, klass, in_bytes(Klass::layout_helper_offset()));
  1.1147 +          __ tlab_allocate(obj, obj_size, 0, t1, t2, slow_path);
  1.1148 +          __ initialize_object(obj, klass, obj_size, 0, t1, t2);
  1.1149 +          __ verify_oop(obj);
  1.1150 +          __ jr(RA);
  1.1151 +          __ delayed()->nop();
  1.1152 +         
  1.1153 +          __ bind(try_eden);
  1.1154 +
  1.1155 +          // get the instance size  
  1.1156 +          __ lw(obj_size, klass, in_bytes(Klass::layout_helper_offset()));
  1.1157 +          __ eden_allocate(obj, obj_size, 0, t1, t2, slow_path);
  1.1158 +          __ initialize_object(obj, klass, obj_size, 0, t1, t2);
  1.1159 +          __ verify_oop(obj);
  1.1160 +          __ jr(RA);
  1.1161 +          __ delayed()->nop();
  1.1162 +          
  1.1163 +          __ bind(slow_path);
  1.1164 +        }
  1.1165 +        __ enter();
  1.1166 +        OopMap* map = save_live_registers(sasm, 0);
  1.1167 +        int call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_instance), klass);
  1.1168 +        oop_maps = new OopMapSet();
  1.1169 +        oop_maps->add_gc_map(call_offset, map);
  1.1170 +        restore_live_registers_except_V0(sasm);
  1.1171 +        __ verify_oop(obj);
  1.1172 +        __ leave();
  1.1173 +        __ jr(RA);
  1.1174 +        __ delayed()->nop();
  1.1175 +        
  1.1176 +        // V0: new instance
  1.1177 +      }
  1.1178 +      break;
  1.1179 +
  1.1180 +
  1.1181 +#ifdef TIERED
   1.1182 +// FIXME: I have no idea which register to use
  1.1183 +    case counter_overflow_id:
  1.1184 +      {
  1.1185 +#ifndef _LP64
  1.1186 +        Register bci = T5;
  1.1187 +#else
  1.1188 +        Register bci = A5;
  1.1189 +#endif
  1.1190 +        __ enter();
  1.1191 +        OopMap* map = save_live_registers(sasm, 0);
  1.1192 +        // Retrieve bci
   1.1193 +        __ lw(bci, Address(FP, 2*BytesPerWord)); // FIXME (wuhui): does FP match ebp in the x86 version?
   1.1194 +        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, counter_overflow), bci);
  1.1195 +        oop_maps = new OopMapSet();
  1.1196 +        oop_maps->add_gc_map(call_offset, map);
  1.1197 +        restore_live_registers(sasm);
  1.1198 +        __ leave();
  1.1199 +        __ jr(RA); 
  1.1200 +        __ delayed()->nop(); 
  1.1201 +      }
  1.1202 +      break;
  1.1203 +#endif // TIERED
  1.1204 +
  1.1205 +
  1.1206 +
  1.1207 +    case new_type_array_id:
  1.1208 +    case new_object_array_id:
  1.1209 +      { 
   1.1210 +        // We use T2 as the length register, T4 (A4 on LP64) as the klass register and V0 as the result register.
   1.1211 +        // This MUST accord with NewTypeArrayStub::emit_code and NewObjectArrayStub::emit_code.
  1.1212 +        Register length   = T2; // Incoming
  1.1213 +#ifndef _LP64
  1.1214 +        Register klass    = T4; // Incoming
  1.1215 +#else
  1.1216 +        Register klass    = A4; // Incoming
  1.1217 +#endif
  1.1218 +        Register obj      = V0; // Result
  1.1219 +        
  1.1220 +        if (id == new_type_array_id) {
  1.1221 +          __ set_info("new_type_array", dont_gc_arguments);
  1.1222 +        } else {
  1.1223 +          __ set_info("new_object_array", dont_gc_arguments);
  1.1224 +        }
  1.1225 +               
  1.1226 +        if (UseTLAB && FastTLABRefill) {
  1.1227 +          Register arr_size = T0;
  1.1228 +          Register t1       = T1; 
  1.1229 +          Register t2       = T3;
  1.1230 +          Label slow_path;
  1.1231 +          assert_different_registers(length, klass, obj, arr_size, t1, t2);
  1.1232 +        
  1.1233 +          // check that array length is small enough for fast path
  1.1234 +          __ move(AT, C1_MacroAssembler::max_array_allocation_length);
  1.1235 +          __ sltu(AT, AT, length);
  1.1236 +          __ bne(AT, R0, slow_path);
  1.1237 +          __ delayed()->nop();
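          +          // AT = (max_array_allocation_length < length); oversized requests
          +          // must take the slow path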
  1.1238 +
  1.1239 +          // if we got here then the TLAB allocation failed, so try
  1.1240 +          // refilling the TLAB or allocating directly from eden.
  1.1241 +          Label retry_tlab, try_eden;
   1.1242 +          // note: tlab_refill clobbers T0, T1, T5 and T8
   1.1243 +          __ tlab_refill(retry_tlab, try_eden, slow_path); // preserves length (T2) and klass
  1.1244 +        
  1.1245 +          __ bind(retry_tlab);
  1.1246 +        
  1.1247 +          // get the allocation size: (length << (layout_helper & 0x1F)) + header_size
  1.1248 +          __ lw(t1, klass, in_bytes(Klass::layout_helper_offset()));	 
  1.1249 +          __ andi(AT, t1, 0x1f);
  1.1250 +          __ sllv(arr_size, length, AT);
  1.1251 +          __ srl(t1, t1, Klass::_lh_header_size_shift);
  1.1252 +          __ andi(t1, t1, Klass::_lh_header_size_mask);
  1.1253 +          __ add(arr_size, t1, arr_size);
  1.1254 +          __ addi(arr_size, arr_size, MinObjAlignmentInBytesMask);  // align up
  1.1255 +          __ move(AT, ~MinObjAlignmentInBytesMask);
  1.1256 +          __ andr(arr_size, arr_size, AT);
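          +          // an array's layout_helper packs log2(element size) into its low
          +          // byte and the header size in bytes into bits 16..23, hence:
          +          // arr_size = align_up((length << log2_esize) + header_size,
          +          //                     MinObjAlignmentInBytes)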
  1.1257 +        
  1.1258 +        
  1.1259 +          __ tlab_allocate(obj, arr_size, 0, t1, t2, slow_path);  // preserves arr_size
   1.1260 +          __ initialize_header(obj, klass, length, t1, t2);
  1.1261 +          __ lbu(t1, Address(klass, in_bytes(Klass::layout_helper_offset()) 
  1.1262 +                                    + (Klass::_lh_header_size_shift / BitsPerByte)));
  1.1263 +          assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise");
  1.1264 +          assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise");
  1.1265 +          __ andi(t1, t1, Klass::_lh_header_size_mask);
  1.1266 +          __ sub(arr_size, arr_size, t1);  // body length
  1.1267 +          __ add(t1, t1, obj);             // body start
  1.1268 +          __ initialize_body(t1, arr_size, 0, t2);
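          +          // initialize_body zeroes the freshly allocated body; the header
          +          // was already written by initialize_header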
  1.1269 +          __ verify_oop(obj);
  1.1270 +          __ jr(RA);
  1.1271 +          __ delayed()->nop();
  1.1272 +        
  1.1273 +          __ bind(try_eden);
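          +          // try_eden is a separate entry out of tlab_refill, so the
          +          // allocation size has to be recomputed on this path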
  1.1274 +          // get the allocation size: (length << (layout_helper & 0x1F)) + header_size
  1.1275 +          __ lw(t1, klass, in_bytes(Klass::layout_helper_offset()));	 
  1.1276 +          __ andi(AT, t1, 0x1f);
  1.1277 +          __ sllv(arr_size, length, AT);
  1.1278 +          __ srl(t1, t1, Klass::_lh_header_size_shift);
  1.1279 +          __ andi(t1, t1, Klass::_lh_header_size_mask);
  1.1280 +          __ add(arr_size, t1, arr_size);
  1.1281 +          __ addi(arr_size, arr_size, MinObjAlignmentInBytesMask);  // align up
  1.1282 +          __ move(AT, ~MinObjAlignmentInBytesMask);
  1.1283 +          __ andr(arr_size, arr_size, AT);
  1.1284 +          __ eden_allocate(obj, arr_size, 0, t1, t2, slow_path);  // preserves arr_size
   1.1285 +          __ initialize_header(obj, klass, length, t1, t2);
  1.1286 +          __ lbu(t1, Address(klass, in_bytes(Klass::layout_helper_offset())
  1.1287 +                                    + (Klass::_lh_header_size_shift / BitsPerByte)));
  1.1288 +          __ andi(t1, t1, Klass::_lh_header_size_mask);
  1.1289 +          __ sub(arr_size, arr_size, t1);  // body length
  1.1290 +          __ add(t1, t1, obj);             // body start
  1.1291 +        
  1.1292 +          __ initialize_body(t1, arr_size, 0, t2);
  1.1293 +          __ verify_oop(obj);
  1.1294 +          __ jr(RA);
  1.1295 +          __ delayed()->nop();
  1.1296 +          __ bind(slow_path);
  1.1297 +        }
  1.1298 +       
  1.1299 +      
  1.1300 +        __ enter();
  1.1301 +        OopMap* map = save_live_registers(sasm, 0);
  1.1302 +        int call_offset;
  1.1303 +        if (id == new_type_array_id) {
  1.1304 +          call_offset = __ call_RT(obj, noreg, 
  1.1305 +                                    CAST_FROM_FN_PTR(address, new_type_array), klass, length);
  1.1306 +        } else {
  1.1307 +          call_offset = __ call_RT(obj, noreg, 
  1.1308 +                                   CAST_FROM_FN_PTR(address, new_object_array), klass, length);
  1.1309 +        }
  1.1310 +      
  1.1311 +        oop_maps = new OopMapSet();
  1.1312 +        oop_maps->add_gc_map(call_offset, map);
  1.1313 +        restore_live_registers_except_V0(sasm);
  1.1314 +        __ verify_oop(obj);
  1.1315 +        __ leave();	
  1.1316 +        __ jr(RA);
  1.1317 +        __ delayed()->nop();
  1.1318 +      }
  1.1319 +      break;
  1.1320 +
  1.1321 +    case new_multi_array_id:
  1.1322 +      { 
   1.1323 +        StubFrame f(sasm, "new_multi_array", dont_gc_arguments);
   1.1324 +        // refer to c1_LIRGenerator_mips.cpp: do_NewMultiArray
   1.1325 +        // V0: klass
   1.1326 +        // T2: rank
   1.1327 +        // T0: address of 1st dimension
   1.1328 +        //__ call_RT(V0, noreg, CAST_FROM_FN_PTR(address, new_multi_array), A1, A2, A3);
   1.1329 +        //OopMap* map = save_live_registers(sasm, 4);
   1.1330 +        OopMap* map = save_live_registers(sasm, 0);
   1.1331 +        int call_offset = __ call_RT(V0, noreg, CAST_FROM_FN_PTR(address, new_multi_array),
   1.1332 +                                     V0, T2, T0);
   1.1333 +        oop_maps = new OopMapSet();
   1.1334 +        oop_maps->add_gc_map(call_offset, map);
   1.1335 +        // FIXME
   1.1336 +        restore_live_registers_except_V0(sasm);
   1.1337 +        // V0: new multi array
   1.1338 +        __ verify_oop(V0);
   1.1339 +      }
  1.1340 +      break;
  1.1341 +
  1.1342 +		
  1.1343 +    case register_finalizer_id:
  1.1344 +      {
   1.1345 +      __ set_info("register_finalizer", dont_gc_arguments);
   1.1346 +
   1.1347 +      // The object to register is passed in A0;
   1.1348 +      // see LIRGenerator::do_RegisterFinalizer and call_runtime.
   1.1349 +      __ move(V0, A0);
   1.1350 +      __ verify_oop(V0);
   1.1351 +      // load the klass and check the has-finalizer flag
   1.1352 +      Label register_finalizer;
   1.1353 +#ifndef _LP64
   1.1354 +      Register t = T5;
   1.1355 +#else
   1.1356 +      Register t = A5;
   1.1357 +#endif
   1.1358 +      //__ ld_ptr(t, Address(V0, oopDesc::klass_offset_in_bytes()));
   1.1359 +      __ load_klass(t, V0);
   1.1360 +      __ lw(t, Address(t, Klass::access_flags_offset()));
   1.1361 +      __ move(AT, JVM_ACC_HAS_FINALIZER);
   1.1362 +      __ andr(AT, AT, t);
   1.1363 +
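          +      // AT is non-zero iff JVM_ACC_HAS_FINALIZER is set in the access
          +      // flags; only in that case do we need to call into the runtime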
   1.1365 +      __ bne(AT, R0, register_finalizer);
   1.1366 +      __ delayed()->nop();
   1.1367 +      __ jr(RA);
   1.1368 +      __ delayed()->nop();
   1.1369 +      __ bind(register_finalizer);
   1.1370 +      __ enter();
   1.1371 +      OopMap* map = save_live_registers(sasm, 0 /* num_rt_args */);
   1.1372 +
   1.1373 +      int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address,
   1.1374 +                        SharedRuntime::register_finalizer), V0);
   1.1375 +      oop_maps = new OopMapSet();
   1.1376 +      oop_maps->add_gc_map(call_offset, map);
   1.1377 +
   1.1378 +      // Now restore all the live registers
   1.1379 +      restore_live_registers(sasm);
   1.1380 +
   1.1381 +      __ leave();
   1.1382 +      __ jr(RA);
   1.1383 +      __ delayed()->nop();
  1.1384 +      }
  1.1385 +      break;
  1.1386 +
   1.1387 +    //case range_check_failed_id:
   1.1388 +    case throw_range_check_failed_id:
   1.1389 +      { StubFrame f(sasm, "range_check_failed", dont_gc_arguments);
   1.1390 +        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address,
   1.1391 +                   throw_range_check_exception), true);
   1.1392 +      }
   1.1393 +      break;
   1.1394 +
   1.1395 +    case throw_index_exception_id:
   1.1396 +      {
   1.1397 +        // A1 holds the index, since it becomes the first argument; see call_RT
   1.1398 +        StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments);
   1.1399 +        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address,
   1.1400 +                   throw_index_exception), true);
   1.1401 +      }
   1.1402 +      break;
   1.1403 +
   1.1404 +    case throw_div0_exception_id:
   1.1405 +      { StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments);
   1.1406 +        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address,
   1.1407 +                   throw_div0_exception), false);
   1.1408 +      }
   1.1409 +      break;
   1.1410 +
   1.1411 +    case throw_null_pointer_exception_id:
   1.1412 +      { StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments);
   1.1413 +        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address,
   1.1414 +                   throw_null_pointer_exception), false);
   1.1415 +      }
   1.1416 +      break;
  1.1417 +
   1.1418 +    case handle_exception_nofpu_id:
   1.1419 +      save_fpu_registers = false;
   1.1420 +      // fall through
   1.1421 +    case handle_exception_id:
   1.1422 +      {
   1.1423 +        StubFrame f(sasm, "handle_exception", dont_gc_arguments);
   1.1424 +        //OopMap* oop_map = save_live_registers(sasm, 1, save_fpu_registers);
   1.1425 +        oop_maps = generate_handle_exception(id, sasm);
   1.1426 +      }
   1.1427 +      break;
   1.1428 +
   1.1429 +    case handle_exception_from_callee_id:
   1.1430 +      {
   1.1431 +        StubFrame f(sasm, "handle_exception_from_callee", dont_gc_arguments);
   1.1432 +        oop_maps = generate_handle_exception(id, sasm);
   1.1433 +      }
   1.1434 +      break;
   1.1435 +
   1.1436 +    case unwind_exception_id:
   1.1437 +      {
   1.1438 +        __ set_info("unwind_exception", dont_gc_arguments);
   1.1439 +        generate_unwind_exception(sasm);
   1.1440 +      }
   1.1441 +      break;
  1.1444 +
  1.1445 +
   1.1446 +    case throw_array_store_exception_id:
   1.1447 +      { StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments);
   1.1448 +        // tos + 0: link
   1.1449 +        //     + 1: return address
   1.1450 +        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address,
   1.1451 +                   throw_array_store_exception), false);
   1.1452 +      }
   1.1453 +      break;
   1.1454 +
   1.1455 +    case throw_class_cast_exception_id:
   1.1456 +      { StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments);
   1.1457 +        // the object being cast is passed as an argument to the exception throw
   1.1458 +        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address,
   1.1459 +                   throw_class_cast_exception), true);
   1.1460 +      }
   1.1461 +      break;
   1.1462 +
   1.1463 +    case throw_incompatible_class_change_error_id:
   1.1464 +      { StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments);
   1.1465 +        oop_maps = generate_exception_throw(sasm,
   1.1466 +                   CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false);
   1.1467 +      }
   1.1468 +      break;
  1.1469 +
   1.1470 +    case slow_subtype_check_id:
   1.1471 +      {
   1.1472 +        // actually, we do not use it
   1.1473 +        // A0: klass_RInfo (sub klass)
   1.1474 +        // A1: k->encoding() (super klass)
   1.1475 +        __ set_info("slow_subtype_check", dont_gc_arguments);
   1.1476 +        __ st_ptr(T0, SP, (-1) * wordSize);
   1.1477 +        __ st_ptr(T1, SP, (-2) * wordSize);
   1.1478 +        __ addiu(SP, SP, (-2) * wordSize);
   1.1479 +
   1.1480 +        // load the sub klass's secondary supers array
   1.1481 +        __ ld_ptr(AT, A0, in_bytes(Klass::secondary_supers_offset()));
   1.1482 +        __ lw(T1, AT, arrayOopDesc::length_offset_in_bytes());
   1.1483 +        __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
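          +        // AT now points at the first element of the secondary supers
          +        // array and T1 holds the element count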
  1.1484 +
   1.1485 +        Label miss, hit, loop;
   1.1486 +        // T1: remaining count, AT: element pointer, A1: the super klass we look for
   1.1487 +        __ bind(loop);
   1.1488 +        __ beq(T1, R0, miss);
   1.1489 +#ifndef _LP64
   1.1490 +        __ delayed()->lw(T0, AT, 0);
   1.1491 +#else
   1.1492 +        __ delayed()->ld(T0, AT, 0);
   1.1493 +#endif
   1.1494 +        __ beq(T0, A1, hit);
   1.1495 +        __ delayed();
   1.1496 +        __ addiu(T1, T1, -1);
   1.1497 +        __ b(loop);
   1.1498 +        __ delayed();
   1.1499 +        __ addiu(AT, AT, 4);
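          +        // the count is decremented in the beq delay slot and the element
          +        // pointer advanced in the b delay slot, so both execute on every
          +        // iteration; the 4-byte stride assumes 32-bit array entries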
  1.1500 +
   1.1501 +        __ bind(hit);
   1.1502 +        // cache the successful lookup in secondary_super_cache so the next
   1.1503 +        // check of this pair can take the fast path
   1.1504 +        __ st_ptr(A1, A0, in_bytes(Klass::secondary_super_cache_offset()));
   1.1505 +        __ addiu(V0, R0, 1);
   1.1506 +        __ addiu(SP, SP, 2 * wordSize);
   1.1507 +        __ ld_ptr(T0, SP, (-1) * wordSize);
   1.1508 +        __ ld_ptr(T1, SP, (-2) * wordSize);
   1.1509 +        __ jr(RA);
   1.1510 +        __ delayed()->nop();
   1.1511 +
   1.1512 +        __ bind(miss);
   1.1513 +        __ move(V0, R0);
   1.1514 +        __ addiu(SP, SP, 2 * wordSize);
   1.1515 +        __ ld_ptr(T0, SP, (-1) * wordSize);
   1.1516 +        __ ld_ptr(T1, SP, (-2) * wordSize);
   1.1517 +        __ jr(RA);
   1.1518 +        __ delayed()->nop();
   1.1519 +      }
   1.1520 +      break;
  1.1522 +
   1.1523 +    case monitorenter_nofpu_id:
   1.1524 +      save_fpu_registers = false;
   1.1525 +      // fall through
   1.1526 +    case monitorenter_id:
   1.1527 +      {
   1.1528 +        StubFrame f(sasm, "monitorenter", dont_gc_arguments);
   1.1529 +        OopMap* map = save_live_registers(sasm, 0, save_fpu_registers);
   1.1530 +
   1.1531 +        f.load_argument(1, V0); // V0: object
   1.1532 +#ifndef _LP64
   1.1533 +        f.load_argument(0, T6); // T6: lock address
   1.1534 +        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address,
   1.1535 +                          monitorenter), V0, T6);
   1.1536 +#else
   1.1537 +        f.load_argument(0, A6); // A6: lock address
   1.1538 +        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address,
   1.1539 +                          monitorenter), V0, A6);
   1.1540 +#endif
   1.1541 +
   1.1542 +        oop_maps = new OopMapSet();
   1.1543 +        oop_maps->add_gc_map(call_offset, map);
   1.1544 +        restore_live_registers(sasm, save_fpu_registers);
   1.1545 +      }
   1.1546 +      break;
  1.1547 +
   1.1548 +    case monitorexit_nofpu_id:
   1.1549 +      save_fpu_registers = false;
   1.1550 +      // fall through
   1.1551 +    case monitorexit_id:
  1.1552 +    { 
  1.1553 +      StubFrame f(sasm, "monitorexit", dont_gc_arguments);
  1.1554 +      OopMap* map = save_live_registers(sasm, 0, save_fpu_registers);
  1.1555 +  
  1.1556 +#ifndef _LP64
   1.1557 +      f.load_argument(0, T6); // T6: lock address
  1.1558 +#else
  1.1559 +      f.load_argument(0, A6); // A6: lock address
  1.1560 +#endif
  1.1561 +      // note: really a leaf routine but must setup last java sp
  1.1562 +      //       => use call_RT for now (speed can be improved by
  1.1563 +      //       doing last java sp setup manually)
  1.1564 +#ifndef _LP64
  1.1565 +      int call_offset = __ call_RT(noreg, noreg, 
  1.1566 +  	                                CAST_FROM_FN_PTR(address, monitorexit), T6);
  1.1567 +#else
  1.1568 +      int call_offset = __ call_RT(noreg, noreg, 
  1.1569 +  	                                CAST_FROM_FN_PTR(address, monitorexit), A6);
  1.1570 +#endif
  1.1571 +      oop_maps = new OopMapSet();
  1.1572 +      oop_maps->add_gc_map(call_offset, map);
  1.1573 +      restore_live_registers(sasm, save_fpu_registers);
  1.1574 +  
  1.1575 +    }
  1.1576 +    break;
   1.1577 +    //case init_check_patching_id:
   1.1578 +    case access_field_patching_id:
   1.1579 +      {
   1.1580 +        StubFrame f(sasm, "access_field_patching", dont_gc_arguments);
   1.1581 +        // we should set up register map
   1.1582 +        oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching));
   1.1583 +      }
   1.1584 +      break;
   1.1585 +
   1.1586 +    case load_klass_patching_id:
   1.1587 +      {
   1.1588 +        StubFrame f(sasm, "load_klass_patching", dont_gc_arguments);
   1.1589 +        // we should set up register map
   1.1590 +        oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address,
   1.1591 +                   move_klass_patching));
   1.1592 +      }
   1.1593 +      break;
   1.1595 +/*  case jvmti_exception_throw_id:
   1.1596 +      {
   1.1597 +        // V0: exception oop
   1.1598 +        // V1: exception pc
   1.1599 +        StubFrame f(sasm, "jvmti_exception_throw", dont_gc_arguments);
   1.1600 +        // Preserve all registers across this potentially blocking call
   1.1601 +        const int num_rt_args = 2;  // thread, exception oop
   1.1602 +        //OopMap* map = save_live_registers(sasm, num_rt_args);
   1.1603 +        OopMap* map = save_live_registers(sasm, 0);
   1.1604 +        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address,
   1.1605 +                          Runtime1::post_jvmti_exception_throw), V0);
   1.1606 +        oop_maps = new OopMapSet();
   1.1607 +        oop_maps->add_gc_map(call_offset, map);
   1.1608 +        restore_live_registers(sasm);
   1.1609 +      } */
   1.1610 +    case load_mirror_patching_id:
   1.1611 +      {
   1.1612 +        StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments);
   1.1613 +        oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching));
   1.1614 +      }
   1.1615 +      break;
   1.1616 +    case dtrace_object_alloc_id:
   1.1617 +      {
   1.1618 +        // V0: object
   1.1619 +        StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments);
   1.1620 +        // we can't gc here so skip the oopmap but make sure that all
   1.1621 +        // the live registers get saved.
   1.1622 +        save_live_registers(sasm, 0);
   1.1623 +
   1.1624 +        __ push_reg(V0);
   1.1625 +        __ call(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc),
   1.1626 +                relocInfo::runtime_call_type);
   1.1627 +        __ super_pop(V0);
   1.1628 +
   1.1629 +        restore_live_registers(sasm);
   1.1630 +      }
   1.1631 +      break;
   1.1632 +    case fpu2long_stub_id:
   1.1633 +      {
   1.1634 +        // FIXME: I have no idea how to port this; fall through to the unimplemented-entry stub
   1.1635 +      }
   1.1636 +    default:
   1.1637 +      { StubFrame f(sasm, "unimplemented entry", dont_gc_arguments);
   1.1638 +        __ move(A1, (int)id);
   1.1639 +        __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), A1);
   1.1640 +        __ should_not_reach_here();
   1.1641 +      }
   1.1642 +      break;
   1.1643 +  }
   1.1644 +  return oop_maps;
   1.1645 +}
  1.1646 +
  1.1647 +#undef __
  1.1648 +
  1.1649 +const char *Runtime1::pd_name_for_address(address entry) {
  1.1650 +  return "<unknown function>";
  1.1651 +}
  1.1652 +
