diff -r 11d997b1e656 -r 52ea28d233d2 src/cpu/mips/vm/sharedRuntime_mips_64.cpp --- a/src/cpu/mips/vm/sharedRuntime_mips_64.cpp Fri Sep 01 10:28:22 2017 +0800 +++ b/src/cpu/mips/vm/sharedRuntime_mips_64.cpp Thu Sep 07 09:12:16 2017 +0800 @@ -43,126 +43,95 @@ #endif #define __ masm-> + const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; class RegisterSaver { - enum { FPU_regs_live = 32 }; - // Capture info about frame layout - enum layout { + enum { FPU_regs_live = 32 }; + // Capture info about frame layout + enum layout { #define DEF_LAYOUT_OFFS(regname) regname ## _off, regname ## H_off, - DEF_LAYOUT_OFFS(for_16_bytes_aligned) - DEF_LAYOUT_OFFS(fpr0) - DEF_LAYOUT_OFFS(fpr1) - DEF_LAYOUT_OFFS(fpr2) - DEF_LAYOUT_OFFS(fpr3) - DEF_LAYOUT_OFFS(fpr4) - DEF_LAYOUT_OFFS(fpr5) - DEF_LAYOUT_OFFS(fpr6) - DEF_LAYOUT_OFFS(fpr7) - DEF_LAYOUT_OFFS(fpr8) - DEF_LAYOUT_OFFS(fpr9) - DEF_LAYOUT_OFFS(fpr10) - DEF_LAYOUT_OFFS(fpr11) - DEF_LAYOUT_OFFS(fpr12) - DEF_LAYOUT_OFFS(fpr13) - DEF_LAYOUT_OFFS(fpr14) - DEF_LAYOUT_OFFS(fpr15) - DEF_LAYOUT_OFFS(fpr16) - DEF_LAYOUT_OFFS(fpr17) - DEF_LAYOUT_OFFS(fpr18) - DEF_LAYOUT_OFFS(fpr19) - DEF_LAYOUT_OFFS(fpr20) - DEF_LAYOUT_OFFS(fpr21) - DEF_LAYOUT_OFFS(fpr22) - DEF_LAYOUT_OFFS(fpr23) - DEF_LAYOUT_OFFS(fpr24) - DEF_LAYOUT_OFFS(fpr25) - DEF_LAYOUT_OFFS(fpr26) - DEF_LAYOUT_OFFS(fpr27) - DEF_LAYOUT_OFFS(fpr28) - DEF_LAYOUT_OFFS(fpr29) - DEF_LAYOUT_OFFS(fpr30) - DEF_LAYOUT_OFFS(fpr31) - - DEF_LAYOUT_OFFS(v0) - DEF_LAYOUT_OFFS(v1) - DEF_LAYOUT_OFFS(a0) - DEF_LAYOUT_OFFS(a1) - DEF_LAYOUT_OFFS(a2) - DEF_LAYOUT_OFFS(a3) - DEF_LAYOUT_OFFS(a4) - DEF_LAYOUT_OFFS(a5) - DEF_LAYOUT_OFFS(a6) - DEF_LAYOUT_OFFS(a7) - DEF_LAYOUT_OFFS(t0) - DEF_LAYOUT_OFFS(t1) - DEF_LAYOUT_OFFS(t2) - DEF_LAYOUT_OFFS(t3) - DEF_LAYOUT_OFFS(s0) - DEF_LAYOUT_OFFS(s1) - DEF_LAYOUT_OFFS(s2) - DEF_LAYOUT_OFFS(s3) - DEF_LAYOUT_OFFS(s4) - DEF_LAYOUT_OFFS(s5) - DEF_LAYOUT_OFFS(s6) - DEF_LAYOUT_OFFS(s7) - DEF_LAYOUT_OFFS(t8) - DEF_LAYOUT_OFFS(t9) - - DEF_LAYOUT_OFFS(gp) - DEF_LAYOUT_OFFS(fp) - DEF_LAYOUT_OFFS(return) -/* - fpr0_off, fpr1_off, - fpr2_off, fpr3_off, - fpr4_off, fpr5_off, - fpr6_off, fpr7_off, - fpr8_off, fpr9_off, - fpr10_off, fpr11_off, - fpr12_off, fpr13_off, - fpr14_off, fpr15_off, - fpr16_off, fpr17_off, - fpr18_off, fpr19_off, - fpr20_off, fpr21_off, - fpr22_off, fpr23_off, - fpr24_off, fpr25_off, - fpr26_off, fpr27_off, - fpr28_off, fpr29_off, - fpr30_off, fpr31_off, - - v0_off, v1_off, - a0_off, a1_off, - a2_off, a3_off, - a4_off, a5_off, - a6_off, a7_off, - t0_off, t1_off, t2_off, t3_off, - s0_off, s1_off, s2_off, s3_off, s4_off, s5_off, s6_off, s7_off, - t8_off, t9_off, - - gp_off, fp_off, - return_off, -*/ - reg_save_size - }; + DEF_LAYOUT_OFFS(for_16_bytes_aligned) + DEF_LAYOUT_OFFS(fpr0) + DEF_LAYOUT_OFFS(fpr1) + DEF_LAYOUT_OFFS(fpr2) + DEF_LAYOUT_OFFS(fpr3) + DEF_LAYOUT_OFFS(fpr4) + DEF_LAYOUT_OFFS(fpr5) + DEF_LAYOUT_OFFS(fpr6) + DEF_LAYOUT_OFFS(fpr7) + DEF_LAYOUT_OFFS(fpr8) + DEF_LAYOUT_OFFS(fpr9) + DEF_LAYOUT_OFFS(fpr10) + DEF_LAYOUT_OFFS(fpr11) + DEF_LAYOUT_OFFS(fpr12) + DEF_LAYOUT_OFFS(fpr13) + DEF_LAYOUT_OFFS(fpr14) + DEF_LAYOUT_OFFS(fpr15) + DEF_LAYOUT_OFFS(fpr16) + DEF_LAYOUT_OFFS(fpr17) + DEF_LAYOUT_OFFS(fpr18) + DEF_LAYOUT_OFFS(fpr19) + DEF_LAYOUT_OFFS(fpr20) + DEF_LAYOUT_OFFS(fpr21) + DEF_LAYOUT_OFFS(fpr22) + DEF_LAYOUT_OFFS(fpr23) + DEF_LAYOUT_OFFS(fpr24) + DEF_LAYOUT_OFFS(fpr25) + DEF_LAYOUT_OFFS(fpr26) + DEF_LAYOUT_OFFS(fpr27) + DEF_LAYOUT_OFFS(fpr28) + DEF_LAYOUT_OFFS(fpr29) + DEF_LAYOUT_OFFS(fpr30) + DEF_LAYOUT_OFFS(fpr31) + + 
DEF_LAYOUT_OFFS(v0) + DEF_LAYOUT_OFFS(v1) + DEF_LAYOUT_OFFS(a0) + DEF_LAYOUT_OFFS(a1) + DEF_LAYOUT_OFFS(a2) + DEF_LAYOUT_OFFS(a3) + DEF_LAYOUT_OFFS(a4) + DEF_LAYOUT_OFFS(a5) + DEF_LAYOUT_OFFS(a6) + DEF_LAYOUT_OFFS(a7) + DEF_LAYOUT_OFFS(t0) + DEF_LAYOUT_OFFS(t1) + DEF_LAYOUT_OFFS(t2) + DEF_LAYOUT_OFFS(t3) + DEF_LAYOUT_OFFS(s0) + DEF_LAYOUT_OFFS(s1) + DEF_LAYOUT_OFFS(s2) + DEF_LAYOUT_OFFS(s3) + DEF_LAYOUT_OFFS(s4) + DEF_LAYOUT_OFFS(s5) + DEF_LAYOUT_OFFS(s6) + DEF_LAYOUT_OFFS(s7) + DEF_LAYOUT_OFFS(t8) + DEF_LAYOUT_OFFS(t9) + + DEF_LAYOUT_OFFS(gp) + DEF_LAYOUT_OFFS(fp) + DEF_LAYOUT_OFFS(return) + reg_save_size + }; public: - static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors =false ); - static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false); - //FIXME, I have no idea which register to use - static int raOffset(void) { return return_off / 2; } - //Rmethod - static int methodOffset(void) { return s3_off / 2; } - - static int v0Offset(void) { return v0_off / 2; } - static int v1Offset(void) { return v1_off / 2; } - - static int fpResultOffset(void) { return fpr0_off / 2; } - - // During deoptimization only the result register need to be restored - // all the other values have already been extracted. - - static void restore_result_registers(MacroAssembler* masm); + static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors =false ); + static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false); + static int raOffset(void) { return return_off / 2; } + //Rmethod + static int methodOffset(void) { return s3_off / 2; } + + static int v0Offset(void) { return v0_off / 2; } + static int v1Offset(void) { return v1_off / 2; } + + static int fpResultOffset(void) { return fpr0_off / 2; } + + // During deoptimization only the result register need to be restored + // all the other values have already been extracted. 
+ static void restore_result_registers(MacroAssembler* masm); }; OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors ) { @@ -183,7 +152,7 @@ int frame_size_in_words = frame_size_in_bytes / wordSize; *total_frame_words = frame_size_in_words; - // save registers, fpu state, and flags + // save registers, fpu state, and flags // We assume caller has already has return address slot on the stack // We push epb twice in this sequence because we want the real ebp // to be under the return like a normal enter and we want to use pushad @@ -194,24 +163,24 @@ __ sdc1(F0, SP, fpr0_off * jintSize); __ sdc1(F1, SP, fpr1_off * jintSize); __ sdc1(F2, SP, fpr2_off * jintSize); __ sdc1(F3, SP, fpr3_off * jintSize); __ sdc1(F4, SP, fpr4_off * jintSize); __ sdc1(F5, SP, fpr5_off * jintSize); - __ sdc1(F6, SP, fpr6_off * jintSize); __ sdc1(F7, SP, fpr7_off * jintSize); - __ sdc1(F8, SP, fpr8_off * jintSize); __ sdc1(F9, SP, fpr9_off * jintSize); - __ sdc1(F10, SP, fpr10_off * jintSize); __ sdc1(F11, SP, fpr11_off * jintSize); - __ sdc1(F12, SP, fpr12_off * jintSize); __ sdc1(F13, SP, fpr13_off * jintSize); - __ sdc1(F14, SP, fpr14_off * jintSize); __ sdc1(F15, SP, fpr15_off * jintSize); - __ sdc1(F16, SP, fpr16_off * jintSize); __ sdc1(F17, SP, fpr17_off * jintSize); - __ sdc1(F18, SP, fpr18_off * jintSize); __ sdc1(F19, SP, fpr19_off * jintSize); - __ sdc1(F20, SP, fpr20_off * jintSize); __ sdc1(F21, SP, fpr21_off * jintSize); - __ sdc1(F22, SP, fpr22_off * jintSize); __ sdc1(F23, SP, fpr23_off * jintSize); - __ sdc1(F24, SP, fpr24_off * jintSize); __ sdc1(F25, SP, fpr25_off * jintSize); - __ sdc1(F26, SP, fpr26_off * jintSize); __ sdc1(F27, SP, fpr27_off * jintSize); - __ sdc1(F28, SP, fpr28_off * jintSize); __ sdc1(F29, SP, fpr29_off * jintSize); - __ sdc1(F30, SP, fpr30_off * jintSize); __ sdc1(F31, SP, fpr31_off * jintSize); - __ sd(V0, SP, v0_off * jintSize); __ sd(V1, SP, v1_off * jintSize); - __ sd(A0, SP, a0_off * jintSize); __ sd(A1, SP, a1_off * jintSize); - __ sd(A2, SP, a2_off * jintSize); __ sd(A3, SP, a3_off * jintSize); - __ sd(A4, SP, a4_off * jintSize); __ sd(A5, SP, a5_off * jintSize); - __ sd(A6, SP, a6_off * jintSize); __ sd(A7, SP, a7_off * jintSize); + __ sdc1(F6, SP, fpr6_off * jintSize); __ sdc1(F7, SP, fpr7_off * jintSize); + __ sdc1(F8, SP, fpr8_off * jintSize); __ sdc1(F9, SP, fpr9_off * jintSize); + __ sdc1(F10, SP, fpr10_off * jintSize); __ sdc1(F11, SP, fpr11_off * jintSize); + __ sdc1(F12, SP, fpr12_off * jintSize); __ sdc1(F13, SP, fpr13_off * jintSize); + __ sdc1(F14, SP, fpr14_off * jintSize); __ sdc1(F15, SP, fpr15_off * jintSize); + __ sdc1(F16, SP, fpr16_off * jintSize); __ sdc1(F17, SP, fpr17_off * jintSize); + __ sdc1(F18, SP, fpr18_off * jintSize); __ sdc1(F19, SP, fpr19_off * jintSize); + __ sdc1(F20, SP, fpr20_off * jintSize); __ sdc1(F21, SP, fpr21_off * jintSize); + __ sdc1(F22, SP, fpr22_off * jintSize); __ sdc1(F23, SP, fpr23_off * jintSize); + __ sdc1(F24, SP, fpr24_off * jintSize); __ sdc1(F25, SP, fpr25_off * jintSize); + __ sdc1(F26, SP, fpr26_off * jintSize); __ sdc1(F27, SP, fpr27_off * jintSize); + __ sdc1(F28, SP, fpr28_off * jintSize); __ sdc1(F29, SP, fpr29_off * jintSize); + __ sdc1(F30, SP, fpr30_off * jintSize); __ sdc1(F31, SP, fpr31_off * jintSize); + __ sd(V0, SP, v0_off * jintSize); __ sd(V1, SP, v1_off * jintSize); + __ sd(A0, SP, a0_off * jintSize); __ sd(A1, SP, a1_off * jintSize); + __ sd(A2, SP, a2_off * jintSize); __ sd(A3, SP, a3_off * jintSize); + __ sd(A4, 
SP, a4_off * jintSize); __ sd(A5, SP, a5_off * jintSize); + __ sd(A6, SP, a6_off * jintSize); __ sd(A7, SP, a7_off * jintSize); __ sd(T0, SP, t0_off * jintSize); __ sd(T1, SP, t1_off * jintSize); __ sd(T2, SP, t2_off * jintSize); @@ -234,8 +203,8 @@ __ daddi(FP, SP, fp_off * jintSize); OopMapSet *oop_maps = new OopMapSet(); - //OopMap* map = new OopMap( frame_words, 0 ); - OopMap* map = new OopMap( frame_size_in_slots, 0 ); + //OopMap* map = new OopMap( frame_words, 0 ); + OopMap* map = new OopMap( frame_size_in_slots, 0 ); //#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_words) @@ -301,54 +270,6 @@ map->set_callee_saved(STACK_OFFSET( fpr30_off), F30->as_VMReg()); map->set_callee_saved(STACK_OFFSET( fpr31_off), F31->as_VMReg()); -/* - if (true) { - map->set_callee_saved(STACK_OFFSET( v0H_off), V0->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( v1H_off), V1->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( a0H_off), A0->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( a1H_off), A1->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( a2H_off), A2->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( a3H_off), A3->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( a4H_off), A4->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( a5H_off), A5->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( a6H_off), A6->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( a7H_off), A7->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( t0H_off), T0->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( t1H_off), T1->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( t2H_off), T2->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( t3H_off), T3->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( s0H_off), S0->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( s1H_off), S1->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( s2H_off), S2->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( s3H_off), S3->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( s4H_off), S4->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( s5H_off), S5->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( s6H_off), S6->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( s7H_off), S7->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( t8H_off), T8->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( t9H_off), T9->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( gpH_off), GP->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( fpH_off), FP->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( returnH_off), RA->as_VMReg()->next()); - - map->set_callee_saved(STACK_OFFSET( fpr0H_off), F0->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( fpr2H_off), F2->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( fpr4H_off), F4->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( fpr6H_off), F6->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( fpr8H_off), F8->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( fpr10H_off), F10->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( fpr12H_off), F12->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( fpr14H_off), F14->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( fpr16H_off), F16->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( fpr18H_off), F18->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( 
fpr20H_off), F20->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( fpr22H_off), F22->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( fpr24H_off), F24->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( fpr26H_off), F26->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( fpr28H_off), F28->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( fpr30H_off), F30->as_VMReg()->next()); - } -*/ #undef STACK_OFFSET return map; } @@ -360,25 +281,25 @@ __ ldc1(F0, SP, fpr0_off * jintSize); __ ldc1(F1, SP, fpr1_off * jintSize); __ ldc1(F2, SP, fpr2_off * jintSize); __ ldc1(F3, SP, fpr3_off * jintSize); __ ldc1(F4, SP, fpr4_off * jintSize); __ ldc1(F5, SP, fpr5_off * jintSize); - __ ldc1(F6, SP, fpr6_off * jintSize); __ ldc1(F7, SP, fpr7_off * jintSize); - __ ldc1(F8, SP, fpr8_off * jintSize); __ ldc1(F9, SP, fpr9_off * jintSize); - __ ldc1(F10, SP, fpr10_off * jintSize); __ ldc1(F11, SP, fpr11_off * jintSize); - __ ldc1(F12, SP, fpr12_off * jintSize); __ ldc1(F13, SP, fpr13_off * jintSize); - __ ldc1(F14, SP, fpr14_off * jintSize); __ ldc1(F15, SP, fpr15_off * jintSize); - __ ldc1(F16, SP, fpr16_off * jintSize); __ ldc1(F17, SP, fpr17_off * jintSize); - __ ldc1(F18, SP, fpr18_off * jintSize); __ ldc1(F19, SP, fpr19_off * jintSize); - __ ldc1(F20, SP, fpr20_off * jintSize); __ ldc1(F21, SP, fpr21_off * jintSize); - __ ldc1(F22, SP, fpr22_off * jintSize); __ ldc1(F23, SP, fpr23_off * jintSize); - __ ldc1(F24, SP, fpr24_off * jintSize); __ ldc1(F25, SP, fpr25_off * jintSize); - __ ldc1(F26, SP, fpr26_off * jintSize); __ ldc1(F27, SP, fpr27_off * jintSize); - __ ldc1(F28, SP, fpr28_off * jintSize); __ ldc1(F29, SP, fpr29_off * jintSize); - __ ldc1(F30, SP, fpr30_off * jintSize); __ ldc1(F31, SP, fpr31_off * jintSize); - - __ ld(V0, SP, v0_off * jintSize); __ ld(V1, SP, v1_off * jintSize); - __ ld(A0, SP, a0_off * jintSize); __ ld(A1, SP, a1_off * jintSize); - __ ld(A2, SP, a2_off * jintSize); __ ld(A3, SP, a3_off * jintSize); - __ ld(A4, SP, a4_off * jintSize); __ ld(A5, SP, a5_off * jintSize); - __ ld(A6, SP, a6_off * jintSize); __ ld(A7, SP, a7_off * jintSize); + __ ldc1(F6, SP, fpr6_off * jintSize); __ ldc1(F7, SP, fpr7_off * jintSize); + __ ldc1(F8, SP, fpr8_off * jintSize); __ ldc1(F9, SP, fpr9_off * jintSize); + __ ldc1(F10, SP, fpr10_off * jintSize); __ ldc1(F11, SP, fpr11_off * jintSize); + __ ldc1(F12, SP, fpr12_off * jintSize); __ ldc1(F13, SP, fpr13_off * jintSize); + __ ldc1(F14, SP, fpr14_off * jintSize); __ ldc1(F15, SP, fpr15_off * jintSize); + __ ldc1(F16, SP, fpr16_off * jintSize); __ ldc1(F17, SP, fpr17_off * jintSize); + __ ldc1(F18, SP, fpr18_off * jintSize); __ ldc1(F19, SP, fpr19_off * jintSize); + __ ldc1(F20, SP, fpr20_off * jintSize); __ ldc1(F21, SP, fpr21_off * jintSize); + __ ldc1(F22, SP, fpr22_off * jintSize); __ ldc1(F23, SP, fpr23_off * jintSize); + __ ldc1(F24, SP, fpr24_off * jintSize); __ ldc1(F25, SP, fpr25_off * jintSize); + __ ldc1(F26, SP, fpr26_off * jintSize); __ ldc1(F27, SP, fpr27_off * jintSize); + __ ldc1(F28, SP, fpr28_off * jintSize); __ ldc1(F29, SP, fpr29_off * jintSize); + __ ldc1(F30, SP, fpr30_off * jintSize); __ ldc1(F31, SP, fpr31_off * jintSize); + + __ ld(V0, SP, v0_off * jintSize); __ ld(V1, SP, v1_off * jintSize); + __ ld(A0, SP, a0_off * jintSize); __ ld(A1, SP, a1_off * jintSize); + __ ld(A2, SP, a2_off * jintSize); __ ld(A3, SP, a3_off * jintSize); + __ ld(A4, SP, a4_off * jintSize); __ ld(A5, SP, a5_off * jintSize); + __ ld(A6, SP, a6_off * jintSize); __ ld(A7, SP, a7_off * jintSize); __ ld(T0, SP, 
t0_off * jintSize); __ ld(T1, SP, t1_off * jintSize); __ ld(T2, SP, t2_off * jintSize); @@ -406,36 +327,37 @@ // a result. // FIXME, if the result is float? void RegisterSaver::restore_result_registers(MacroAssembler* masm) { + // Just restore result register. Only used by deoptimization. By // now any callee save register that needs to be restore to a c2 // caller of the deoptee has been extracted into the vframeArray // and will be stuffed into the c2i adapter we create for later // restoration so only result registers need to be restored here. - // + __ ld(V0, SP, v0_off * jintSize); __ ld(V1, SP, v1_off * jintSize); - __ addiu(SP, SP, return_off * jintSize); + __ addiu(SP, SP, return_off * jintSize); } - // Is vector's size (in bytes) bigger than a size saved by default? - // 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions. - bool SharedRuntime::is_wide_vector(int size) { - return size > 16; - } +// Is vector's size (in bytes) bigger than a size saved by default? +// 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions. +bool SharedRuntime::is_wide_vector(int size) { + return size > 16; +} // The java_calling_convention describes stack locations as ideal slots on // a frame with no abi restrictions. Since we must observe abi restrictions // (like the placement of the register window) the slots must be biased by // the following value. -static int reg2offset_in(VMReg r) { - // Account for saved ebp and return address - // This should really be in_preserve_stack_slots - return (r->reg2stack() + 2 * VMRegImpl::slots_per_word) * VMRegImpl::stack_slot_size; // + 2 * VMRegImpl::stack_slot_size); +static int reg2offset_in(VMReg r) { + // Account for saved ebp and return address + // This should really be in_preserve_stack_slots + return (r->reg2stack() + 2 * VMRegImpl::slots_per_word) * VMRegImpl::stack_slot_size; // + 2 * VMRegImpl::stack_slot_size); } -static int reg2offset_out(VMReg r) { - return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; +static int reg2offset_out(VMReg r) { + return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; } // --------------------------------------------------------------------------- @@ -470,10 +392,6 @@ VMRegPair *regs, int total_args_passed, int is_outgoing) { -//#define aoqi_test -#ifdef aoqi_test -tty->print_cr(" SharedRuntime::%s :%d, total_args_passed: %d", __func__, __LINE__, total_args_passed); -#endif // Create the mapping between argument positions and // registers. @@ -543,85 +461,9 @@ ShouldNotReachHere(); break; } -#ifdef aoqi_test -tty->print_cr(" SharedRuntime::%s :%d, sig_bt[%d]: %d, reg[%d]:%d|%d, stk_args:%d", __func__, __LINE__, i, sig_bt[i], i, regs[i].first(), regs[i].second(), stk_args); -#endif } return round_to(stk_args, 2); -/* - // Starting stack position for args on stack - uint stack = 0; - - // Pass first five oop/int args in registers T0, A0 - A3. - uint reg_arg0 = 9999; - uint reg_arg1 = 9999; - uint reg_arg2 = 9999; - uint reg_arg3 = 9999; - uint reg_arg4 = 9999; - - - // Pass doubles & longs &float ligned on the stack. 
First count stack slots for doubles - int i; - for( i = 0; i < total_args_passed; i++) { - if( sig_bt[i] == T_DOUBLE || sig_bt[i] == T_LONG ) { - stack += 2; - } - } - int dstack = 0; // Separate counter for placing doubles - for( i = 0; i < total_args_passed; i++) { - // From the type and the argument number (count) compute the location - switch( sig_bt[i] ) { - case T_SHORT: - case T_CHAR: - case T_BYTE: - case T_BOOLEAN: - case T_INT: - case T_ARRAY: - case T_OBJECT: - case T_ADDRESS: - if( reg_arg0 == 9999 ) { - reg_arg0 = i; - regs[i].set1(T0->as_VMReg()); - } else if( reg_arg1 == 9999 ) { - reg_arg1 = i; - regs[i].set1(A0->as_VMReg()); - } else if( reg_arg2 == 9999 ) { - reg_arg2 = i; - regs[i].set1(A1->as_VMReg()); - }else if( reg_arg3 == 9999 ) { - reg_arg3 = i; - regs[i].set1(A2->as_VMReg()); - }else if( reg_arg4 == 9999 ) { - reg_arg4 = i; - regs[i].set1(A3->as_VMReg()); - } else { - regs[i].set1(VMRegImpl::stack2reg(stack++)); - } - break; - case T_FLOAT: - regs[i].set1(VMRegImpl::stack2reg(stack++)); - break; - case T_LONG: - assert(sig_bt[i+1] == T_VOID, "missing Half" ); - regs[i].set2(VMRegImpl::stack2reg(dstack)); - dstack += 2; - break; - case T_DOUBLE: - assert(sig_bt[i+1] == T_VOID, "missing Half" ); - regs[i].set2(VMRegImpl::stack2reg(dstack)); - dstack += 2; - break; - case T_VOID: regs[i].set_bad(); break; - break; - default: - ShouldNotReachHere(); - break; - } - } - // return value can be odd number of VMRegImpl stack slots make multiple of 2 - return round_to(stack, 2); -*/ } // Helper class mostly to avoid passing masm everywhere, and handle store @@ -634,7 +476,6 @@ #endif // _LP64 void patch_callers_callsite(); -// void tag_c2i_arg(frame::Tag t, Register base, int st_off, Register scratch); // base+st_off points to top of argument int arg_offset(const int st_off) { return st_off; } @@ -684,130 +525,77 @@ // Patch the callers callsite with entry to compiled code if it exists. void AdapterGenerator::patch_callers_callsite() { - Label L; - //FIXME , what is stored in eax? - //__ verify_oop(ebx); - __ verify_oop(Rmethod); - // __ cmpl(Address(ebx, in_bytes(Method::code_offset())), NULL_WORD); - __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); - //__ jcc(Assembler::equal, L); - __ beq(AT,R0,L); - __ delayed()->nop(); - // Schedule the branch target address early. - // Call into the VM to patch the caller, then jump to compiled callee - // eax isn't live so capture return address while we easily can - // __ movl(eax, Address(esp, 0)); -// __ lw(T5,SP,0); - __ move(V0, RA); - - __ pushad(); - //jerome_for_debug - // __ pushad(); - // __ pushfd(); + Label L; + __ verify_oop(Rmethod); + __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); + __ beq(AT,R0,L); + __ delayed()->nop(); + // Schedule the branch target address early. 
+ // Call into the VM to patch the caller, then jump to compiled callee + // eax isn't live so capture return address while we easily can + __ move(V0, RA); + + __ pushad(); #ifdef COMPILER2 - // C2 may leave the stack dirty if not in SSE2+ mode - __ empty_FPU_stack(); + // C2 may leave the stack dirty if not in SSE2+ mode + __ empty_FPU_stack(); #endif /* COMPILER2 */ - // VM needs caller's callsite - // __ pushl(eax); - - // VM needs target method - // __ pushl(ebx); - // __ push(Rmethod); - // __ verify_oop(ebx); - - __ move(A0, Rmethod); - __ move(A1, V0); -// __ addi(SP, SP, -8); + // VM needs caller's callsite + // VM needs target method + + __ move(A0, Rmethod); + __ move(A1, V0); //we should preserve the return address - __ verify_oop(Rmethod); - __ move(S0, SP); - __ move(AT, -(StackAlignmentInBytes)); // align the stack - __ andr(SP, SP, AT); - __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), - relocInfo::runtime_call_type); - //__ addl(esp, 2*wordSize); - - __ delayed()->nop(); - // __ addi(SP, SP, 8); - // __ popfd(); - __ move(SP, S0); - __ popad(); - __ bind(L); + __ verify_oop(Rmethod); + __ move(S0, SP); + __ move(AT, -(StackAlignmentInBytes)); // align the stack + __ andr(SP, SP, AT); + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), + relocInfo::runtime_call_type); + + __ delayed()->nop(); + __ move(SP, S0); + __ popad(); + __ bind(L); } -/* -void AdapterGenerator::tag_c2i_arg(frame::Tag t, Register base, int st_off, - Register scratch) { - Unimplemented(); -}*/ #ifdef _LP64 Register AdapterGenerator::arg_slot(const int st_off) { - Unimplemented(); + Unimplemented(); } Register AdapterGenerator::next_arg_slot(const int st_off){ - Unimplemented(); + Unimplemented(); } #endif // _LP64 // Stores long into offset pointed to by base void AdapterGenerator::store_c2i_long(Register r, Register base, const int st_off, bool is_stack) { - Unimplemented(); + Unimplemented(); } void AdapterGenerator::store_c2i_object(Register r, Register base, - const int st_off) { - Unimplemented(); + const int st_off) { + Unimplemented(); } void AdapterGenerator::store_c2i_int(Register r, Register base, - const int st_off) { - Unimplemented(); + const int st_off) { + Unimplemented(); } // Stores into offset pointed to by base void AdapterGenerator::store_c2i_double(VMReg r_2, VMReg r_1, Register base, const int st_off) { - Unimplemented(); + Unimplemented(); } void AdapterGenerator::store_c2i_float(FloatRegister f, Register base, const int st_off) { - Unimplemented(); + Unimplemented(); } -/* -void AdapterGenerator::tag_stack(const BasicType sig, int st_off) { - if (TaggedStackInterpreter) { - int tag_offset = st_off + Interpreter::expr_tag_offset_in_bytes(0); - if (sig == T_OBJECT || sig == T_ARRAY) { - // __ movl(Address(esp, tag_offset), frame::TagReference); - // __ addi(AT,R0, frame::TagReference); - - __ move(AT, frame::TagReference); - __ sw (AT, SP, tag_offset); - } else if (sig == T_LONG || sig == T_DOUBLE) { - int next_tag_offset = st_off + Interpreter::expr_tag_offset_in_bytes(1); - // __ movl(Address(esp, next_tag_offset), frame::TagValue); - // __ addi(AT,R0, frame::TagValue); - __ move(AT, frame::TagValue); - __ sw (AT, SP, next_tag_offset); - //__ movl(Address(esp, tag_offset), frame::TagValue); - // __ addi(AT,R0, frame::TagValue); - __ move(AT, frame::TagValue); - __ sw (AT, SP, tag_offset); - - } else { - // __ movl(Address(esp, tag_offset), frame::TagValue); - //__ addi(AT,R0, frame::TagValue); - __ move(AT, frame::TagValue); - __ 
sw (AT, SP, tag_offset); - - } - } -}*/ void AdapterGenerator::gen_c2i_adapter( int total_args_passed, @@ -834,113 +622,77 @@ // call and not bother building another interpreter arg area. We don't // do that at this point. - patch_callers_callsite(); - - __ bind(skip_fixup); + patch_callers_callsite(); + + __ bind(skip_fixup); #ifdef COMPILER2 - __ empty_FPU_stack(); + __ empty_FPU_stack(); #endif /* COMPILER2 */ - //this is for native ? - // Since all args are passed on the stack, total_args_passed * interpreter_ - // stack_element_size is the - // space we need. - int extraspace = total_args_passed * Interpreter::stackElementSize; - - // stack is aligned, keep it that way - extraspace = round_to(extraspace, 2*wordSize); - - // Get return address - // __ popl(eax); - //__ pop(T4); - __ move(V0, RA); - // set senderSP value - // __ movl(esi, esp); -//refer to interpreter_mips.cpp:generate_asm_entry - __ move(Rsender, SP); - //__ subl(esp, extraspace); - __ addi(SP, SP, -extraspace); - - // Now write the args into the outgoing interpreter space - for (int i = 0; i < total_args_passed; i++) { - if (sig_bt[i] == T_VOID) { - assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), - "missing half"); - continue; - } - - // st_off points to lowest address on stack. - int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize; -#ifdef aoqi_test -tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d, st_off:%d", __func__, __LINE__, i, sig_bt[i], total_args_passed, st_off); -#endif - // Say 4 args: - // i st_off - // 0 12 T_LONG - // 1 8 T_VOID - // 2 4 T_OBJECT - // 3 0 T_BOOL - VMReg r_1 = regs[i].first(); - VMReg r_2 = regs[i].second(); - if (!r_1->is_valid()) { - assert(!r_2->is_valid(), ""); - continue; - } - - if (r_1->is_stack()) { - // memory to memory use fpu stack top - int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; -#ifdef aoqi_test -tty->print_cr(" AdapterGenerator::%s :%d, r_1->is_stack, ld_off:%x", __func__, __LINE__, ld_off); -#endif - - if (!r_2->is_valid()) { -#ifdef aoqi_test -tty->print_cr(" AdapterGenerator::%s :%d, !r_2->is_valid, ld_off:%x", __func__, __LINE__, ld_off); -#endif - __ ld_ptr(AT, SP, ld_off); - __ st_ptr(AT, SP, st_off); - //tag_stack(sig_bt[i], st_off); - } else { -#ifdef aoqi_test -tty->print_cr(" AdapterGenerator::%s :%d, r_2->is_valid, ld_off:%x", __func__, __LINE__, ld_off); -#endif - - // ld_off == LSW, ld_off+VMRegImpl::stack_slot_size == MSW - // st_off == MSW, st_off-wordSize == LSW - - int next_off = st_off - Interpreter::stackElementSize; - /* - __ lw(AT, SP, ld_off); - __ sw(AT, SP, next_off); - __ lw(AT, SP, ld_off + wordSize); - __ sw(AT, SP, st_off); - */ - __ ld_ptr(AT, SP, ld_off); - __ st_ptr(AT, SP, st_off); - - /* Ref to is_Register condition */ - if(sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) - __ st_ptr(AT,SP,st_off - 8); - //tag_stack(sig_bt[i], next_off); - } - } else if (r_1->is_Register()) { - Register r = r_1->as_Register(); - if (!r_2->is_valid()) { -#ifdef aoqi_test -tty->print_cr(" AdapterGenerator::%s :%d, r_1->is_Register, !r_2->is_valid, st_off: %lx", __func__, __LINE__, st_off); -#endif - // __ movl(Address(esp, st_off), r); - __ sd(r,SP, st_off); //aoqi_test FIXME - //tag_stack(sig_bt[i], st_off); - } else { -#ifdef aoqi_test -tty->print_cr(" AdapterGenerator::%s :%d, r_1->is_Register, r_2->is_valid, st_off: %lx", __func__, __LINE__, st_off); -#endif - //FIXME, mips will not enter here - // long/double in gpr - __ sd(r,SP, st_off); //aoqi_test FIXME 
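// A standalone sketch (not part of this patch) of the interpreter-slot math the
// c2i store loop above relies on: st_off counts down from the highest interpreter
// slot, and for T_LONG / T_DOUBLE the payload is additionally stored at
// st_off - 8 so the interpreter finds it in the lower of its two slots, which is
// what the extra sd(r, SP, st_off - 8) is for (see the comment that follows).
// The value 8 for Interpreter::stackElementSize is an assumption for LP64, and
// the example signature is hypothetical.
#include <cstdio>

int main() {
  const int stack_element_size = 8;  // assumed Interpreter::stackElementSize on LP64
  const char* sig[] = { "T_LONG", "T_VOID", "T_OBJECT", "T_BOOLEAN" };
  const int total_args_passed = 4;
  for (int i = 0; i < total_args_passed; i++) {
    // Same formula as the loop above: slots are laid out highest-index first.
    int st_off = ((total_args_passed - 1) - i) * stack_element_size;
    std::printf("arg %d (%s) -> st_off %d\n", i, sig[i], st_off);
  }
  return 0;
}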
-/* Jin: In [java/util/zip/ZipFile.java] + //this is for native ? + // Since all args are passed on the stack, total_args_passed * interpreter_ + // stack_element_size is the + // space we need. + int extraspace = total_args_passed * Interpreter::stackElementSize; + + // stack is aligned, keep it that way + extraspace = round_to(extraspace, 2*wordSize); + + // Get return address + __ move(V0, RA); + // set senderSP value + //refer to interpreter_mips.cpp:generate_asm_entry + __ move(Rsender, SP); + __ addi(SP, SP, -extraspace); + + // Now write the args into the outgoing interpreter space + for (int i = 0; i < total_args_passed; i++) { + if (sig_bt[i] == T_VOID) { + assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); + continue; + } + + // st_off points to lowest address on stack. + int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize; + // Say 4 args: + // i st_off + // 0 12 T_LONG + // 1 8 T_VOID + // 2 4 T_OBJECT + // 3 0 T_BOOL + VMReg r_1 = regs[i].first(); + VMReg r_2 = regs[i].second(); + if (!r_1->is_valid()) { + assert(!r_2->is_valid(), ""); + continue; + } + if (r_1->is_stack()) { + // memory to memory use fpu stack top + int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; + if (!r_2->is_valid()) { + __ ld_ptr(AT, SP, ld_off); + __ st_ptr(AT, SP, st_off); + + } else { + + + int next_off = st_off - Interpreter::stackElementSize; + __ ld_ptr(AT, SP, ld_off); + __ st_ptr(AT, SP, st_off); + + /* Ref to is_Register condition */ + if(sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) + __ st_ptr(AT,SP,st_off - 8); + } + } else if (r_1->is_Register()) { + Register r = r_1->as_Register(); + if (!r_2->is_valid()) { + __ sd(r,SP, st_off); //aoqi_test FIXME + } else { + //FIXME, mips will not enter here + // long/double in gpr + __ sd(r,SP, st_off); //aoqi_test FIXME +/* Jin: In [java/util/zip/ZipFile.java] private static native long open(String name, int mode, long lastModified); private static native int getTotal(long jzfile); @@ -950,9 +702,9 @@ * * Caller -> lir_static_call -> gen_resolve_stub -> -- resolve_static_call_C - `- gen_c2i_adapter() [*] + `- gen_c2i_adapter() [*] | - `- AdapterHandlerLibrary::get_create_apapter_index + `- AdapterHandlerLibrary::get_create_apapter_index -> generate_native_entry -> InterpreterRuntime::SignatureHandlerGenerator::pass_long [**] @@ -970,7 +722,7 @@ | | (low) * - * However, the sequence is reversed here: + * However, the sequence is reversed here: * (high) | | @@ -986,42 +738,36 @@ * * So I stored another 8 bytes in the T_VOID slot. It then can be accessed from generate_native_entry(). */ - if (sig_bt[i] == T_LONG) - __ sd(r,SP, st_off - 8); - // ShouldNotReachHere(); - // int next_off = st_off - Interpreter::stackElementSize; - // __ sw(r_2->as_Register(),SP, st_off); - // __ sw(r,SP, next_off); - // tag_stack(masm, sig_bt[i], next_off); - } - } else if (r_1->is_FloatRegister()) { - assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); - - FloatRegister fr = r_1->as_FloatRegister(); - if (sig_bt[i] == T_FLOAT) - __ swc1(fr,SP, st_off); - else - { - __ sdc1(fr,SP, st_off); - __ sdc1(fr,SP, st_off - 8); /* T_DOUBLE needs two slots */ - } - } - } - - // Schedule the branch target address early. 
- __ ld_ptr(AT, Rmethod,in_bytes(Method::interpreter_entry_offset()) ); - // And repush original return address - __ move(RA, V0); - __ jr (AT); - __ delayed()->nop(); + if (sig_bt[i] == T_LONG) + __ sd(r,SP, st_off - 8); + } + } else if (r_1->is_FloatRegister()) { + assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); + + FloatRegister fr = r_1->as_FloatRegister(); + if (sig_bt[i] == T_FLOAT) + __ swc1(fr,SP, st_off); + else { + __ sdc1(fr,SP, st_off); + __ sdc1(fr,SP, st_off - 8); /* T_DOUBLE needs two slots */ + } + } + } + + // Schedule the branch target address early. + __ ld_ptr(AT, Rmethod,in_bytes(Method::interpreter_entry_offset()) ); + // And repush original return address + __ move(RA, V0); + __ jr (AT); + __ delayed()->nop(); } void AdapterGenerator::gen_i2c_adapter( - int total_args_passed, - // VMReg max_arg, - int comp_args_on_stack, // VMRegStackSlots - const BasicType *sig_bt, - const VMRegPair *regs) { + int total_args_passed, + // VMReg max_arg, + int comp_args_on_stack, // VMRegStackSlots + const BasicType *sig_bt, + const VMRegPair *regs) { // Generate an I2C adapter: adjust the I-frame to make space for the C-frame // layout. Lesp was saved by the calling I-frame and will be restored on @@ -1058,7 +804,7 @@ // Align the outgoing SP __ move(AT, -(StackAlignmentInBytes)); - __ andr(SP, SP, AT); + __ andr(SP, SP, AT); // push the return address on the stack (note that pushing, rather // than storing it, yields the correct frame alignment for the callee) // Put saved SP in another register @@ -1081,16 +827,16 @@ continue; } - // Pick up 0, 1 or 2 words from SP+offset. - - //FIXME. aoqi. just delete the assert + // Pick up 0, 1 or 2 words from SP+offset. + + //FIXME. aoqi. just delete the assert //assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?"); // Load in argument order going down. int ld_off = (total_args_passed -1 - i)*Interpreter::stackElementSize; // Point to interpreter value (vs. tag) int next_off = ld_off - Interpreter::stackElementSize; // - // + // // VMReg r_1 = regs[i].first(); VMReg r_2 = regs[i].second(); @@ -1098,114 +844,96 @@ assert(!r_2->is_valid(), ""); continue; } -#ifdef aoqi_test -tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d, ld_off:%d, next_off: %d", __func__, __LINE__, i, sig_bt[i], total_args_passed, ld_off, next_off); -#endif - if (r_1->is_stack()) { - // Convert stack slot to an SP offset (+ wordSize to + if (r_1->is_stack()) { + // Convert stack slot to an SP offset (+ wordSize to // account for return address ) - //NOTICE HERE!!!! I sub a wordSize here - int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size; + //NOTICE HERE!!!! I sub a wordSize here + int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size; //+ wordSize; - // We can use esi as a temp here because compiled code doesn't + // We can use esi as a temp here because compiled code doesn't // need esi as an input - // and if we end up going thru a c2i because of a miss a reasonable - // value of esi - // we be generated. + // and if we end up going thru a c2i because of a miss a reasonable + // value of esi + // we be generated. 
if (!r_2->is_valid()) { -#ifdef aoqi_test -tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d r_1->is_stack() !r_2->is_valid(), st_off:%d", __func__, __LINE__, i, sig_bt[i], total_args_passed, st_off); -#endif - __ ld(AT, saved_sp, ld_off); - __ sd(AT, SP, st_off); + __ ld(AT, saved_sp, ld_off); + __ sd(AT, SP, st_off); } else { -#ifdef aoqi_test -tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d r_1->is_stack() r_2->is_valid(), st_off:%d", __func__, __LINE__, i, sig_bt[i], total_args_passed, st_off); -#endif - // Interpreter local[n] == MSW, local[n+1] == LSW however locals - // are accessed as negative so LSW is at LOW address - - // ld_off is MSW so get LSW - // st_off is LSW (i.e. reg.first()) - /* - __ ld(AT, saved_sp, next_off); - __ sd(AT, SP, st_off); - __ ld(AT, saved_sp, ld_off); - __ sd(AT, SP, st_off + wordSize); - */ - - /* 2012/4/9 Jin - * [./org/eclipse/swt/graphics/GC.java] - * void drawImageXRender(Image srcImage, int srcX, int srcY, int srcWidth, int srcHeight, - int destX, int destY, int destWidth, int destHeight, - boolean simple, - int imgWidth, int imgHeight, - long maskPixmap, <-- Pass T_LONG in stack - int maskType); - * Before this modification, Eclipse displays icons with solid black background. - */ - __ ld(AT, saved_sp, ld_off); + // Interpreter local[n] == MSW, local[n+1] == LSW however locals + // are accessed as negative so LSW is at LOW address + + // ld_off is MSW so get LSW + // st_off is LSW (i.e. reg.first()) + /* + __ ld(AT, saved_sp, next_off); + __ sd(AT, SP, st_off); + __ ld(AT, saved_sp, ld_off); + __ sd(AT, SP, st_off + wordSize); + */ + + /* 2012/4/9 Jin + * [./org/eclipse/swt/graphics/GC.java] + * void drawImageXRender(Image srcImage, int srcX, int srcY, int srcWidth, int srcHeight, + int destX, int destY, int destWidth, int destHeight, + boolean simple, + int imgWidth, int imgHeight, + long maskPixmap, <-- Pass T_LONG in stack + int maskType); + * Before this modification, Eclipse displays icons with solid black background. + */ + __ ld(AT, saved_sp, ld_off); if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) - __ ld(AT, saved_sp, ld_off - 8); - __ sd(AT, SP, st_off); - //__ ld(AT, saved_sp, next_off); - //__ sd(AT, SP, st_off + wordSize); + __ ld(AT, saved_sp, ld_off - 8); + __ sd(AT, SP, st_off); } } else if (r_1->is_Register()) { // Register argument Register r = r_1->as_Register(); // assert(r != eax, "must be different"); if (r_2->is_valid()) { -#ifdef aoqi_test -tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d r_1->is_Register() r_2->is_valid()", __func__, __LINE__, i, sig_bt[i], total_args_passed); -#endif - // assert(r_2->as_Register() != eax, "need another temporary register"); - // Remember r_1 is low address (and LSB on mips) - // So r_2 gets loaded from high address regardless of the platform - //aoqi - assert(r_2->as_Register() == r_1->as_Register(), ""); - //__ ld(r_2->as_Register(), saved_sp, ld_off); - //__ ld(r, saved_sp, next_off); - __ ld(r, saved_sp, ld_off); - -/* Jin: - * - * For T_LONG type, the real layout is as below: - - (high) - | | - ----------- - | 8 bytes | - | (void) | - ----------- - | 8 bytes | - | (long) | - ----------- - | | - (low) - * - * We should load the low-8 bytes. 
- */ - if (sig_bt[i] == T_LONG) - __ ld(r, saved_sp, ld_off - 8); + // assert(r_2->as_Register() != eax, "need another temporary register"); + // Remember r_1 is low address (and LSB on mips) + // So r_2 gets loaded from high address regardless of the platform + //aoqi + assert(r_2->as_Register() == r_1->as_Register(), ""); + //__ ld(r_2->as_Register(), saved_sp, ld_off); + //__ ld(r, saved_sp, next_off); + __ ld(r, saved_sp, ld_off); + + /* Jin: + * + * For T_LONG type, the real layout is as below: + + (high) + | | + ----------- + | 8 bytes | + | (void) | + ----------- + | 8 bytes | + | (long) | + ----------- + | | + (low) + * + * We should load the low-8 bytes. + */ + if (sig_bt[i] == T_LONG) + __ ld(r, saved_sp, ld_off - 8); } else { -#ifdef aoqi_test -tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d r_1->is_Register() !r_2->is_valid()", __func__, __LINE__, i, sig_bt[i], total_args_passed); -#endif - __ lw(r, saved_sp, ld_off); + __ lw(r, saved_sp, ld_off); } } else if (r_1->is_FloatRegister()) { // Float Register - assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); - - FloatRegister fr = r_1->as_FloatRegister(); - if (sig_bt[i] == T_FLOAT) - __ lwc1(fr, saved_sp, ld_off); - else - { - __ ldc1(fr, saved_sp, ld_off); - __ ldc1(fr, saved_sp, ld_off - 8); - } - } + assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); + + FloatRegister fr = r_1->as_FloatRegister(); + if (sig_bt[i] == T_FLOAT) + __ lwc1(fr, saved_sp, ld_off); + else { + __ ldc1(fr, saved_sp, ld_off); + __ ldc1(fr, saved_sp, ld_off - 8); + } + } } // 6243940 We might end up in handle_wrong_method if @@ -1223,8 +951,8 @@ // move methodOop to eax in case we end up in an c2i adapter. // the c2i adapters expect methodOop in eax (c2) because c2's // resolve stubs return the result (the method) in eax. - // I'd love to fix this. - __ move(V0, Rmethod); + // I'd love to fix this. + __ move(V0, Rmethod); __ jr(T9); __ delayed()->nop(); } @@ -1264,33 +992,24 @@ Label missed; __ verify_oop(holder); - // __ movl(temp, Address(receiver, oopDesc::klass_offset_in_bytes())); - //__ ld_ptr(temp, receiver, oopDesc::klass_offset_in_bytes()); //add for compressedoops __ load_klass(temp, receiver); __ verify_oop(temp); - // __ cmpl(temp, Address(holder, CompiledICHolder::holder_klass_offset())); - __ ld_ptr(AT, holder, CompiledICHolder::holder_klass_offset()); - //__ movl(ebx, Address(holder, CompiledICHolder::holder_method_offset())); + __ ld_ptr(AT, holder, CompiledICHolder::holder_klass_offset()); __ ld_ptr(Rmethod, holder, CompiledICHolder::holder_method_offset()); - //__ jcc(Assembler::notEqual, missed); - __ bne(AT, temp, missed); - __ delayed()->nop(); + __ bne(AT, temp, missed); + __ delayed()->nop(); // Method might have been compiled since the call site was patched to // interpreted if that is the case treat it as a miss so we can get // the call site corrected. 
- //__ cmpl(Address(ebx, in_bytes(Method::code_offset())), NULL_WORD); - //__ jcc(Assembler::equal, skip_fixup); __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); - __ beq(AT, R0, skip_fixup); - __ delayed()->nop(); + __ beq(AT, R0, skip_fixup); + __ delayed()->nop(); __ bind(missed); - // __ move(AT, (int)&jerome7); - // __ sw(RA, AT, 0); __ jmp(ic_miss, relocInfo::runtime_call_type); - __ delayed()->nop(); + __ delayed()->nop(); } address c2i_entry = __ pc(); @@ -1299,72 +1018,34 @@ __ flush(); return AdapterHandlerLibrary::new_entry(fingerprint,i2c_entry, c2i_entry, c2i_unverified_entry); - } -/* -// Helper function for native calling conventions -static VMReg int_stk_helper( int i ) { - // Bias any stack based VMReg we get by ignoring the window area - // but not the register parameter save area. - // - // This is strange for the following reasons. We'd normally expect - // the calling convention to return an VMReg for a stack slot - // completely ignoring any abi reserved area. C2 thinks of that - // abi area as only out_preserve_stack_slots. This does not include - // the area allocated by the C abi to store down integer arguments - // because the java calling convention does not use it. So - // since c2 assumes that there are only out_preserve_stack_slots - // to bias the optoregs (which impacts VMRegs) when actually referencing any actual stack - // location the c calling convention must add in this bias amount - // to make up for the fact that the out_preserve_stack_slots is - // insufficient for C calls. What a mess. I sure hope those 6 - // stack words were worth it on every java call! - - // Another way of cleaning this up would be for out_preserve_stack_slots - // to take a parameter to say whether it was C or java calling conventions. - // Then things might look a little better (but not much). - - int mem_parm_offset = i - SPARC_ARGS_IN_REGS_NUM; - if( mem_parm_offset < 0 ) { - return as_oRegister(i)->as_VMReg(); - } else { - int actual_offset = (mem_parm_offset + frame::memory_parameter_word_sp_offset) * VMRegImpl::slots_per_word; - // Now return a biased offset that will be correct when out_preserve_slots is added back in - return VMRegImpl::stack2reg(actual_offset - SharedRuntime::out_preserve_stack_slots()); - } -} -*/ - int SharedRuntime::c_calling_convention(const BasicType *sig_bt, VMRegPair *regs, VMRegPair *regs2, int total_args_passed) { - assert(regs2 == NULL, "not needed on MIPS"); -#ifdef aoqi_test -tty->print_cr(" SharedRuntime::%s :%d total_args_passed:%d", __func__, __LINE__, total_args_passed); -#endif - // Return the number of VMReg stack_slots needed for the args. - // This value does not include an abi space (like register window - // save area). - - // The native convention is V8 if !LP64 - // The LP64 convention is the V9 convention which is slightly more sane. - - // We return the amount of VMReg stack slots we need to reserve for all - // the arguments NOT counting out_preserve_stack_slots. Since we always - // have space for storing at least 6 registers to memory we start with that. - // See int_stk_helper for a further discussion. - // We return the amount of VMRegImpl stack slots we need to reserve for all - // the arguments NOT counting out_preserve_stack_slots. + assert(regs2 == NULL, "not needed on MIPS"); + // Return the number of VMReg stack_slots needed for the args. + // This value does not include an abi space (like register window + // save area). 
+ + // The native convention is V8 if !LP64 + // The LP64 convention is the V9 convention which is slightly more sane. + + // We return the amount of VMReg stack slots we need to reserve for all + // the arguments NOT counting out_preserve_stack_slots. Since we always + // have space for storing at least 6 registers to memory we start with that. + // See int_stk_helper for a further discussion. + // We return the amount of VMRegImpl stack slots we need to reserve for all + // the arguments NOT counting out_preserve_stack_slots. static const Register INT_ArgReg[Argument::n_register_parameters] = { A0, A1, A2, A3, A4, A5, A6, A7 }; static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { F12, F13, F14, F15, F16, F17, F18, F19 }; - uint args = 0; - uint stk_args = 0; // inc by 2 each time + uint args = 0; + uint stk_args = 0; // inc by 2 each time /* Example: --- n java.lang.UNIXProcess::forkAndExec @@ -1389,192 +1070,144 @@ jobject stderr_fd) ::c_calling_convention -0: // env <-- a0 -1: L // klass/obj <-- t0 => a1 -2: [ // prog[] <-- a0 => a2 -3: [ // argBlock[] <-- a1 => a3 -4: I // argc -5: [ // envBlock[] <-- a3 => a5 -6: I // envc -7: [ // dir[] <-- a5 => a7 -8: Z // redirectErrorStream a6 => sp[0] -9: L // stdin a7 => sp[8] -10: L // stdout fp[16] => sp[16] -11: L // stderr fp[24] => sp[24] +0: // env <-- a0 +1: L // klass/obj <-- t0 => a1 +2: [ // prog[] <-- a0 => a2 +3: [ // argBlock[] <-- a1 => a3 +4: I // argc +5: [ // envBlock[] <-- a3 => a5 +6: I // envc +7: [ // dir[] <-- a5 => a7 +8: Z // redirectErrorStream a6 => sp[0] +9: L // stdin a7 => sp[8] +10: L // stdout fp[16] => sp[16] +11: L // stderr fp[24] => sp[24] */ - for (int i = 0; i < total_args_passed; i++) { - switch (sig_bt[i]) { - case T_VOID: // Halves of longs and doubles - assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); - regs[i].set_bad(); - break; - case T_BOOLEAN: - case T_CHAR: - case T_BYTE: - case T_SHORT: - case T_INT: - if (args < Argument::n_register_parameters) { - regs[i].set1(INT_ArgReg[args++]->as_VMReg()); - } else { - regs[i].set1(VMRegImpl::stack2reg(stk_args)); - stk_args += 2; - } - break; - case T_LONG: - assert(sig_bt[i + 1] == T_VOID, "expecting half"); - // fall through - case T_OBJECT: - case T_ARRAY: - case T_ADDRESS: - case T_METADATA: - if (args < Argument::n_register_parameters) { - regs[i].set2(INT_ArgReg[args++]->as_VMReg()); - } else { - regs[i].set2(VMRegImpl::stack2reg(stk_args)); - stk_args += 2; - } - break; - case T_FLOAT: - if (args < Argument::n_float_register_parameters) { - regs[i].set1(FP_ArgReg[args++]->as_VMReg()); - } else { - regs[i].set1(VMRegImpl::stack2reg(stk_args)); - stk_args += 2; - } - break; - case T_DOUBLE: - assert(sig_bt[i + 1] == T_VOID, "expecting half"); - if (args < Argument::n_float_register_parameters) { - regs[i].set2(FP_ArgReg[args++]->as_VMReg()); - } else { - regs[i].set2(VMRegImpl::stack2reg(stk_args)); - stk_args += 2; - } - break; - default: - ShouldNotReachHere(); - break; - } - } - - return round_to(stk_args, 2); -} -/* -int SharedRuntime::c_calling_convention_jni(const BasicType *sig_bt, - VMRegPair *regs, - int total_args_passed) { -// We return the amount of VMRegImpl stack slots we need to reserve for all -// the arguments NOT counting out_preserve_stack_slots. 
- bool unalign = 0; - uint stack = 0; // All arguments on stack -#ifdef aoqi_test -tty->print_cr(" SharedRuntime::%s :%d total_args_passed:%d", __func__, __LINE__, total_args_passed); -#endif - - for( int i = 0; i < total_args_passed; i++) { - // From the type and the argument number (count) compute the location - switch( sig_bt[i] ) { + for (int i = 0; i < total_args_passed; i++) { + switch (sig_bt[i]) { + case T_VOID: // Halves of longs and doubles + assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); + regs[i].set_bad(); + break; case T_BOOLEAN: case T_CHAR: - case T_FLOAT: case T_BYTE: case T_SHORT: case T_INT: + if (args < Argument::n_register_parameters) { + regs[i].set1(INT_ArgReg[args++]->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_LONG: + assert(sig_bt[i + 1] == T_VOID, "expecting half"); + // fall through case T_OBJECT: case T_ARRAY: case T_ADDRESS: - regs[i].set1(VMRegImpl::stack2reg(stack++)); - unalign = !unalign; + case T_METADATA: + if (args < Argument::n_register_parameters) { + regs[i].set2(INT_ArgReg[args++]->as_VMReg()); + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } break; - case T_LONG: - case T_DOUBLE: // The stack numbering is reversed from Java - // Since C arguments do not get reversed, the ordering for - // doubles on the stack must be opposite the Java convention - assert(sig_bt[i+1] == T_VOID, "missing Half" ); - if(unalign){ - stack += 1; - unalign = ! unalign; - } - regs[i].set2(VMRegImpl::stack2reg(stack)); - stack += 2; + case T_FLOAT: + if (args < Argument::n_float_register_parameters) { + regs[i].set1(FP_ArgReg[args++]->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } break; - case T_VOID: regs[i].set_bad(); break; + case T_DOUBLE: + assert(sig_bt[i + 1] == T_VOID, "expecting half"); + if (args < Argument::n_float_register_parameters) { + regs[i].set2(FP_ArgReg[args++]->as_VMReg()); + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; default: ShouldNotReachHere(); break; } } - return stack; + + return round_to(stk_args, 2); } -*/ // --------------------------------------------------------------------------- void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { - // We always ignore the frame_slots arg and just use the space just below frame pointer - // which by this time is free to use - switch (ret_type) { - case T_FLOAT: - __ swc1(FSF, FP, -wordSize); - break; - case T_DOUBLE: - __ sdc1(FSF, FP, -wordSize ); - break; - case T_VOID: break; - case T_LONG: - __ sd(V0, FP, -wordSize); - break; - case T_OBJECT: - case T_ARRAY: - __ sd(V0, FP, -wordSize); - break; - default: { - __ sw(V0, FP, -wordSize); - } - } + // We always ignore the frame_slots arg and just use the space just below frame pointer + // which by this time is free to use + switch (ret_type) { + case T_FLOAT: + __ swc1(FSF, FP, -wordSize); + break; + case T_DOUBLE: + __ sdc1(FSF, FP, -wordSize ); + break; + case T_VOID: break; + case T_LONG: + __ sd(V0, FP, -wordSize); + break; + case T_OBJECT: + case T_ARRAY: + __ sd(V0, FP, -wordSize); + break; + default: { + __ sw(V0, FP, -wordSize); + } + } } void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { - // We always ignore the frame_slots arg and just use the space just below frame pointer - // which by this time is free to use - 
switch (ret_type) { - case T_FLOAT: - __ lwc1(FSF, FP, -wordSize); - break; - case T_DOUBLE: - __ ldc1(FSF, FP, -wordSize ); - break; - case T_LONG: - __ ld(V0, FP, -wordSize); - break; - case T_VOID: break; - case T_OBJECT: - case T_ARRAY: - __ ld(V0, FP, -wordSize); - break; - default: { - __ lw(V0, FP, -wordSize); - } - } + // We always ignore the frame_slots arg and just use the space just below frame pointer + // which by this time is free to use + switch (ret_type) { + case T_FLOAT: + __ lwc1(FSF, FP, -wordSize); + break; + case T_DOUBLE: + __ ldc1(FSF, FP, -wordSize ); + break; + case T_LONG: + __ ld(V0, FP, -wordSize); + break; + case T_VOID: break; + case T_OBJECT: + case T_ARRAY: + __ ld(V0, FP, -wordSize); + break; + default: { + __ lw(V0, FP, -wordSize); + } + } } static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { - for ( int i = first_arg ; i < arg_count ; i++ ) { - if (args[i].first()->is_Register()) { - __ push(args[i].first()->as_Register()); - } else if (args[i].first()->is_FloatRegister()) { - __ push(args[i].first()->as_FloatRegister()); - } + for ( int i = first_arg ; i < arg_count ; i++ ) { + if (args[i].first()->is_Register()) { + __ push(args[i].first()->as_Register()); + } else if (args[i].first()->is_FloatRegister()) { + __ push(args[i].first()->as_FloatRegister()); } + } } static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { - for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { - if (args[i].first()->is_Register()) { - __ pop(args[i].first()->as_Register()); - } else if (args[i].first()->is_FloatRegister()) { - __ pop(args[i].first()->as_FloatRegister()); - } + for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { + if (args[i].first()->is_Register()) { + __ pop(args[i].first()->as_Register()); + } else if (args[i].first()->is_FloatRegister()) { + __ pop(args[i].first()->as_FloatRegister()); } + } } // A simple move of integer like type @@ -1582,46 +1215,21 @@ if (src.first()->is_stack()) { if (dst.first()->is_stack()) { // stack to stack - __ lw(AT, FP, reg2offset_in(src.first())); - __ sd(AT,SP, reg2offset_out(dst.first())); + __ lw(AT, FP, reg2offset_in(src.first())); + __ sd(AT,SP, reg2offset_out(dst.first())); } else { // stack to reg - //__ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register()); - __ lw(dst.first()->as_Register(), FP, reg2offset_in(src.first())); + __ lw(dst.first()->as_Register(), FP, reg2offset_in(src.first())); } } else if (dst.first()->is_stack()) { // reg to stack - __ sd(src.first()->as_Register(), SP, reg2offset_out(dst.first())); + __ sd(src.first()->as_Register(), SP, reg2offset_out(dst.first())); } else { - //__ mov(src.first()->as_Register(), dst.first()->as_Register()); - if (dst.first() != src.first()){ - __ move(dst.first()->as_Register(), src.first()->as_Register()); // fujie error:dst.first() - } + if (dst.first() != src.first()){ + __ move(dst.first()->as_Register(), src.first()->as_Register()); // fujie error:dst.first() + } } } -/* -// On 64 bit we will store integer like items to the stack as -// 64 bits items (sparc abi) even though java would only store -// 32bits for a parameter. 
On 32bit it will simply be 32 bits -// So this routine will do 32->32 on 32bit and 32->64 on 64bit -static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { - if (src.first()->is_stack()) { - if (dst.first()->is_stack()) { - // stack to stack - __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5); - __ st_ptr(L5, SP, reg2offset(dst.first()) + STACK_BIAS); - } else { - // stack to reg - __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register()); - } - } else if (dst.first()->is_stack()) { - // reg to stack - __ st_ptr(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS); - } else { - __ mov(src.first()->as_Register(), dst.first()->as_Register()); - } -} -*/ // An oop arg. Must pass a handle not the oop itself static void object_move(MacroAssembler* masm, @@ -1635,181 +1243,133 @@ // must pass a handle. First figure out the location we use as a handle - //FIXME, for mips, dst can be register - if (src.first()->is_stack()) { - // Oop is already on the stack as an argument - Register rHandle = V0; - Label nil; - //__ xorl(rHandle, rHandle); - __ xorr(rHandle, rHandle, rHandle); - //__ cmpl(Address(ebp, reg2offset_in(src.first())), NULL_WORD); - __ ld(AT, FP, reg2offset_in(src.first())); - //__ jcc(Assembler::equal, nil); - __ beq(AT,R0, nil); - __ delayed()->nop(); - // __ leal(rHandle, Address(ebp, reg2offset_in(src.first()))); - __ lea(rHandle, Address(FP, reg2offset_in(src.first()))); - __ bind(nil); - //__ movl(Address(esp, reg2offset_out(dst.first())), rHandle); - if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first())); - else __ move( (dst.first())->as_Register(),rHandle); - //if dst is register - //FIXME, do mips need out preserve stack slots? - int offset_in_older_frame = src.first()->reg2stack() - + SharedRuntime::out_preserve_stack_slots(); - map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); - if (is_receiver) { - *receiver_offset = (offset_in_older_frame - + framesize_in_slots) * VMRegImpl::stack_slot_size; - } - } else { - // Oop is in an a register we must store it to the space we reserve - // on the stack for oop_handles - const Register rOop = src.first()->as_Register(); - assert( (rOop->encoding() >= A0->encoding()) && (rOop->encoding() <= T0->encoding()),"wrong register"); - // const Register rHandle = eax; - const Register rHandle = V0; - //Important: refer to java_calling_convertion - int oop_slot = (rOop->encoding() - A0->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset; - int offset = oop_slot*VMRegImpl::stack_slot_size; - Label skip; - // __ movl(Address(esp, offset), rOop); - __ sd( rOop , SP, offset ); - map->set_oop(VMRegImpl::stack2reg(oop_slot)); - // __ xorl(rHandle, rHandle); - __ xorr( rHandle, rHandle, rHandle); - //__ cmpl(rOop, NULL_WORD); - // __ jcc(Assembler::equal, skip); - __ beq(rOop, R0, skip); - __ delayed()->nop(); - // __ leal(rHandle, Address(esp, offset)); - __ lea(rHandle, Address(SP, offset)); - __ bind(skip); - // Store the handle parameter - //__ movl(Address(esp, reg2offset_out(dst.first())), rHandle); - if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first())); - else __ move((dst.first())->as_Register(), rHandle); - //if dst is register - - if (is_receiver) { - *receiver_offset = offset; - } - } + //FIXME, for mips, dst can be register + if (src.first()->is_stack()) { + // Oop is already on the stack as an argument + Register rHandle = V0; + Label nil; + __ xorr(rHandle, rHandle, rHandle); + __ ld(AT, FP, 
reg2offset_in(src.first())); + __ beq(AT,R0, nil); + __ delayed()->nop(); + __ lea(rHandle, Address(FP, reg2offset_in(src.first()))); + __ bind(nil); + if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first())); + else __ move( (dst.first())->as_Register(),rHandle); + //if dst is register + //FIXME, do mips need out preserve stack slots? + int offset_in_older_frame = src.first()->reg2stack() + + SharedRuntime::out_preserve_stack_slots(); + map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); + if (is_receiver) { + *receiver_offset = (offset_in_older_frame + + framesize_in_slots) * VMRegImpl::stack_slot_size; + } + } else { + // Oop is in an a register we must store it to the space we reserve + // on the stack for oop_handles + const Register rOop = src.first()->as_Register(); + assert( (rOop->encoding() >= A0->encoding()) && (rOop->encoding() <= T0->encoding()),"wrong register"); + const Register rHandle = V0; + //Important: refer to java_calling_convertion + int oop_slot = (rOop->encoding() - A0->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset; + int offset = oop_slot*VMRegImpl::stack_slot_size; + Label skip; + __ sd( rOop , SP, offset ); + map->set_oop(VMRegImpl::stack2reg(oop_slot)); + __ xorr( rHandle, rHandle, rHandle); + __ beq(rOop, R0, skip); + __ delayed()->nop(); + __ lea(rHandle, Address(SP, offset)); + __ bind(skip); + // Store the handle parameter + if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first())); + else __ move((dst.first())->as_Register(), rHandle); + //if dst is register + + if (is_receiver) { + *receiver_offset = offset; + } + } } // A float arg may have to do float reg int reg conversion static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move"); - if (src.first()->is_stack()) { - if(dst.first()->is_stack()){ - // __ movl(eax, Address(ebp, reg2offset_in(src.first()))); - __ lwc1(F12 , FP, reg2offset_in(src.first())); - // __ movl(Address(esp, reg2offset_out(dst.first())), eax); - __ swc1(F12 ,SP, reg2offset_out(dst.first())); - } - else - __ lwc1( dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); - } else { - // reg to stack - // __ movss(Address(esp, reg2offset_out(dst.first())), - // src.first()->as_XMMRegister()); - // __ movl(Address(esp, reg2offset_out(dst.first())), eax); - if(dst.first()->is_stack()) - __ swc1( src.first()->as_FloatRegister(),SP, reg2offset_out(dst.first())); - else - __ mov_s( dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); - } + if (src.first()->is_stack()) { + if (dst.first()->is_stack()) { + __ lwc1(F12 , FP, reg2offset_in(src.first())); + __ swc1(F12 ,SP, reg2offset_out(dst.first())); + } + else + __ lwc1( dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); + } else { + // reg to stack + if(dst.first()->is_stack()) + __ swc1( src.first()->as_FloatRegister(),SP, reg2offset_out(dst.first())); + else + __ mov_s( dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); + } } -/* -static void split_long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { - VMRegPair src_lo(src.first()); - VMRegPair src_hi(src.second()); - VMRegPair dst_lo(dst.first()); - VMRegPair dst_hi(dst.second()); - simple_move32(masm, src_lo, dst_lo); - simple_move32(masm, src_hi, dst_hi); -} -*/ + // A long move static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { - // The only legal possibility for a 
long_move VMRegPair is: - // 1: two stack slots (possibly unaligned) - // as neither the java or C calling convention will use registers - // for longs. - - if (src.first()->is_stack()) { - assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack"); - // __ movl(eax, Address(ebp, reg2offset_in(src.first()))); - if( dst.first()->is_stack()){ - __ ld(AT, FP, reg2offset_in(src.first())); - // __ movl(ebx, address(ebp, reg2offset_in(src.second()))); - //__ lw(V0, FP, reg2offset_in(src.second())); - // __ movl(address(esp, reg2offset_out(dst.first())), eax); - __ sd(AT, SP, reg2offset_out(dst.first())); - // __ movl(address(esp, reg2offset_out(dst.second())), ebx); - //__ sw(V0, SP, reg2offset_out(dst.second())); - } else{ - __ ld( (dst.first())->as_Register() , FP, reg2offset_in(src.first())); - //__ lw( (dst.second())->as_Register(), FP, reg2offset_in(src.second())); - } - } else { - if( dst.first()->is_stack()){ - __ sd( (src.first())->as_Register(), SP, reg2offset_out(dst.first())); - //__ sw( (src.second())->as_Register(), SP, reg2offset_out(dst.second())); - } else{ - __ move( (dst.first())->as_Register() , (src.first())->as_Register()); - //__ move( (dst.second())->as_Register(), (src.second())->as_Register()); - } - } + // The only legal possibility for a long_move VMRegPair is: + // 1: two stack slots (possibly unaligned) + // as neither the java or C calling convention will use registers + // for longs. + + if (src.first()->is_stack()) { + assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack"); + if( dst.first()->is_stack()){ + __ ld(AT, FP, reg2offset_in(src.first())); + __ sd(AT, SP, reg2offset_out(dst.first())); + } else { + __ ld( (dst.first())->as_Register() , FP, reg2offset_in(src.first())); + } + } else { + if( dst.first()->is_stack()){ + __ sd( (src.first())->as_Register(), SP, reg2offset_out(dst.first())); + } else{ + __ move( (dst.first())->as_Register() , (src.first())->as_Register()); + } + } } // A double move static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { - // The only legal possibilities for a double_move VMRegPair are: - // The painful thing here is that like long_move a VMRegPair might be - - // Because of the calling convention we know that src is either - // 1: a single physical register (xmm registers only) - // 2: two stack slots (possibly unaligned) - // dst can only be a pair of stack slots. 
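// long_move and double_move both lean on the convention restated above: a 64-bit
// value that is not in a register occupies two adjacent 32-bit stack slots, and
// only the first slot's offset is needed for the single 64-bit ld/ldc1/sd/sdc1.
// A standalone sketch of that pairing; the 4-byte slot size is an assumption.
#include <cstdio>
#include <cstring>

static const int stack_slot_size = 4;

int main() {
  unsigned char frame[64] = {0};              // toy stack frame
  long long value = 0x1122334455667788LL;

  int first_slot = 6;                         // pretend VMRegPair.first() landed here
  std::memcpy(frame + first_slot * stack_slot_size, &value, sizeof value);

  long long reloaded;                         // the one 64-bit load long_move issues
  std::memcpy(&reloaded, frame + first_slot * stack_slot_size, sizeof reloaded);
  std::printf("%#llx spans slots %d and %d\n",
              (unsigned long long) reloaded, first_slot, first_slot + 1);
  return 0;
}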
- - // assert(dst.first()->is_stack() && (src.first()->is_XMMRegister() || - // src.first()->is_stack()), "bad args"); - // assert(dst.first()->is_stack() || src.first()->is_stack()), "bad args"); - - if (src.first()->is_stack()) { - // source is all stack - // __ movl(eax, Address(ebp, reg2offset_in(src.first()))); - if( dst.first()->is_stack()){ - __ ldc1(F12, FP, reg2offset_in(src.first())); - //__ movl(ebx, Address(ebp, reg2offset_in(src.second()))); - //__ lwc1(F14, FP, reg2offset_in(src.second())); - - // __ movl(Address(esp, reg2offset_out(dst.first())), eax); - __ sdc1(F12, SP, reg2offset_out(dst.first())); - // __ movl(Address(esp, reg2offset_out(dst.second())), ebx); - //__ swc1(F14, SP, reg2offset_out(dst.second())); - } else{ - __ ldc1( (dst.first())->as_FloatRegister(), FP, reg2offset_in(src.first())); - //__ lwc1( (dst.second())->as_FloatRegister(), FP, reg2offset_in(src.second())); - } - - } else { - // reg to stack - // No worries about stack alignment - // __ movsd(Address(esp, reg2offset_out(dst.first())), src.first()->as_XMMRegister()); - if( dst.first()->is_stack()){ - __ sdc1( src.first()->as_FloatRegister(),SP, reg2offset_out(dst.first())); - //__ swc1( src.second()->as_FloatRegister(),SP, reg2offset_out(dst.second())); - } - else - __ mov_d( dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); - //__ mov_s( dst.second()->as_FloatRegister(), src.second()->as_FloatRegister()); - - } + // The only legal possibilities for a double_move VMRegPair are: + // The painful thing here is that like long_move a VMRegPair might be + + // Because of the calling convention we know that src is either + // 1: a single physical register (xmm registers only) + // 2: two stack slots (possibly unaligned) + // dst can only be a pair of stack slots. + + + if (src.first()->is_stack()) { + // source is all stack + if( dst.first()->is_stack()){ + __ ldc1(F12, FP, reg2offset_in(src.first())); + + __ sdc1(F12, SP, reg2offset_out(dst.first())); + } else{ + __ ldc1( (dst.first())->as_FloatRegister(), FP, reg2offset_in(src.first())); + } + + } else { + // reg to stack + // No worries about stack alignment + if( dst.first()->is_stack()){ + __ sdc1( src.first()->as_FloatRegister(),SP, reg2offset_out(dst.first())); + } + else + __ mov_d( dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); + + } } static void verify_oop_args(MacroAssembler* masm, @@ -1824,7 +1384,6 @@ VMReg r = regs[i].first(); assert(r->is_valid(), "bad oop arg"); if (r->is_stack()) { -// __ movptr(temp_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); __ ld(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); __ verify_oop(temp_reg); } else { @@ -1850,7 +1409,6 @@ int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); if (ref_kind != 0) { member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument -// member_reg = rbx; // known to be free at this point member_reg = S3; // known to be free at this point has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); } else if (iid == vmIntrinsics::_invokeBasic) { @@ -1882,7 +1440,6 @@ // pass the receiver oop in a register. If this is not true on some // platform, pick a temp and load the receiver from stack. 
fatal("receiver always in a register"); -// receiver_reg = j_rarg0; // known to be free at this point receiver_reg = SSR; // known to be free at this point __ ld(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); } else { @@ -1905,20 +1462,17 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm, methodHandle method, int compile_id, - BasicType *in_sig_bt, - VMRegPair *in_regs, + BasicType* in_sig_bt, + VMRegPair* in_regs, BasicType ret_type) { - if (method->is_method_handle_intrinsic()) { vmIntrinsics::ID iid = method->intrinsic_id(); intptr_t start = (intptr_t)__ pc(); int vep_offset = ((intptr_t)__ pc()) - start; - gen_special_dispatch(masm, method, in_sig_bt, in_regs); - int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period __ flush(); int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually @@ -1947,11 +1501,11 @@ // An OopMap for lock (and class if static), and one for the VM call itself OopMapSet *oop_maps = new OopMapSet(); - // We have received a description of where all the java arg are located - // on entry to the wrapper. We need to convert these args to where - // the jni function will expect them. To figure out where they go - // we convert the java signature to a C signature by inserting - // the hidden arguments as arg[0] and possibly arg[1] (static method) + // We have received a description of where all the java arg are located + // on entry to the wrapper. We need to convert these args to where + // the jni function will expect them. To figure out where they go + // we convert the java signature to a C signature by inserting + // the hidden arguments as arg[0] and possibly arg[1] (static method) const int total_in_args = method->size_of_parameters(); int total_c_args = total_in_args; @@ -1968,8 +1522,8 @@ } } - BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); - VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); + BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); + VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); BasicType* in_elem_bt = NULL; int argc = 0; @@ -2024,7 +1578,7 @@ // int out_arg_slots; //out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args); - out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); + out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); // Compute framesize for the wrapper. We need to handlize all oops in // registers. 
We must create space for them here that is disjoint from @@ -2079,16 +1633,14 @@ } int oop_handle_offset = stack_slots; -// stack_slots += 9*VMRegImpl::slots_per_word; // T0, A0 ~ A7 stack_slots += total_save_slots; // Now any space we need for handlizing a klass if static method - int klass_slot_offset = 0; - int klass_offset = -1; - int lock_slot_offset = 0; - bool is_static = false; - //int oop_temp_slot_offset = 0; + int klass_slot_offset = 0; + int klass_offset = -1; + int lock_slot_offset = 0; + bool is_static = false; if (method->is_static()) { klass_slot_offset = stack_slots; @@ -2105,9 +1657,8 @@ } // Now a place to save return value or as a temporary for any gpr -> fpr moves - // + 2 for return address (which we own) and saved ebp - //stack_slots += 2; - stack_slots += 2 + 9 * VMRegImpl::slots_per_word; // (T0, A0, A1, A2, A3, A4, A5, A6, A7) + // + 2 for return address (which we own) and saved ebp + stack_slots += 2 + 9 * VMRegImpl::slots_per_word; // (T0, A0, A1, A2, A3, A4, A5, A6, A7) // Ok The space we have allocated will look like: // @@ -2140,141 +1691,137 @@ int stack_size = stack_slots * VMRegImpl::stack_slot_size; - intptr_t start = (intptr_t)__ pc(); - - - - // First thing make an ic check to see if we should even be here - address ic_miss = SharedRuntime::get_ic_miss_stub(); - - // We are free to use all registers as temps without saving them and - // restoring them except ebp. ebp is the only callee save register - // as far as the interpreter and the compiler(s) are concerned. + intptr_t start = (intptr_t)__ pc(); + + + + // First thing make an ic check to see if we should even be here + address ic_miss = SharedRuntime::get_ic_miss_stub(); + + // We are free to use all registers as temps without saving them and + // restoring them except ebp. ebp is the only callee save register + // as far as the interpreter and the compiler(s) are concerned. //refer to register_mips.hpp:IC_Klass - const Register ic_reg = T1; - const Register receiver = T0; - Label hit; - Label exception_pending; - - __ verify_oop(receiver); - //__ lw(AT, receiver, oopDesc::klass_offset_in_bytes()); - //add for compressedoops - __ load_klass(T9, receiver); - __ beq(T9, ic_reg, hit); - __ delayed()->nop(); - __ jmp(ic_miss, relocInfo::runtime_call_type); - __ delayed()->nop(); - // verified entry must be aligned for code patching. - // and the first 5 bytes must be in the same cache line - // if we align at 8 then we will be sure 5 bytes are in the same line - __ align(8); - - __ bind(hit); - - - int vep_offset = ((intptr_t)__ pc()) - start; + const Register ic_reg = T1; + const Register receiver = T0; + + Label hit; + Label exception_pending; + + __ verify_oop(receiver); + //add for compressedoops + __ load_klass(T9, receiver); + __ beq(T9, ic_reg, hit); + __ delayed()->nop(); + __ jmp(ic_miss, relocInfo::runtime_call_type); + __ delayed()->nop(); + // verified entry must be aligned for code patching. + // and the first 5 bytes must be in the same cache line + // if we align at 8 then we will be sure 5 bytes are in the same line + __ align(8); + + __ bind(hit); + + + int vep_offset = ((intptr_t)__ pc()) - start; #ifdef COMPILER1 - if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) { - // Object.hashCode can pull the hashCode from the header word - // instead of doing a full VM transition once it's been computed. - // Since hashCode is usually polymorphic at call sites we can't do - // this optimization at the call site without a lot of work. 
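// The InlineObjectHash fast path that follows pulls the identity hash straight
// out of the mark word when the object is unlocked, unbiased, and already has a
// hash installed, and only otherwise falls into slowCase.  A standalone sketch
// of that bit-test sequence; the bit positions below are illustrative
// assumptions, not the real markOop layout.
#include <cstdint>
#include <cstdio>

static const uint64_t unlocked_value     = 0x1;  // assumed "unlocked" low bit
static const uint64_t biased_lock_bit    = 0x4;  // assumed biased-locking bit
static const int      hash_shift         = 8;    // assumed hash position
static const uint64_t hash_mask_in_place = 0x7fffffffULL << hash_shift;

// Returns true and fills *hash when the fast path applies; false means slowCase.
static bool fast_hash(uint64_t mark, uint32_t* hash) {
  if ((mark & unlocked_value) == 0) return false;      // andi + beq: locked, go slow
  if ((mark & biased_lock_bit) != 0) return false;     // biased, go slow
  if ((mark & hash_mask_in_place) == 0) return false;  // no hash installed yet
  *hash = (uint32_t)(mark >> hash_shift);              // shr(result, hash_shift)
  return true;
}

int main() {
  uint64_t mark = (0xCAFEULL << hash_shift) | unlocked_value;
  uint32_t h = 0;
  bool taken = fast_hash(mark, &h);
  std::printf("fast path taken: %d, hash = %#x\n", taken ? 1 : 0, h);
  return 0;
}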
- Label slowCase; - Register receiver = T0; - Register result = V0; - __ ld ( result, receiver, oopDesc::mark_offset_in_bytes()); - // check if locked - __ andi(AT, result, markOopDesc::unlocked_value); - __ beq(AT, R0, slowCase); - __ delayed()->nop(); - if (UseBiasedLocking) { - // Check if biased and fall through to runtime if so - __ andi (AT, result, markOopDesc::biased_lock_bit_in_place); - __ bne(AT,R0, slowCase); - __ delayed()->nop(); - } - // get hash - __ li(AT, markOopDesc::hash_mask_in_place); - __ andr (AT, result, AT); - // test if hashCode exists - __ beq (AT, R0, slowCase); - __ delayed()->nop(); - __ shr(result, markOopDesc::hash_shift); - __ jr(RA); - __ delayed()->nop(); - __ bind (slowCase); - } + if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) { + // Object.hashCode can pull the hashCode from the header word + // instead of doing a full VM transition once it's been computed. + // Since hashCode is usually polymorphic at call sites we can't do + // this optimization at the call site without a lot of work. + Label slowCase; + Register receiver = T0; + Register result = V0; + __ ld ( result, receiver, oopDesc::mark_offset_in_bytes()); + // check if locked + __ andi(AT, result, markOopDesc::unlocked_value); + __ beq(AT, R0, slowCase); + __ delayed()->nop(); + if (UseBiasedLocking) { + // Check if biased and fall through to runtime if so + __ andi (AT, result, markOopDesc::biased_lock_bit_in_place); + __ bne(AT,R0, slowCase); + __ delayed()->nop(); + } + // get hash + __ li(AT, markOopDesc::hash_mask_in_place); + __ andr (AT, result, AT); + // test if hashCode exists + __ beq (AT, R0, slowCase); + __ delayed()->nop(); + __ shr(result, markOopDesc::hash_shift); + __ jr(RA); + __ delayed()->nop(); + __ bind (slowCase); + } #endif // COMPILER1 - // The instruction at the verified entry point must be 5 bytes or longer - // because it can be patched on the fly by make_non_entrant. The stack bang - // instruction fits that requirement. - - // Generate stack overflow check - - if (UseStackBanging) { - //this function will modify the value in A0 - __ push(A0); - __ bang_stack_with_offset(StackShadowPages*os::vm_page_size()); - __ pop(A0); - } else { - // need a 5 byte instruction to allow MT safe patching to non-entrant - __ nop(); - __ nop(); - __ nop(); - __ nop(); - __ nop(); - } - // Generate a new frame for the wrapper. - // do mips need this ? + // The instruction at the verified entry point must be 5 bytes or longer + // because it can be patched on the fly by make_non_entrant. The stack bang + // instruction fits that requirement. + + // Generate stack overflow check + + if (UseStackBanging) { + //this function will modify the value in A0 + __ push(A0); + __ bang_stack_with_offset(StackShadowPages*os::vm_page_size()); + __ pop(A0); + } else { + // need a 5 byte instruction to allow MT safe patching to non-entrant + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + } + // Generate a new frame for the wrapper. + // do mips need this ? #ifndef OPT_THREAD - __ get_thread(TREG); + __ get_thread(TREG); #endif //FIXME here - __ st_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); - // -2 because return address is already present and so is saved ebp - __ move(AT, -(StackAlignmentInBytes)); - __ andr(SP, SP, AT); - - __ enter(); - __ addiu(SP, SP, -1 * (stack_size - 2*wordSize)); - - // Frame is now completed as far a size and linkage. 
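// The frame setup just above aligns SP with andr(SP, SP, -StackAlignmentInBytes)
// and then carves out stack_size bytes, minus the two words enter() has already
// pushed for the saved RA and FP.  A standalone sketch of that arithmetic with
// assumed values for the alignment and frame size.
#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t StackAlignmentInBytes = 16;     // assumed ABI stack alignment
  const uint64_t wordSize              = 8;
  const uint64_t stack_size            = 0x90;   // assumed total wrapper frame size

  uint64_t sp = 0x7fff0000cafcULL;                               // some unaligned incoming SP
  sp &= ~(StackAlignmentInBytes - 1);                            // andr(SP, SP, AT) with AT = -16
  uint64_t after_enter = sp - 2 * wordSize;                      // enter(): saved RA and FP
  uint64_t frame_sp = after_enter - (stack_size - 2 * wordSize); // addiu(SP, SP, -(stack_size - 2*wordSize))

  std::printf("aligned sp = %#llx, frame sp = %#llx, frame = %llu bytes\n",
              (unsigned long long) sp, (unsigned long long) frame_sp,
              (unsigned long long) (sp - frame_sp));
  return 0;
}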
- - int frame_complete = ((intptr_t)__ pc()) - start; - - // Calculate the difference between esp and ebp. We need to know it - // after the native call because on windows Java Natives will pop - // the arguments and it is painful to do esp relative addressing - // in a platform independent way. So after the call we switch to - // ebp relative addressing. -//FIXME actually , the fp_adjustment may not be the right, because andr(sp,sp,at)may change -//the SP - int fp_adjustment = stack_size - 2*wordSize; + __ st_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); + // -2 because return address is already present and so is saved ebp + __ move(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); + + __ enter(); + __ addiu(SP, SP, -1 * (stack_size - 2*wordSize)); + + // Frame is now completed as far a size and linkage. + + int frame_complete = ((intptr_t)__ pc()) - start; + + // Calculate the difference between esp and ebp. We need to know it + // after the native call because on windows Java Natives will pop + // the arguments and it is painful to do esp relative addressing + // in a platform independent way. So after the call we switch to + // ebp relative addressing. + //FIXME actually , the fp_adjustment may not be the right, because andr(sp,sp,at)may change + //the SP + int fp_adjustment = stack_size - 2*wordSize; #ifdef COMPILER2 - // C2 may leave the stack dirty if not in SSE2+ mode - // if (UseSSE >= 2) { - // __ verify_FPU(0, "c2i transition should have clean FPU stack"); - //} else { - __ empty_FPU_stack(); - //} + // C2 may leave the stack dirty if not in SSE2+ mode + __ empty_FPU_stack(); #endif /* COMPILER2 */ - // Compute the ebp offset for any slots used after the jni call - - int lock_slot_ebp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment; - // We use edi as a thread pointer because it is callee save and - // if we load it once it is usable thru the entire wrapper - // const Register thread = edi; - const Register thread = TREG; - - // We use esi as the oop handle for the receiver/klass - // It is callee save so it survives the call to native - - // const Register oop_handle_reg = esi; - const Register oop_handle_reg = S4; + // Compute the ebp offset for any slots used after the jni call + + int lock_slot_ebp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment; + // We use edi as a thread pointer because it is callee save and + // if we load it once it is usable thru the entire wrapper + // const Register thread = edi; + const Register thread = TREG; + + // We use esi as the oop handle for the receiver/klass + // It is callee save so it survives the call to native + + // const Register oop_handle_reg = esi; + const Register oop_handle_reg = S4; if (is_critical_native) { __ stop("generate_native_wrapper in sharedRuntime <2>"); //TODO:Fu @@ -2285,7 +1832,7 @@ } #ifndef OPT_THREAD - __ get_thread(thread); + __ get_thread(thread); #endif // @@ -2295,7 +1842,7 @@ // them. // ----------------- - // The Grand Shuffle + // The Grand Shuffle // // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv* // and, if static, the class mirror instead of a receiver. This pretty much @@ -2308,18 +1855,18 @@ // vectors we have in our possession. We simply walk the java vector to // get the source locations and the c vector to get the destinations. - int c_arg = method->is_static() ? 2 : 1 ; - - // Record esp-based slot for receiver on stack for non-static methods - int receiver_offset = -1; - - // This is a trick. 
We double the stack slots so we can claim - // the oops in the caller's frame. Since we are sure to have - // more args than the caller doubling is enough to make - // sure we can capture all the incoming oop args from the - // caller. - // - OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); + int c_arg = method->is_static() ? 2 : 1 ; + + // Record esp-based slot for receiver on stack for non-static methods + int receiver_offset = -1; + + // This is a trick. We double the stack slots so we can claim + // the oops in the caller's frame. Since we are sure to have + // more args than the caller doubling is enough to make + // sure we can capture all the incoming oop args from the + // caller. + // + OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); // Mark location of rbp (someday) // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(rbp)); @@ -2341,9 +1888,9 @@ #endif /* ASSERT */ - // We know that we only have args in at most two integer registers (ecx, edx). So eax, ebx - // Are free to temporaries if we have to do stack to steck moves. - // All inbound args are referenced based on ebp and all outbound args via esp. + // We know that we only have args in at most two integer registers (ecx, edx). So eax, ebx + // Are free to temporaries if we have to do stack to steck moves. + // All inbound args are referenced based on ebp and all outbound args via esp. // This may iterate in two different directions depending on the // kind of native it is. The reason is that for regular JNI natives @@ -2351,7 +1898,6 @@ // critical natives they are offset down. GrowableArray arg_order(2 * total_in_args); VMRegPair tmp_vmreg; -// tmp_vmreg.set1(rbx->as_VMReg()); tmp_vmreg.set1(T8->as_VMReg()); if (!is_critical_native) { @@ -2400,7 +1946,7 @@ switch (in_sig_bt[i]) { case T_ARRAY: if (is_critical_native) { - __ stop("generate_native_wrapper in sharedRuntime <2>"); + __ stop("generate_native_wrapper in sharedRuntime <2>"); //TODO:Fu // unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); c_arg++; @@ -2440,7 +1986,6 @@ case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); default: -// move32_64(masm, in_regs[i], out_regs[c_arg]); simple_move32(masm, in_regs[i], out_regs[c_arg]); } } @@ -2448,397 +1993,359 @@ // point c_arg at the first arg that is already loaded in case we // need to spill before we call out c_arg = total_c_args - total_in_args; - // Pre-load a static method's oop into esi. Used both by locking code and - // the normal JNI call code. - - __ move(oop_handle_reg, A1); - - if (method->is_static() && !is_critical_native) { - - // load opp into a register - int oop_index = __ oop_recorder()->find_index(JNIHandles::make_local( - (method->method_holder())->java_mirror())); - - - RelocationHolder rspec = oop_Relocation::spec(oop_index); - __ relocate(rspec); - //__ lui(oop_handle_reg, Assembler::split_high((int)JNIHandles::make_local( - // Klass::cast(method->method_holder())->java_mirror()))); - //__ addiu(oop_handle_reg, oop_handle_reg, Assembler::split_low((int) - // JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror()))); - __ patchable_set48(oop_handle_reg, (long)JNIHandles::make_local((method->method_holder())->java_mirror())); - // __ verify_oop(oop_handle_reg); - // Now handlize the static class mirror it's known not-null. 
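// For a static native the mirror loaded above is spilled to a fixed frame slot
// and the address of that slot becomes the jclass argument; because the mirror
// is known non-null, the NULL check object_move performs for ordinary oop args
// is skipped.  Minimal sketch of both flavours with a stand-in oop type, not
// the real HotSpot types.
#include <cassert>

typedef void* oop;   // stand-in for a HotSpot oop, illustration only

// Ordinary oop arg: NULL oop gives a NULL handle, otherwise the slot's address
// (the xorr / beq / lea pattern in object_move).
static oop* handlize(oop* slot) { return (*slot == nullptr) ? nullptr : slot; }

// Static-method mirror: known non-null, so the handle is unconditionally the slot
// (sd to SP + klass_offset, then lea of that same location).
static oop* handlize_mirror(oop* klass_slot) { return klass_slot; }

int main() {
  int mirror_storage;
  oop klass_slot = &mirror_storage;
  oop null_arg   = nullptr;
  assert(handlize_mirror(&klass_slot) == &klass_slot);
  assert(handlize(&null_arg) == nullptr);
  return 0;
}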
- __ sd( oop_handle_reg, SP, klass_offset); - map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); - - // Now get the handle - __ lea(oop_handle_reg, Address(SP, klass_offset)); - // store the klass handle as second argument - __ move(A1, oop_handle_reg); - // and protect the arg if we must spill - c_arg--; - } + // Pre-load a static method's oop into esi. Used both by locking code and + // the normal JNI call code. + + __ move(oop_handle_reg, A1); + + if (method->is_static() && !is_critical_native) { + + // load opp into a register + int oop_index = __ oop_recorder()->find_index(JNIHandles::make_local( + (method->method_holder())->java_mirror())); + + + RelocationHolder rspec = oop_Relocation::spec(oop_index); + __ relocate(rspec); + __ patchable_set48(oop_handle_reg, (long)JNIHandles::make_local((method->method_holder())->java_mirror())); + // Now handlize the static class mirror it's known not-null. + __ sd( oop_handle_reg, SP, klass_offset); + map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); + + // Now get the handle + __ lea(oop_handle_reg, Address(SP, klass_offset)); + // store the klass handle as second argument + __ move(A1, oop_handle_reg); + // and protect the arg if we must spill + c_arg--; + } + // Change state to native (we save the return address in the thread, since it might not // be pushed on the stack when we do a a stack traversal). It is enough that the pc() // points into the right code segment. It does not have to be the correct return pc. // We use the same pc/oopMap repeatedly when we call out - intptr_t the_pc = (intptr_t) __ pc(); - - oop_maps->add_gc_map(the_pc - start, map); - - //__ set_last_Java_frame(thread, esp, noreg, (address)the_pc); - __ set_last_Java_frame(SP, noreg, NULL); - __ relocate(relocInfo::internal_pc_type); - { - intptr_t save_pc = (intptr_t)the_pc ; - __ patchable_set48(AT, save_pc); - } - __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); - - - // We have all of the arguments setup at this point. We must not touch any register - // argument registers at this point (what if we save/restore them there are no oop? 
- { - SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); - int metadata_index = __ oop_recorder()->find_index(method()); - RelocationHolder rspec = metadata_Relocation::spec(metadata_index); - __ relocate(rspec); - //__ lui(T6, Assembler::split_high((int)JNIHandles::make_local(method()))); - //__ addiu(T6, T6, Assembler::split_low((int)JNIHandles::make_local(method()))); - __ patchable_set48(AT, (long)(method())); - - __ call_VM_leaf( - CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), - thread, AT); - - } - - // These are register definitions we need for locking/unlocking -// const Register swap_reg = eax; // Must use eax for cmpxchg instruction -// const Register obj_reg = ecx; // Will contain the oop - // const Register lock_reg = edx; // Address of compiler lock object (BasicLock) -//FIXME, I hava no idea which register to use - const Register swap_reg = T8; // Must use eax for cmpxchg instruction - const Register obj_reg = T9; // Will contain the oop - //const Register lock_reg = T6; // Address of compiler lock object (BasicLock) - const Register lock_reg = c_rarg0; // Address of compiler lock object (BasicLock) - - - - Label slow_path_lock; - Label lock_done; - - // Lock a synchronized method - if (method->is_synchronized()) { - assert(!is_critical_native, "unhandled"); - - const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); - - // Get the handle (the 2nd argument) - __ move(oop_handle_reg, A1); - - // Get address of the box - __ lea(lock_reg, Address(FP, lock_slot_ebp_offset)); - - // Load the oop from the handle - __ ld(obj_reg, oop_handle_reg, 0); - - if (UseBiasedLocking) { - // Note that oop_handle_reg is trashed during this call - __ biased_locking_enter(lock_reg, obj_reg, swap_reg, A1, - false, lock_done, &slow_path_lock); - } - - // Load immediate 1 into swap_reg %eax - __ move(swap_reg, 1); - - __ ld(AT, obj_reg, 0); - __ orr(swap_reg, swap_reg, AT); - - __ sd( swap_reg, lock_reg, mark_word_offset); - __ cmpxchg(lock_reg, Address(obj_reg, 0), swap_reg); - __ bne(AT, R0, lock_done); - __ delayed()->nop(); - // Test if the oopMark is an obvious stack pointer, i.e., - // 1) (mark & 3) == 0, and - // 2) esp <= mark < mark + os::pagesize() - // These 3 tests can be done by evaluating the following - // expression: ((mark - esp) & (3 - os::vm_page_size())), - // assuming both stack pointer and pagesize have their - // least significant 2 bits clear. - // NOTE: the oopMark is in swap_reg %eax as the result of cmpxchg - - __ dsub(swap_reg, swap_reg,SP); - __ move(AT, 3 - os::vm_page_size()); - __ andr(swap_reg , swap_reg, AT); - // Save the test result, for recursive case, the result is zero - __ sd(swap_reg, lock_reg, mark_word_offset); - //FIXME here, Why notEqual? - __ bne(swap_reg,R0, slow_path_lock); - __ delayed()->nop(); - // Slow path will re-enter here - __ bind(lock_done); - - if (UseBiasedLocking) { - // Re-fetch oop_handle_reg as we trashed it above - __ move(A1, oop_handle_reg); - } - } - - - // Finally just about ready to make the JNI call - - - // get JNIEnv* which is first argument to native + intptr_t the_pc = (intptr_t) __ pc(); + oop_maps->add_gc_map(the_pc - start, map); + + __ set_last_Java_frame(SP, noreg, NULL); + __ relocate(relocInfo::internal_pc_type); + { + intptr_t save_pc = (intptr_t)the_pc ; + __ patchable_set48(AT, save_pc); + } + __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); + + + // We have all of the arguments setup at this point. 
We must not touch any register + // argument registers at this point (what if we save/restore them there are no oop? + { + SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); + int metadata_index = __ oop_recorder()->find_index(method()); + RelocationHolder rspec = metadata_Relocation::spec(metadata_index); + __ relocate(rspec); + __ patchable_set48(AT, (long)(method())); + + __ call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), + thread, AT); + + } + + // These are register definitions we need for locking/unlocking + const Register swap_reg = T8; // Must use eax for cmpxchg instruction + const Register obj_reg = T9; // Will contain the oop + //const Register lock_reg = T6; // Address of compiler lock object (BasicLock) + const Register lock_reg = c_rarg0; // Address of compiler lock object (BasicLock) + + + + Label slow_path_lock; + Label lock_done; + + // Lock a synchronized method + if (method->is_synchronized()) { + assert(!is_critical_native, "unhandled"); + + const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); + + // Get the handle (the 2nd argument) + __ move(oop_handle_reg, A1); + + // Get address of the box + __ lea(lock_reg, Address(FP, lock_slot_ebp_offset)); + + // Load the oop from the handle + __ ld(obj_reg, oop_handle_reg, 0); + + if (UseBiasedLocking) { + // Note that oop_handle_reg is trashed during this call + __ biased_locking_enter(lock_reg, obj_reg, swap_reg, A1, false, lock_done, &slow_path_lock); + } + + // Load immediate 1 into swap_reg %eax + __ move(swap_reg, 1); + + __ ld(AT, obj_reg, 0); + __ orr(swap_reg, swap_reg, AT); + + __ sd( swap_reg, lock_reg, mark_word_offset); + __ cmpxchg(lock_reg, Address(obj_reg, 0), swap_reg); + __ bne(AT, R0, lock_done); + __ delayed()->nop(); + // Test if the oopMark is an obvious stack pointer, i.e., + // 1) (mark & 3) == 0, and + // 2) esp <= mark < mark + os::pagesize() + // These 3 tests can be done by evaluating the following + // expression: ((mark - esp) & (3 - os::vm_page_size())), + // assuming both stack pointer and pagesize have their + // least significant 2 bits clear. + // NOTE: the oopMark is in swap_reg %eax as the result of cmpxchg + + __ dsub(swap_reg, swap_reg,SP); + __ move(AT, 3 - os::vm_page_size()); + __ andr(swap_reg , swap_reg, AT); + // Save the test result, for recursive case, the result is zero + __ sd(swap_reg, lock_reg, mark_word_offset); + //FIXME here, Why notEqual? + __ bne(swap_reg,R0, slow_path_lock); + __ delayed()->nop(); + // Slow path will re-enter here + __ bind(lock_done); + + if (UseBiasedLocking) { + // Re-fetch oop_handle_reg as we trashed it above + __ move(A1, oop_handle_reg); + } + } + + + // Finally just about ready to make the JNI call + + + // get JNIEnv* which is first argument to native if (!is_critical_native) { - __ addi(A0, thread, in_bytes(JavaThread::jni_environment_offset())); + __ addi(A0, thread, in_bytes(JavaThread::jni_environment_offset())); } - // Example: Java_java_lang_ref_Finalizer_invokeFinalizeMethod(JNIEnv *env, jclass clazz, jobject ob) - /* Load the second arguments into A1 */ - //__ ld(A1, SP , wordSize ); // klass - - // Now set thread in native - __ addi(AT, R0, _thread_in_native); - __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); - /* Jin: do the call */ - __ call(method->native_function(), relocInfo::runtime_call_type); - __ delayed()->nop(); - // WARNING - on Windows Java Natives use pascal calling convention and pop the - // arguments off of the stack. 
We could just re-adjust the stack pointer here - // and continue to do SP relative addressing but we instead switch to FP - // relative addressing. - - // Unpack native results. - switch (ret_type) { - case T_BOOLEAN: __ c2bool(V0); break; - case T_CHAR : __ andi(V0,V0, 0xFFFF); break; - case T_BYTE : __ sign_extend_byte (V0); break; - case T_SHORT : __ sign_extend_short(V0); break; - case T_INT : // nothing to do break; - case T_DOUBLE : - case T_FLOAT : - // Result is in st0 we'll save as needed - break; - case T_ARRAY: // Really a handle - case T_OBJECT: // Really a handle - break; // can't de-handlize until after safepoint check - case T_VOID: break; - case T_LONG: break; - default : ShouldNotReachHere(); - } - // Switch thread to "native transition" state before reading the synchronization state. - // This additional state is necessary because reading and testing the synchronization - // state is not atomic w.r.t. GC, as this scenario demonstrates: - // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. - // VM thread changes sync state to synchronizing and suspends threads for GC. - // Thread A is resumed to finish this native method, but doesn't block here since it - // didn't see any synchronization is progress, and escapes. - // __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native_trans); - //__ sw(_thread_in_native_trans, thread, JavaThread::thread_state_offset()); - // __ move(AT, (int)_thread_in_native_trans); - __ addi(AT, R0, _thread_in_native_trans); - __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); - + // Example: Java_java_lang_ref_Finalizer_invokeFinalizeMethod(JNIEnv *env, jclass clazz, jobject ob) + /* Load the second arguments into A1 */ + //__ ld(A1, SP , wordSize ); // klass + + // Now set thread in native + __ addi(AT, R0, _thread_in_native); + __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); + /* Jin: do the call */ + __ call(method->native_function(), relocInfo::runtime_call_type); + __ delayed()->nop(); + // WARNING - on Windows Java Natives use pascal calling convention and pop the + // arguments off of the stack. We could just re-adjust the stack pointer here + // and continue to do SP relative addressing but we instead switch to FP + // relative addressing. + + // Unpack native results. + switch (ret_type) { + case T_BOOLEAN: __ c2bool(V0); break; + case T_CHAR : __ andi(V0,V0, 0xFFFF); break; + case T_BYTE : __ sign_extend_byte (V0); break; + case T_SHORT : __ sign_extend_short(V0); break; + case T_INT : // nothing to do break; + case T_DOUBLE : + case T_FLOAT : + // Result is in st0 we'll save as needed + break; + case T_ARRAY: // Really a handle + case T_OBJECT: // Really a handle + break; // can't de-handlize until after safepoint check + case T_VOID: break; + case T_LONG: break; + default : ShouldNotReachHere(); + } + // Switch thread to "native transition" state before reading the synchronization state. + // This additional state is necessary because reading and testing the synchronization + // state is not atomic w.r.t. GC, as this scenario demonstrates: + // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. + // VM thread changes sync state to synchronizing and suspends threads for GC. + // Thread A is resumed to finish this native method, but doesn't block here since it + // didn't see any synchronization is progress, and escapes. 
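// The comment above is the key ordering constraint: after the native call the
// thread is parked in _thread_in_native_trans before the safepoint/suspend
// check, so the VM thread can never observe a jump straight from in_native to
// in_Java across a pending safepoint.  Toy state machine, illustrative names
// and behaviour only.
#include <cstdio>

enum ThreadState { thread_in_Java, thread_in_native, thread_in_native_trans };

static bool safepoint_pending = true;  // pretend the VM thread has begun a safepoint

static void block_for_safepoint() { std::puts("  blocked until the safepoint completes"); }

static ThreadState return_from_native() {
  ThreadState state = thread_in_native_trans;   // sw(_thread_in_native_trans, thread_state)
  std::printf("  state %d: native_trans, now check safepoint/suspend flags\n", (int) state);
  if (safepoint_pending)                         // SafepointSynchronize state != _not_synchronized
    block_for_safepoint();                       // check_special_condition_for_native_trans
  return thread_in_Java;                         // sw(_thread_in_Java, thread_state)
}

int main() {
  ThreadState s = return_from_native();
  std::printf("returned to Java in state %d\n", (int) s);
  return 0;
}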
+ // __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native_trans); + //__ sw(_thread_in_native_trans, thread, JavaThread::thread_state_offset()); + // __ move(AT, (int)_thread_in_native_trans); + __ addi(AT, R0, _thread_in_native_trans); + __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); + Label after_transition; - // check for safepoint operation in progress and/or pending suspend requests - { Label Continue; + // check for safepoint operation in progress and/or pending suspend requests + { Label Continue; //FIXME here, which regiser should we use? - // SafepointSynchronize::_not_synchronized); - __ li(AT, SafepointSynchronize::address_of_state()); - __ lw(A0, AT, 0); - __ addi(AT, A0, -SafepointSynchronize::_not_synchronized); - Label L; - __ bne(AT,R0, L); - __ delayed()->nop(); - __ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); - __ beq(AT, R0, Continue); - __ delayed()->nop(); - __ bind(L); - - // Don't use call_VM as it will see a possible pending exception and forward it - // and never return here preventing us from clearing _last_native_pc down below. - // Also can't use call_VM_leaf either as it will check to see if esi & edi are - // preserved and correspond to the bcp/locals pointers. So we do a runtime call - // by hand. - // - save_native_result(masm, ret_type, stack_slots); - __ move (A0, thread); - __ addi(SP,SP, -wordSize); + // SafepointSynchronize::_not_synchronized); + __ li(AT, SafepointSynchronize::address_of_state()); + __ lw(A0, AT, 0); + __ addi(AT, A0, -SafepointSynchronize::_not_synchronized); + Label L; + __ bne(AT,R0, L); + __ delayed()->nop(); + __ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); + __ beq(AT, R0, Continue); + __ delayed()->nop(); + __ bind(L); + + // Don't use call_VM as it will see a possible pending exception and forward it + // and never return here preventing us from clearing _last_native_pc down below. + // Also can't use call_VM_leaf either as it will check to see if esi & edi are + // preserved and correspond to the bcp/locals pointers. So we do a runtime call + // by hand. + // + save_native_result(masm, ret_type, stack_slots); + __ move (A0, thread); + __ addi(SP,SP, -wordSize); __ push(S2); __ move(AT, -(StackAlignmentInBytes)); __ move(S2, SP); // use S2 as a sender SP holder __ andr(SP, SP, AT); // align stack as required by ABI if (!is_critical_native) { __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::runtime_call_type); - __ delayed()->nop(); + __ delayed()->nop(); } else { __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition), relocInfo::runtime_call_type); - __ delayed()->nop(); + __ delayed()->nop(); } __ move(SP, S2); // use S2 as a sender SP holder __ pop(S2); - __ addi(SP,SP, wordSize); - //add for compressedoops - __ reinit_heapbase(); - // Restore any method result value - restore_native_result(masm, ret_type, stack_slots); + __ addi(SP,SP, wordSize); + //add for compressedoops + __ reinit_heapbase(); + // Restore any method result value + restore_native_result(masm, ret_type, stack_slots); if (is_critical_native) { // The call above performed the transition to thread_in_Java so // skip the transition logic below. 
__ beq(R0, R0, after_transition); - __ delayed()->nop(); + __ delayed()->nop(); } - __ bind(Continue); - } - - // change thread state - __ addi(AT, R0, _thread_in_Java); - __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); + __ bind(Continue); + } + + // change thread state + __ addi(AT, R0, _thread_in_Java); + __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); __ bind(after_transition); - Label reguard; - Label reguard_done; - __ lw(AT, thread, in_bytes(JavaThread::stack_guard_state_offset())); - __ addi(AT, AT, -JavaThread::stack_guard_yellow_disabled); - __ beq(AT, R0, reguard); - __ delayed()->nop(); - // slow path reguard re-enters here - __ bind(reguard_done); - - // Handle possible exception (will unlock if necessary) - - // native result if any is live - - // Unlock - Label slow_path_unlock; - Label unlock_done; - if (method->is_synchronized()) { - - Label done; - - // Get locked oop from the handle we passed to jni - __ ld( obj_reg, oop_handle_reg, 0); - //FIXME - if (UseBiasedLocking) { - __ biased_locking_exit(obj_reg, T8, done); - - } - - // Simple recursive lock? - - __ ld(AT, FP, lock_slot_ebp_offset); - __ beq(AT, R0, done); - __ delayed()->nop(); - // Must save eax if if it is live now because cmpxchg must use it - if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { - save_native_result(masm, ret_type, stack_slots); - } - - // get old displaced header - __ ld (T8, FP, lock_slot_ebp_offset); - // get address of the stack lock - //FIXME aoqi - //__ addi (T6, FP, lock_slot_ebp_offset); - __ addi (c_rarg0, FP, lock_slot_ebp_offset); - // Atomic swap old header if oop still contains the stack lock - //FIXME aoqi - //__ cmpxchg(T8, Address(obj_reg, 0),T6 ); - __ cmpxchg(T8, Address(obj_reg, 0), c_rarg0); - - __ beq(AT, R0, slow_path_unlock); - __ delayed()->nop(); - // slow path re-enters here - __ bind(unlock_done); - if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { - restore_native_result(masm, ret_type, stack_slots); - } - - __ bind(done); - - } - { - SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); - // Tell dtrace about this method exit - save_native_result(masm, ret_type, stack_slots); - int metadata_index = __ oop_recorder()->find_index( (method())); - RelocationHolder rspec = metadata_Relocation::spec(metadata_index); - __ relocate(rspec); - //__ lui(T6, Assembler::split_high((int)JNIHandles::make_local(method()))); - //__ addiu(T6, T6, Assembler::split_low((int)JNIHandles::make_local(method()))); - __ patchable_set48(AT, (long)(method())); - - __ call_VM_leaf( - CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), - thread, AT); - restore_native_result(masm, ret_type, stack_slots); - } - - // We can finally stop using that last_Java_frame we setup ages ago - - __ reset_last_Java_frame(false, true); - - // Unpack oop result - if (ret_type == T_OBJECT || ret_type == T_ARRAY) { - Label L; - // __ cmpl(eax, NULL_WORD); - // __ jcc(Assembler::equal, L); - __ beq(V0, R0,L ); - __ delayed()->nop(); - // __ movl(eax, Address(eax)); - __ ld(V0, V0, 0); - __ bind(L); - // __ verify_oop(eax); - __ verify_oop(V0); - } + Label reguard; + Label reguard_done; + __ lw(AT, thread, in_bytes(JavaThread::stack_guard_state_offset())); + __ addi(AT, AT, -JavaThread::stack_guard_yellow_disabled); + __ beq(AT, R0, reguard); + __ delayed()->nop(); + // slow path reguard re-enters here + __ bind(reguard_done); + + // Handle possible exception (will unlock if necessary) + + // native result if any is live + + // 
Unlock + Label slow_path_unlock; + Label unlock_done; + if (method->is_synchronized()) { + + Label done; + + // Get locked oop from the handle we passed to jni + __ ld( obj_reg, oop_handle_reg, 0); + //FIXME + if (UseBiasedLocking) { + __ biased_locking_exit(obj_reg, T8, done); + + } + + // Simple recursive lock? + + __ ld(AT, FP, lock_slot_ebp_offset); + __ beq(AT, R0, done); + __ delayed()->nop(); + // Must save eax if if it is live now because cmpxchg must use it + if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { + save_native_result(masm, ret_type, stack_slots); + } + + // get old displaced header + __ ld (T8, FP, lock_slot_ebp_offset); + // get address of the stack lock + __ addi (c_rarg0, FP, lock_slot_ebp_offset); + // Atomic swap old header if oop still contains the stack lock + __ cmpxchg(T8, Address(obj_reg, 0), c_rarg0); + + __ beq(AT, R0, slow_path_unlock); + __ delayed()->nop(); + // slow path re-enters here + __ bind(unlock_done); + if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { + restore_native_result(masm, ret_type, stack_slots); + } + + __ bind(done); + + } + { + SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); + // Tell dtrace about this method exit + save_native_result(masm, ret_type, stack_slots); + int metadata_index = __ oop_recorder()->find_index( (method())); + RelocationHolder rspec = metadata_Relocation::spec(metadata_index); + __ relocate(rspec); + __ patchable_set48(AT, (long)(method())); + + __ call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), + thread, AT); + restore_native_result(masm, ret_type, stack_slots); + } + + // We can finally stop using that last_Java_frame we setup ages ago + + __ reset_last_Java_frame(false, true); + + // Unpack oop result + if (ret_type == T_OBJECT || ret_type == T_ARRAY) { + Label L; + __ beq(V0, R0,L ); + __ delayed()->nop(); + __ ld(V0, V0, 0); + __ bind(L); + __ verify_oop(V0); + } if (!is_critical_native) { - // reset handle block - __ ld(AT, thread, in_bytes(JavaThread::active_handles_offset())); - __ sw(R0, AT, JNIHandleBlock::top_offset_in_bytes()); + // reset handle block + __ ld(AT, thread, in_bytes(JavaThread::active_handles_offset())); + __ sw(R0, AT, JNIHandleBlock::top_offset_in_bytes()); } if (!is_critical_native) { - // Any exception pending? - __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); - - __ bne(AT, R0, exception_pending); - __ delayed()->nop(); + // Any exception pending? + __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); + + __ bne(AT, R0, exception_pending); + __ delayed()->nop(); } - // no exception, we're almost done - - // check that only result value is on FPU stack - __ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 1 : 0, "native_wrapper normal exit"); - - // Fixup floating pointer results so that result looks like a return from a compiled method -/* if (ret_type == T_FLOAT) { - if (UseSSE >= 1) { - // Pop st0 and store as float and reload into xmm register - __ fstp_s(Address(ebp, -4)); - __ movss(xmm0, Address(ebp, -4)); - } - } else if (ret_type == T_DOUBLE) { - if (UseSSE >= 2) { - // Pop st0 and store as double and reload into xmm register - __ fstp_d(Address(ebp, -8)); - __ movsd(xmm0, Address(ebp, -8)); - } - } -*/ + // no exception, we're almost done + + // check that only result value is on FPU stack + __ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 
1 : 0, "native_wrapper normal exit"); + // Return #ifndef OPT_THREAD - __ get_thread(TREG); + __ get_thread(TREG); #endif - __ ld_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); - __ leave(); - - __ jr(RA); - __ delayed()->nop(); - // Unexpected paths are out of line and go here + __ ld_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); + __ leave(); + + __ jr(RA); + __ delayed()->nop(); + // Unexpected paths are out of line and go here /* if (!is_critical_native) { // forward the exception @@ -2848,152 +2355,147 @@ __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); } */ - // Slow path locking & unlocking - if (method->is_synchronized()) { - - // BEGIN Slow path lock - - __ bind(slow_path_lock); - - // protect the args we've loaded - save_args(masm, total_c_args, c_arg, out_regs); - - // has last_Java_frame setup. No exceptions so do vanilla call not call_VM - // args are (oop obj, BasicLock* lock, JavaThread* thread) - - __ move(A0, obj_reg); - __ move(A1, lock_reg); - __ move(A2, thread); - __ addi(SP, SP, - 3*wordSize); + // Slow path locking & unlocking + if (method->is_synchronized()) { + + // BEGIN Slow path lock + __ bind(slow_path_lock); + + // protect the args we've loaded + save_args(masm, total_c_args, c_arg, out_regs); + + // has last_Java_frame setup. No exceptions so do vanilla call not call_VM + // args are (oop obj, BasicLock* lock, JavaThread* thread) + + __ move(A0, obj_reg); + __ move(A1, lock_reg); + __ move(A2, thread); + __ addi(SP, SP, - 3*wordSize); + + __ move(AT, -(StackAlignmentInBytes)); + __ move(S2, SP); // use S2 as a sender SP holder + __ andr(SP, SP, AT); // align stack as required by ABI + + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type); + __ delayed()->nop(); + __ move(SP, S2); + __ addi(SP, SP, 3*wordSize); + + restore_args(masm, total_c_args, c_arg, out_regs); + +#ifdef ASSERT + { Label L; + __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ beq(AT, R0, L); + __ delayed()->nop(); + __ stop("no pending exception allowed on exit from monitorenter"); + __ bind(L); + } +#endif + __ b(lock_done); + __ delayed()->nop(); + // END Slow path lock + + // BEGIN Slow path unlock + __ bind(slow_path_unlock); + + // Slow path unlock + + if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { + save_native_result(masm, ret_type, stack_slots); + } + // Save pending exception around call to VM (which contains an EXCEPTION_MARK) + + __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ push(AT); + __ sd(R0, thread, in_bytes(Thread::pending_exception_offset())); __ move(AT, -(StackAlignmentInBytes)); __ move(S2, SP); // use S2 as a sender SP holder __ andr(SP, SP, AT); // align stack as required by ABI - __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type); - __ delayed()->nop(); + // should be a peal + // +wordSize because of the push above + __ addi(A1, FP, lock_slot_ebp_offset); + + __ move(A0, obj_reg); + __ addi(SP,SP, -2*wordSize); + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), + relocInfo::runtime_call_type); + __ delayed()->nop(); + __ addi(SP,SP, 2*wordSize); __ move(SP, S2); - __ addi(SP, SP, 3*wordSize); - - restore_args(masm, total_c_args, c_arg, out_regs); - + //add for compressedoops + __ reinit_heapbase(); #ifdef ASSERT - { Label L; - // __ cmpl(Address(thread, in_bytes(Thread::pending_exception_offset())), (int)NULL_WORD); - __ ld(AT, 
thread, in_bytes(Thread::pending_exception_offset())); - //__ jcc(Assembler::equal, L); - __ beq(AT, R0, L); - __ delayed()->nop(); - __ stop("no pending exception allowed on exit from monitorenter"); - __ bind(L); - } -#endif - __ b(lock_done); - __ delayed()->nop(); - // END Slow path lock - - // BEGIN Slow path unlock - __ bind(slow_path_unlock); - - // Slow path unlock - - if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { - save_native_result(masm, ret_type, stack_slots); - } - // Save pending exception around call to VM (which contains an EXCEPTION_MARK) - - __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); - __ push(AT); - __ sd(R0, thread, in_bytes(Thread::pending_exception_offset())); - - __ move(AT, -(StackAlignmentInBytes)); - __ move(S2, SP); // use S2 as a sender SP holder - __ andr(SP, SP, AT); // align stack as required by ABI - - // should be a peal - // +wordSize because of the push above - __ addi(A1, FP, lock_slot_ebp_offset); - - __ move(A0, obj_reg); - __ addi(SP,SP, -2*wordSize); - __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), - relocInfo::runtime_call_type); - __ delayed()->nop(); - __ addi(SP,SP, 2*wordSize); - __ move(SP, S2); - //add for compressedoops - __ reinit_heapbase(); -#ifdef ASSERT - { - Label L; - // __ cmpl(Address(thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD); - __ lw( AT, thread, in_bytes(Thread::pending_exception_offset())); - //__ jcc(Assembler::equal, L); - __ beq(AT, R0, L); - __ delayed()->nop(); - __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); - __ bind(L); - } + { + Label L; + __ lw( AT, thread, in_bytes(Thread::pending_exception_offset())); + __ beq(AT, R0, L); + __ delayed()->nop(); + __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); + __ bind(L); + } #endif /* ASSERT */ - __ pop(AT); - __ sd(AT, thread, in_bytes(Thread::pending_exception_offset())); - if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { - restore_native_result(masm, ret_type, stack_slots); - } - __ b(unlock_done); - __ delayed()->nop(); - // END Slow path unlock - - } - - // SLOW PATH Reguard the stack if needed - - __ bind(reguard); - save_native_result(masm, ret_type, stack_slots); - __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), - relocInfo::runtime_call_type); - __ delayed()->nop(); - //add for compressedoops - __ reinit_heapbase(); - restore_native_result(masm, ret_type, stack_slots); - __ b(reguard_done); - __ delayed()->nop(); - - // BEGIN EXCEPTION PROCESSING - if (!is_critical_native) { - // Forward the exception - __ bind(exception_pending); - - // remove possible return value from FPU register stack - __ empty_FPU_stack(); - - // pop our frame - //forward_exception_entry need return address on stack - __ addiu(SP, FP, wordSize); - __ ld(FP, SP, (-1) * wordSize); - - // and forward the exception - __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); - __ delayed()->nop(); + __ pop(AT); + __ sd(AT, thread, in_bytes(Thread::pending_exception_offset())); + if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { + restore_native_result(masm, ret_type, stack_slots); } - __ flush(); - - nmethod *nm = nmethod::new_native_nmethod(method, - compile_id, - masm->code(), - vep_offset, - frame_complete, - stack_slots / VMRegImpl::slots_per_word, - (is_static ? 
in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), - in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), - oop_maps); + __ b(unlock_done); + __ delayed()->nop(); + // END Slow path unlock + + } + + // SLOW PATH Reguard the stack if needed + + __ bind(reguard); + save_native_result(masm, ret_type, stack_slots); + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), + relocInfo::runtime_call_type); + __ delayed()->nop(); + //add for compressedoops + __ reinit_heapbase(); + restore_native_result(masm, ret_type, stack_slots); + __ b(reguard_done); + __ delayed()->nop(); + + // BEGIN EXCEPTION PROCESSING + if (!is_critical_native) { + // Forward the exception + __ bind(exception_pending); + + // remove possible return value from FPU register stack + __ empty_FPU_stack(); + + // pop our frame + //forward_exception_entry need return address on stack + __ addiu(SP, FP, wordSize); + __ ld(FP, SP, (-1) * wordSize); + + // and forward the exception + __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); + __ delayed()->nop(); + } + __ flush(); + + nmethod *nm = nmethod::new_native_nmethod(method, + compile_id, + masm->code(), + vep_offset, + frame_complete, + stack_slots / VMRegImpl::slots_per_word, + (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), + in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), + oop_maps); if (is_critical_native) { nm->set_lazy_critical_native(true); } - return nm; - + + return nm; } @@ -3027,8 +2529,8 @@ } -nmethod *SharedRuntime::generate_dtrace_nmethod( - MacroAssembler *masm, methodHandle method) { +nmethod *SharedRuntime::generate_dtrace_nmethod(MacroAssembler *masm, + methodHandle method) { // generate_dtrace_nmethod is guarded by a mutex so we are sure to @@ -3111,7 +2613,7 @@ // Now figure out where the args must be stored and how much stack space // they require (neglecting out_preserve_stack_slots but space for storing // the 1st six register arguments). It's weird see int_stk_helper. - // + int out_arg_slots; out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); @@ -3547,7 +3049,7 @@ // this function returns the adjust size (in number of words) to a c2i adapter // activation for use during deoptimization int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { - return (callee_locals - callee_parameters) * Interpreter::stackElementWords; + return (callee_locals - callee_parameters) * Interpreter::stackElementWords; } // "Top of Stack" slots that may be unused by the calling convention but must @@ -3557,131 +3059,8 @@ // when an interrupt occurs. 
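// Deoptimization::last_frame_adjust above sizes the extra space an interpreter
// frame needs when a compiled callee is deoptimized: the c2i activation only
// reserved room for the parameters, so the difference to the full local count
// is paid for here.  Worked example assuming one word per Java stack element.
#include <cstdio>

static int last_frame_adjust(int callee_parameters, int callee_locals,
                             int stackElementWords) {
  return (callee_locals - callee_parameters) * stackElementWords;
}

int main() {
  // A method with 3 parameters but 7 locals needs 4 extra stack elements.
  std::printf("adjust = %d words\n", last_frame_adjust(3, 7, 1));
  return 0;
}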
uint SharedRuntime::out_preserve_stack_slots() { //return frame::register_save_words * VMRegImpl::slots_per_word; - return 0; + return 0; } -/* -static void gen_new_frame(MacroAssembler* masm, bool deopt) { -// -// Common out the new frame generation for deopt and uncommon trap -// - Register G3pcs = G3_scratch; // Array of new pcs (input) - Register Oreturn0 = O0; - Register Oreturn1 = O1; - Register O2UnrollBlock = O2; - Register O3array = O3; // Array of frame sizes (input) - Register O4array_size = O4; // number of frames (input) - Register O7frame_size = O7; // number of frames (input) - - __ ld_ptr(O3array, 0, O7frame_size); - __ sub(G0, O7frame_size, O7frame_size); - __ save(SP, O7frame_size, SP); - __ ld_ptr(G3pcs, 0, I7); // load frame's new pc - - #ifdef ASSERT - // make sure that the frames are aligned properly -#ifndef _LP64 - __ btst(wordSize*2-1, SP); - __ breakpoint_trap(Assembler::notZero); -#endif - #endif - - // Deopt needs to pass some extra live values from frame to frame - - if (deopt) { - __ mov(Oreturn0->after_save(), Oreturn0); - __ mov(Oreturn1->after_save(), Oreturn1); - } - - __ mov(O4array_size->after_save(), O4array_size); - __ sub(O4array_size, 1, O4array_size); - __ mov(O3array->after_save(), O3array); - __ mov(O2UnrollBlock->after_save(), O2UnrollBlock); - __ add(G3pcs, wordSize, G3pcs); // point to next pc value - - #ifdef ASSERT - // trash registers to show a clear pattern in backtraces - __ set(0xDEAD0000, I0); - __ add(I0, 2, I1); - __ add(I0, 4, I2); - __ add(I0, 6, I3); - __ add(I0, 8, I4); - // Don't touch I5 could have valuable savedSP - __ set(0xDEADBEEF, L0); - __ mov(L0, L1); - __ mov(L0, L2); - __ mov(L0, L3); - __ mov(L0, L4); - __ mov(L0, L5); - - // trash the return value as there is nothing to return yet - __ set(0xDEAD0001, O7); - #endif - - __ mov(SP, O5_savedSP); -} - - -static void make_new_frames(MacroAssembler* masm, bool deopt) { - // - // loop through the UnrollBlock info and create new frames - // - Register G3pcs = G3_scratch; - Register Oreturn0 = O0; - Register Oreturn1 = O1; - Register O2UnrollBlock = O2; - Register O3array = O3; - Register O4array_size = O4; - Label loop; - - // Before we make new frames, check to see if stack is available. - // Do this after the caller's return address is on top of stack - if (UseStackBanging) { - // Get total frame size for interpreted frames - __ ld(Address(O2UnrollBlock, 0, - Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()), O4); - __ bang_stack_size(O4, O3, G3_scratch); - } - - __ ld(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()), O4array_size); - __ ld_ptr(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()), G3pcs); - - __ ld_ptr(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()), O3array); - - // Adjust old interpreter frame to make space for new frame's extra java locals - // - // We capture the original sp for the transition frame only because it is needed in - // order to properly calculate interpreter_sp_adjustment. Even though in real life - // every interpreter frame captures a savedSP it is only needed at the transition - // (fortunately). If we had to have it correct everywhere then we would need to - // be told the sp_adjustment for each frame we create. If the frame size array - // were to have twice the frame count entries then we could have pairs [sp_adjustment, frame_size] - // for each frame we create and keep up the illusion every where. 
- // - - __ ld(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()), O7); - __ mov(SP, O5_savedSP); // remember initial sender's original sp before adjustment - __ sub(SP, O7, SP); - -#ifdef ASSERT - // make sure that there is at least one entry in the array - __ tst(O4array_size); - __ breakpoint_trap(Assembler::zero); -#endif - - // Now push the new interpreter frames - __ bind(loop); - - // allocate a new frame, filling the registers - - gen_new_frame(masm, deopt); // allocate an interpreter frame - - __ tst(O4array_size); - __ br(Assembler::notZero, false, Assembler::pn, loop); - __ delayed()->add(O3array, wordSize, O3array); - __ ld_ptr(G3pcs, 0, O7); // load final frame new pc - -} -*/ //------------------------------generate_deopt_blob---------------------------- // Ought to generate an ideal graph & compile, but here's some SPARC ASM @@ -3711,7 +3090,7 @@ Register unroll = S7; // Prolog for non exception case! // Correct the return address we were given. - //FIXME, return address is on the tos or Ra? + //FIXME, return address is on the tos or Ra? __ addi(RA, RA, - (NativeCall::return_address_offset_long)); // Save everything in sight. map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); @@ -3722,13 +3101,12 @@ int reexecute_offset = __ pc() - start; - // Reexecute case - // return address is the pc describes what bci to do re-execute at - - // No need to update map as each call to save_live_registers will produce identical oopmap - //__ addi(RA, RA, - (NativeCall::return_address_offset)); + // Reexecute case + // return address is the pc describes what bci to do re-execute at + + // No need to update map as each call to save_live_registers will produce identical oopmap (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); - __ move(reason, Deoptimization::Unpack_reexecute); + __ move(reason, Deoptimization::Unpack_reexecute); __ b(cont); __ delayed()->nop(); @@ -3739,9 +3117,9 @@ // edx which contain the exception oop and exception pc // respectively. Set them in TLS and fall thru to the // unpack_with_exception_in_tls entry point. - + __ get_thread(thread); - __ st_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); + __ st_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); __ st_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); int exception_in_tls_offset = __ pc() - start; // new implementation because exception oop is now passed in JavaThread @@ -3752,9 +3130,9 @@ // tos: stack at point of call to method that threw the exception (i.e. only // args are on the stack, no return address) - // Return address will be patched later with the throwing pc. The correct value is not + // Return address will be patched later with the throwing pc. The correct value is not // available now because loading it from memory would destroy registers. - // Save everything in sight. + // Save everything in sight. 
// No need to update map as each call to save_live_registers will produce identical oopmap __ addi(RA, RA, - (NativeCall::return_address_offset_long)); (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); @@ -3762,7 +3140,7 @@ // Now it is safe to overwrite any register // store the correct deoptimization type __ move(reason, Deoptimization::Unpack_exception); - // load throwing pc from JavaThread and patch it as the return address + // load throwing pc from JavaThread and patch it as the return address // of the current frame. Then clear the field in JavaThread __ get_thread(thread); __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); @@ -3777,8 +3155,8 @@ // verify that there is no pending exception Label no_pending_exception; __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); - __ beq(AT, R0, no_pending_exception); - __ delayed()->nop(); + __ beq(AT, R0, no_pending_exception); + __ delayed()->nop(); __ stop("must not have pending exception here"); __ bind(no_pending_exception); #endif @@ -3788,7 +3166,7 @@ // Call C code. Need thread and this frame, but NOT official VM entry - // crud. We cannot block on this call, no GC can happen. + // crud. We cannot block on this call, no GC can happen. #ifndef OPT_THREAD __ get_thread(thread); #endif @@ -3801,8 +3179,8 @@ // Call fetch_unroll_info(). Need thread and this frame, but NOT official VM entry - cannot block on // this call, no GC can happen. Call should capture return values. - __ relocate(relocInfo::internal_pc_type); - { + __ relocate(relocInfo::internal_pc_type); + { intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; __ patchable_set48(AT, save_pc); } @@ -3824,7 +3202,6 @@ // we are very short of registers Address unpack_kind(unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); - //__ pop(reason); __ sw(reason, unpack_kind); // save the unpack_kind value // Retrieve the possible live values (return values) @@ -3839,13 +3216,13 @@ __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); __ st_ptr(R0, thread, in_bytes(JavaThread::exception_oop_offset())); - + __ verify_oop(V0); // Overwrite the result registers with the exception results. - __ st_ptr(V0, SP, RegisterSaver::v0Offset()*wordSize); + __ st_ptr(V0, SP, RegisterSaver::v0Offset()*wordSize); __ st_ptr(V1, SP, RegisterSaver::v1Offset()*wordSize); - + __ bind(noException); @@ -3856,11 +3233,11 @@ RegisterSaver::restore_result_registers(masm); // All of the register save area has been popped of the stack. Only the // return address remains. - // Pop all the frames we must move/replace. + // Pop all the frames we must move/replace. // Frame picture (youngest to oldest) // 1: self-frame (no frame link) // 2: deopting frame (no frame link) - // 3: caller of deopting frame (could be compiled/interpreted). + // 3: caller of deopting frame (could be compiled/interpreted). 
// // Note: by leaving the return address of self-frame on the stack // and using the size of frame 2 to adjust the stack @@ -3874,19 +3251,19 @@ Register sizes = T1; // register for frame count Register count = T3; - + // Pop deoptimized frame __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); __ add(SP, SP, AT); // sp should be pointing at the return address to the caller (3) - + // Load array of frame pcs into pcs __ ld_ptr(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); __ addi(SP, SP, wordSize); // trash the old pc // Load array of frame sizes into T6 __ ld_ptr(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); - + // Load count of frams into T3 __ lw(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); @@ -3904,14 +3281,14 @@ /* * Loop: - 0x000000555bd82d18: lw t2, 0x0(t1) ; lw sizes[i] <--- error lw->ld + 0x000000555bd82d18: lw t2, 0x0(t1) ; lw sizes[i] <--- error lw->ld 0x000000555bd82d1c: ld at, 0x0(t0) ; ld pcs[i] - 0x000000555bd82d20: daddi t2, t2, 0xfffffff0 ; t2 -= 16 + 0x000000555bd82d20: daddi t2, t2, 0xfffffff0 ; t2 -= 16 0x000000555bd82d24: daddi sp, sp, 0xfffffff0 0x000000555bd82d28: sd fp, 0x0(sp) ; push fp 0x000000555bd82d2c: sd at, 0x8(sp) ; push at - 0x000000555bd82d30: dadd fp, sp, zero ; fp <- sp - 0x000000555bd82d34: dsub sp, sp, t2 ; sp -= t2 + 0x000000555bd82d30: dadd fp, sp, zero ; fp <- sp + 0x000000555bd82d34: dsub sp, sp, t2 ; sp -= t2 0x000000555bd82d38: sd zero, 0xfffffff0(fp) ; __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); 0x000000555bd82d3c: sd s4, 0xfffffff8(fp) ; __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); 0x000000555bd82d40: dadd s4, sp, zero ; move(sender_sp, SP); @@ -3924,26 +3301,26 @@ // pcs[0] = frame_pcs[0] = deopt_sender.raw_pc(); regex.split Label loop; __ bind(loop); - __ ld(T2, sizes, 0); // Load frame size - __ ld_ptr(AT, pcs, 0); // save return address + __ ld(T2, sizes, 0); // Load frame size + __ ld_ptr(AT, pcs, 0); // save return address __ addi(T2, T2, -2*wordSize); // we'll push pc and rbp, by hand - __ push2(AT, FP); + __ push2(AT, FP); __ move(FP, SP); - __ sub(SP, SP, T2); // Prolog! + __ sub(SP, SP, T2); // Prolog! 
// This value is corrected by layout_activation_impl - __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable - __ move(sender_sp, SP); // pass to next frame - __ addi(count, count, -1); // decrement counter - __ addi(sizes, sizes, wordSize); // Bump array pointer (sizes) + __ move(sender_sp, SP); // pass to next frame + __ addi(count, count, -1); // decrement counter + __ addi(sizes, sizes, wordSize); // Bump array pointer (sizes) __ bne(count, R0, loop); - __ delayed()->addi(pcs, pcs, wordSize); // Bump array pointer (pcs) - __ ld(AT, pcs, 0); // frame_pcs[number_of_frames] = Interpreter::deopt_entry(vtos, 0); + __ delayed()->addi(pcs, pcs, wordSize); // Bump array pointer (pcs) + __ ld(AT, pcs, 0); // frame_pcs[number_of_frames] = Interpreter::deopt_entry(vtos, 0); // Re-push self-frame - __ push2(AT, FP); + __ push2(AT, FP); __ move(FP, SP); - __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); - __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); + __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); __ addi(SP, SP, -(frame_size_in_words - 2 - additional_words) * wordSize); // Restore frame locals after moving the frame @@ -3952,12 +3329,12 @@ __ sdc1(F0, SP, RegisterSaver::fpResultOffset()* wordSize);// Pop float stack and store in local __ sdc1(F1, SP, (RegisterSaver::fpResultOffset() + 1) * wordSize); - + // Call unpack_frames(). Need thread and this frame, but NOT official VM entry - cannot block on // this call, no GC can happen. - __ move(A1, reason); // exec_mode + __ move(A1, reason); // exec_mode __ get_thread(thread); - __ move(A0, thread); // thread + __ move(A0, thread); // thread __ addi(SP, SP, (-additional_words) *wordSize); // set last_Java_sp, last_Java_fp @@ -3966,14 +3343,13 @@ __ move(AT, -(StackAlignmentInBytes)); __ andr(SP, SP, AT); // Fix stack alignment as required by ABI - __ relocate(relocInfo::internal_pc_type); - { + __ relocate(relocInfo::internal_pc_type); + { intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; __ patchable_set48(AT, save_pc); } __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); - - //__ call(Deoptimization::unpack_frames); + __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), relocInfo::runtime_call_type); __ delayed()->nop(); // Revert SP alignment after call since we're going to do some SP relative addressing below @@ -3982,7 +3358,7 @@ oop_maps->add_gc_map(__ offset(), new OopMap( frame_size_in_words , 0)); __ push(V0); - + __ get_thread(thread); __ reset_last_Java_frame(true, true); @@ -3991,7 +3367,7 @@ __ ld(V1, SP, (RegisterSaver::v1Offset() + additional_words +1) * wordSize); __ ldc1(F0, SP, RegisterSaver::fpResultOffset()* wordSize);// Pop float stack and store in local __ ldc1(F1, SP, (RegisterSaver::fpResultOffset() + 1) * wordSize); - //FIXME, + //FIXME, // Clear floating point stack before returning to interpreter __ empty_FPU_stack(); //FIXME, we should consider about float and double @@ -4016,20 +3392,20 @@ // allocate space for the code ResourceMark rm; // setup code generation tools - CodeBuffer buffer ("uncommon_trap_blob", 512*80 , 512*40 ); - MacroAssembler* masm = new MacroAssembler(&buffer); + CodeBuffer buffer 
("uncommon_trap_blob", 512*80 , 512*40 ); + MacroAssembler* masm = new MacroAssembler(&buffer); enum frame_layout { - s0_off, s0_off2, - s1_off, s1_off2, - s2_off, s2_off2, - s3_off, s3_off2, - s4_off, s4_off2, - s5_off, s5_off2, - s6_off, s6_off2, - s7_off, s7_off2, - fp_off, fp_off2, - return_off, return_off2, // slot for return address sp + 9 + s0_off, s0_off2, + s1_off, s1_off2, + s2_off, s2_off2, + s3_off, s3_off2, + s4_off, s4_off2, + s5_off, s5_off2, + s6_off, s6_off2, + s7_off, s7_off2, + fp_off, fp_off2, + return_off, return_off2, // slot for return address sp + 9 framesize }; assert(framesize % 4 == 0, "sp not 16-byte aligned"); @@ -4042,7 +3418,7 @@ __ sd(RA, SP, return_off * BytesPerInt); __ sd(FP, SP, fp_off * BytesPerInt); - // Save callee saved registers. None for UseSSE=0, + // Save callee saved registers. None for UseSSE=0, // floats-only for UseSSE=1, and doubles for UseSSE=2. __ sd(S0, SP, s0_off * BytesPerInt); __ sd(S1, SP, s1_off * BytesPerInt); @@ -4065,8 +3441,8 @@ #endif // set last_Java_sp __ set_last_Java_frame(NOREG, FP, NULL); - __ relocate(relocInfo::internal_pc_type); - { + __ relocate(relocInfo::internal_pc_type); + { long save_pc = (long)__ pc() + 52; __ patchable_set48(AT, (long)save_pc); __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); @@ -4083,7 +3459,7 @@ OopMapSet *oop_maps = new OopMapSet(); OopMap* map = new OopMap( framesize, 0 ); - map->set_callee_saved( VMRegImpl::stack2reg(s0_off ), S0->as_VMReg() ); + map->set_callee_saved( VMRegImpl::stack2reg(s0_off ), S0->as_VMReg() ); map->set_callee_saved( VMRegImpl::stack2reg(s1_off ), S1->as_VMReg() ); map->set_callee_saved( VMRegImpl::stack2reg(s2_off ), S2->as_VMReg() ); map->set_callee_saved( VMRegImpl::stack2reg(s3_off ), S3->as_VMReg() ); @@ -4093,7 +3469,7 @@ map->set_callee_saved( VMRegImpl::stack2reg(s7_off ), S7->as_VMReg() ); //oop_maps->add_gc_map( __ offset(), true, map); - oop_maps->add_gc_map( __ offset(), map); + oop_maps->add_gc_map( __ offset(), map); #ifndef OPT_THREAD __ get_thread(thread); @@ -4104,12 +3480,12 @@ Register unroll = S7; __ move(unroll, V0); - // Pop all the frames we must move/replace. - // + // Pop all the frames we must move/replace. + // // Frame picture (youngest to oldest) // 1: self-frame (no frame link) // 2: deopting frame (no frame link) - // 3: possible-i2c-adapter-frame + // 3: possible-i2c-adapter-frame // 4: caller of deopting frame (could be compiled/interpreted. 
If interpreted we will create an // and c2i here) @@ -4133,10 +3509,6 @@ // Load array of frame pcs into ECX __ ld(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); -/* 2012/9/7 Not needed in MIPS - __ addiu(SP, SP, wordSize); -*/ - // Load array of frame sizes into ESI __ ld(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); __ lwu(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); @@ -4176,7 +3548,7 @@ __ daddi(SP, SP, - 2 * wordSize); // save old & set new FP __ sd(FP, SP, 0 * wordSize); // save final return address __ sd(RA, SP, 1 * wordSize); - __ move(FP, SP); + __ move(FP, SP); __ daddi(SP, SP, -(framesize / 2 - 2) * wordSize); // set last_Java_sp, last_Java_fp @@ -4185,8 +3557,8 @@ __ move(AT, -(StackAlignmentInBytes)); __ andr(SP, SP, AT); // Fix stack alignment as required by ABI - __ relocate(relocInfo::internal_pc_type); - { + __ relocate(relocInfo::internal_pc_type); + { long save_pc = (long)__ pc() + 52; __ patchable_set48(AT, (long)save_pc); } @@ -4199,7 +3571,7 @@ __ move(A1, Deoptimization::Unpack_uncommon_trap); __ patchable_call((address)Deoptimization::unpack_frames); // Set an oopmap for the call site - //oop_maps->add_gc_map( __ offset(), true, new OopMap( framesize, 0 ) ); + //oop_maps->add_gc_map( __ offset(), true, new OopMap( framesize, 0 ) ); oop_maps->add_gc_map( __ offset(), new OopMap( framesize, 0 ) );//Fu __ reset_last_Java_frame(true,true); @@ -4226,28 +3598,28 @@ // a safepoint. // // This blob is jumped to (via a breakpoint and the signal handler) from a -// safepoint in compiled code. - +// safepoint in compiled code. + SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int pool_type) { // Account for thread arg in our frame - const int additional_words = 0; + const int additional_words = 0; int frame_size_in_words; - assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); + assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ResourceMark rm; OopMapSet *oop_maps = new OopMapSet(); OopMap* map; // allocate space for the code - // setup code generation tools + // setup code generation tools CodeBuffer buffer ("handler_blob", 2048, 512); MacroAssembler* masm = new MacroAssembler( &buffer); - - const Register thread = TREG; - address start = __ pc(); - address call_pc = NULL; + + const Register thread = TREG; + address start = __ pc(); + address call_pc = NULL; bool cause_return = (pool_type == POLL_AT_RETURN); bool save_vectors = (pool_type == POLL_AT_VECTOR_LOOP); @@ -4264,7 +3636,7 @@ if(!cause_return) { __ ld_ptr(RA, Address(thread, JavaThread::saved_exception_pc_offset())); } - + __ pop(thread); map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, save_vectors); @@ -4278,16 +3650,6 @@ __ move(A0, thread); __ set_last_Java_frame(NOREG, NOREG, NULL); - //__ relocate(relocInfo::internal_pc_type); - if (!cause_return) - { -/* - intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + NativeCall::return_address_offset + 4; - __ li48(AT, save_pc); - __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset())); -*/ - } - // do the call //__ lui(T9, Assembler::split_high((int)call_ptr)); @@ -4315,21 +3677,19 @@ RegisterSaver::restore_live_registers(masm, save_vectors); //forward_exception_entry need return address on the stack __ push(RA); - //__ lui(T9, Assembler::split_high((int)StubRoutines::forward_exception_entry())); - //__ addiu(T9, T9, 
Assembler::split_low((int)StubRoutines::forward_exception_entry())); __ patchable_jump((address)StubRoutines::forward_exception_entry()); // No exception case __ bind(noException); - // Normal exit, register restoring and exit + // Normal exit, register restoring and exit RegisterSaver::restore_live_registers(masm, save_vectors); __ jr(RA); __ delayed()->nop(); - - masm->flush(); + + masm->flush(); // Fill-out other meta info - return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); + return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); } // @@ -4352,7 +3712,7 @@ MacroAssembler* masm = new MacroAssembler(&buffer); int frame_size_words; - //we put the thread in A0 + //we put the thread in A0 OopMapSet *oop_maps = new OopMapSet(); OopMap* map = NULL; @@ -4366,16 +3726,14 @@ const Register thread = T8; __ get_thread(thread); - __ move(A0, thread); + __ move(A0, thread); __ set_last_Java_frame(noreg, FP, NULL); - //__ addi(SP, SP, -wordSize); - //align the stack before invoke native + //align the stack before invoke native __ move(AT, -(StackAlignmentInBytes)); - __ andr(SP, SP, AT); - __ relocate(relocInfo::internal_pc_type); - { + __ andr(SP, SP, AT); + __ relocate(relocInfo::internal_pc_type); + { intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 24 + 1 * BytesPerInstWord; -//tty->print_cr(" %s :%d, name:%s, pc: %lx, save_pc: %lx, frame_size_words: %lx", __func__, __LINE__, name, __ pc(), save_pc, frame_size_words); //aoqi_test __ patchable_set48(AT, save_pc); } __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset())); @@ -4396,9 +3754,9 @@ Label pending; __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); __ bne(AT, R0, pending); - __ delayed()->nop(); - // get the returned Method* - //FIXME, do mips need this ? + __ delayed()->nop(); + // get the returned Method* + //FIXME, do mips need this ? 
   __ get_vm_result_2(Rmethod, thread);  // Refer to OpenJDK8
   __ st_ptr(Rmethod, SP, RegisterSaver::methodOffset() * wordSize);
   __ st_ptr(V0, SP, RegisterSaver::v0Offset() * wordSize);
@@ -4414,42 +3772,21 @@
   RegisterSaver::restore_live_registers(masm);

   // exception pending => remove activation and forward to exception handler
-  //forward_exception_entry need return address on the stack
+  //forward_exception_entry need return address on the stack
   __ push(RA);
   __ get_thread(thread);
-  __ st_ptr(R0, thread, in_bytes(JavaThread::vm_result_offset()));
+  __ st_ptr(R0, thread, in_bytes(JavaThread::vm_result_offset()));
   __ ld_ptr(V0, thread, in_bytes(Thread::pending_exception_offset()));
   __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
   __ delayed() -> nop();

   // -------------
   // make sure all code is generated
-  masm->flush();
+  masm->flush();

   RuntimeStub* tmp= RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
   return tmp;
 }
-
-/*void SharedRuntime::generate_stubs() {
-  _wrong_method_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address,
-                        SharedRuntime::handle_wrong_method),"wrong_method_stub");
-  _ic_miss_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address,
-                        SharedRuntime::handle_wrong_method_ic_miss),"ic_miss_stub");
-  _resolve_opt_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address,
-                        SharedRuntime::resolve_opt_virtual_call_C),"resolve_opt_virtual_call");
-  _resolve_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address,
-                        SharedRuntime::resolve_virtual_call_C),"resolve_virtual_call");
-  _resolve_static_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address,
-                        SharedRuntime::resolve_static_call_C),"resolve_static_call");
-  _polling_page_safepoint_handler_blob =generate_handler_blob(CAST_FROM_FN_PTR(address,
-                        SafepointSynchronize::handle_polling_page_exception), false);
-  _polling_page_return_handler_blob =generate_handler_blob(CAST_FROM_FN_PTR(address,
-                        SafepointSynchronize::handle_polling_page_exception), true);
-  generate_deopt_blob();
-#ifdef COMPILER2
-  generate_uncommon_trap_blob();
-#endif // COMPILER2
-}*/
-
 extern "C" int SpinPause() {return 0;}
 // extern "C" int SafeFetch32 (int * adr, int errValue) {return 0;} ;
 // extern "C" intptr_t SafeFetchN (intptr_t * adr, intptr_t errValue) {return *adr; } ;
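
For readers following the deopt blob above, here is a minimal sketch, not part of the patch, of what the skeletal-frame-pushing loop computes: it walks the UnrollBlock's frame_sizes/frame_pcs arrays and carves one interpreter frame per entry out of the stack. UnrollInfo, the concrete sizes, pcs, and starting SP below are hypothetical stand-ins for Deoptimization::UnrollBlock and live register values; the real code emits MIPS instructions through the MacroAssembler rather than doing this arithmetic directly.

#include <cstdint>
#include <cstdio>
#include <vector>

struct UnrollInfo {                      // stand-in for Deoptimization::UnrollBlock
  std::vector<intptr_t> frame_sizes;     // bytes needed by each interpreter frame
  std::vector<intptr_t> frame_pcs;       // return pc recorded for each frame
};

int main() {
  const intptr_t wordSize = 8;                         // 64-bit word, as in the port
  UnrollInfo info{{96, 128}, {0x1000, 0x2000}};        // made-up frame sizes and pcs

  intptr_t sp = 0x8000;                                // hypothetical SP after popping
  intptr_t fp = 0;                                     // the deoptimized frame
  intptr_t sender_sp = sp;

  for (size_t i = 0; i < info.frame_sizes.size(); i++) {
    // addi(T2, T2, -2*wordSize): the pc and old fp are pushed by hand below
    intptr_t body = info.frame_sizes[i] - 2 * wordSize;
    sp -= 2 * wordSize;                                // push2(AT, FP): return pc + saved FP
    fp = sp;                                           // move(FP, SP): new frame pointer
    sp -= body;                                        // sub(SP, SP, T2): reserve frame body
    // sd(R0, FP, ...last_sp_offset) and sd(sender_sp, FP, ...sender_sp_offset)
    // make the skeletal frame walkable; here we only track and print sender_sp.
    std::printf("frame %zu: ra=0x%lx fp=0x%lx sp=0x%lx sender_sp=0x%lx\n",
                i, (long)info.frame_pcs[i], (long)fp, (long)sp, (long)sender_sp);
    sender_sp = sp;                                    // move(sender_sp, SP): pass to next frame
  }
  return 0;
}

Compiled with any C++11 compiler this prints one line per skeletal frame, mirroring the SP/FP bookkeeping the generated loop performs before unpack_frames() fills the frames in.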
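
The wrapper, deopt, and resolve stubs above all realign SP with the same two-instruction idiom, move(AT, -(StackAlignmentInBytes)) followed by andr(SP, SP, AT). The throwaway program below is only an illustration of why that AND rounds the stack pointer down to the required boundary; the 16-byte alignment value and the address are assumptions, not taken from the patch.

#include <cstdint>
#include <cstdio>

int main() {
  const uintptr_t StackAlignmentInBytes = 16;          // assumed ABI requirement
  uintptr_t sp = 0x7fff0038;                           // hypothetical unaligned SP
  uintptr_t mask = (uintptr_t)-(intptr_t)StackAlignmentInBytes;  // two's complement: ...fff0
  uintptr_t aligned = sp & mask;                       // same effect as andr(SP, SP, AT)
  std::printf("sp      = 0x%lx\nmask    = 0x%lx\naligned = 0x%lx\n",
              (unsigned long)sp, (unsigned long)mask, (unsigned long)aligned);
  return 0;                                            // aligned = 0x7fff0030: rounded down
}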