src/cpu/mips/vm/templateTable_mips_64.cpp

changeset 6880:52ea28d233d2
parent    428:4030eb32ef94
child    6881:633080c2ba82
     1.1 --- a/src/cpu/mips/vm/templateTable_mips_64.cpp	Fri Sep 01 10:28:22 2017 +0800
     1.2 +++ b/src/cpu/mips/vm/templateTable_mips_64.cpp	Thu Sep 07 09:12:16 2017 +0800
     1.3 @@ -49,7 +49,7 @@
     1.4  }
     1.5  
     1.6  // Address computation: local variables
     1.7 -// we use t8 as the local variables pointer register, by yjl 6/27/2005
     1.8 +
     1.9  static inline Address iaddress(int n) {
    1.10    return Address(LVP, Interpreter::local_offset_in_bytes(n));
    1.11  }
    1.12 @@ -71,30 +71,8 @@
    1.13  }
    1.14  static inline Address haddress(int n)            { return iaddress(n + 0); }
    1.15  
    1.16 -//FIXME , can not use dadd and dsll
    1.17 -/*
    1.18 -static inline Address iaddress(Register r) {
    1.19 -  return Address(r14, r, Address::times_8, Interpreter::value_offset_in_bytes());
    1.20 -}
    1.21 -
    1.22 -static inline Address laddress(Register r) {
    1.23 -  return Address(r14, r, Address::times_8, Interpreter::local_offset_in_bytes(1));
    1.24 -}
    1.25 -
    1.26 -static inline Address faddress(Register r) {
    1.27 -  return iaddress(r);
    1.28 -}
    1.29 -
    1.30 -static inline Address daddress(Register r) {
    1.31 -  return laddress(r);
    1.32 -}
    1.33 -
    1.34 -static inline Address aaddress(Register r) {
    1.35 -  return iaddress(r);
    1.36 -}
    1.37 -*/
    1.38 -
    1.39 -static inline Address at_sp() 						{	return Address(SP, 	0); }					
    1.40 +
    1.41 +static inline Address at_sp()             {  return Address(SP,   0); }
    1.42  static inline Address at_sp_p1()          { return Address(SP,  1 * wordSize); }
    1.43  static inline Address at_sp_p2()          { return Address(SP,  2 * wordSize); }
    1.44  
    1.45 @@ -117,24 +95,19 @@
    1.46    return Address(SP,  Interpreter::expr_offset_in_bytes(3));
    1.47  }
    1.48  
    1.49 -// we use S0 as bcp, be sure you have bcp in S0 before you call any of the Template generator 
     1.50 +// we use S0 as bcp; be sure you have bcp in S0 before you call any of the Template generators
    1.51  Address TemplateTable::at_bcp(int offset) {
    1.52    assert(_desc->uses_bcp(), "inconsistent uses_bcp information");
    1.53    return Address(BCP, offset);
    1.54  }
    1.55  
    1.56 -#define callee_saved_register(R) assert((R>=S0 && R<=S7), "should use callee saved registers!")
    1.57 -
    1.58  // bytecode folding
    1.59  void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg,
    1.60 -                                   Register tmp_reg, 
    1.61 -                                   bool load_bc_into_bc_reg,/*=true*/
    1.62 +                                   Register tmp_reg, bool load_bc_into_bc_reg/*=true*/,
    1.63                                     int byte_no) {
    1.64 -  if (!RewriteBytecodes) {
    1.65 -    return;
    1.66 -  }
    1.67 -  
    1.68 +  if (!RewriteBytecodes)  return;
    1.69    Label L_patch_done;
    1.70 +
    1.71    switch (bc) {
    1.72    case Bytecodes::_fast_aputfield:
    1.73    case Bytecodes::_fast_bputfield:
    1.74 @@ -145,25 +118,27 @@
    1.75    case Bytecodes::_fast_lputfield:
    1.76    case Bytecodes::_fast_sputfield:
    1.77      {
    1.78 -    // We skip bytecode quickening for putfield instructions when the put_code written to the constant pool cache
    1.79 -    // is zero. This is required so that every execution of this instruction calls out to 
    1.80 -    // InterpreterRuntime::resolve_get_put to do additional, required work.
    1.81 -    assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
    1.82 -    assert(load_bc_into_bc_reg, "we use bc_reg as temp");
    1.83 -    __ get_cache_and_index_and_bytecode_at_bcp(tmp_reg, bc_reg, tmp_reg, byte_no, 1);
    1.84 -    __ daddi(bc_reg, R0, bc);
    1.85 -    __ beq(tmp_reg, R0, L_patch_done);
    1.86 -    __ delayed()->nop();
    1.87 +      // We skip bytecode quickening for putfield instructions when
    1.88 +      // the put_code written to the constant pool cache is zero.
    1.89 +      // This is required so that every execution of this instruction
    1.90 +      // calls out to InterpreterRuntime::resolve_get_put to do
    1.91 +      // additional, required work.
    1.92 +      assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
    1.93 +      assert(load_bc_into_bc_reg, "we use bc_reg as temp");
    1.94 +      __ get_cache_and_index_and_bytecode_at_bcp(tmp_reg, bc_reg, tmp_reg, byte_no, 1);
    1.95 +      __ daddi(bc_reg, R0, bc);
    1.96 +      __ beq(tmp_reg, R0, L_patch_done);
    1.97 +      __ delayed()->nop();
    1.98      }
    1.99      break;
   1.100    default:
   1.101      assert(byte_no == -1, "sanity");
   1.102 - // the pair bytecodes have already done the load.
   1.103 -  if (load_bc_into_bc_reg) {
   1.104 -    __ move(bc_reg, bc);
   1.105 +    // the pair bytecodes have already done the load.
   1.106 +    if (load_bc_into_bc_reg) {
   1.107 +      __ move(bc_reg, bc);
   1.108 +    }
   1.109    }
   1.110  
   1.111 -  }
   1.112    if (JvmtiExport::can_post_breakpoint()) {
   1.113      Label L_fast_patch;
   1.114      // if a breakpoint is present we can't rewrite the stream directly
   1.115 @@ -173,9 +148,9 @@
   1.116      __ delayed()->nop();
   1.117  
   1.118      __ get_method(tmp_reg);
   1.119 -    // Let breakpoint table handling rewrite to quicker bytecode 
   1.120 -    __ call_VM(NOREG, CAST_FROM_FN_PTR(address, 
   1.121 -	  InterpreterRuntime::set_original_bytecode_at), tmp_reg, BCP, bc_reg);
   1.122 +    // Let breakpoint table handling rewrite to quicker bytecode
   1.123 +    __ call_VM(NOREG, CAST_FROM_FN_PTR(address,
   1.124 +    InterpreterRuntime::set_original_bytecode_at), tmp_reg, BCP, bc_reg);
   1.125  
   1.126      __ b(L_patch_done);
   1.127      __ delayed()->nop();
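
The quickening guard in this hunk deserves a note: a putfield is only patched once InterpreterRuntime::resolve_get_put has filled in the put_code byte of the constant pool cache entry; while that byte reads back zero, the beq to L_patch_done leaves the slow bytecode in place so resolution keeps being invoked. A minimal C++ sketch of the control flow the emitted code implements (function and parameter names are illustrative, not from the source):

    // Sketch only: models the tmp_reg/bc_reg logic generated above.
    void patch_putfield_sketch(int put_code_from_cpcache, int fast_bc) {
      int bc_reg = fast_bc;               // __ daddi(bc_reg, R0, bc)
      if (put_code_from_cpcache == 0) {   // __ beq(tmp_reg, R0, L_patch_done)
        return;                           // unresolved: keep the slow bytecode
      }
      // ... fall through and rewrite the bytecode stream with bc_reg ...
    }
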
   1.128 @@ -270,9 +245,9 @@
   1.129  }
   1.130  
   1.131  void TemplateTable::sipush() {
   1.132 -	transition(vtos, itos);
   1.133 -	__ get_2_byte_integer_at_bcp(FSR, AT, 1);
   1.134 -	__ hswap(FSR);
   1.135 +  transition(vtos, itos);
   1.136 +  __ get_2_byte_integer_at_bcp(FSR, AT, 1);
   1.137 +  __ hswap(FSR);
   1.138  }
   1.139  
   1.140  // T1 : tags
   1.141 @@ -300,11 +275,6 @@
   1.142    __ lb(T1, AT, tags_offset);
   1.143    //now T1 is the tag
   1.144  
   1.145 -  // unresolved string - get the resolved string
   1.146 -  /*__ daddiu(AT, T1, - JVM_CONSTANT_UnresolvedString);
   1.147 -  __ beq(AT, R0, call_ldc);
   1.148 -  __ delayed()->nop();*/
   1.149 -
   1.150    // unresolved class - get the resolved class
   1.151    __ daddiu(AT, T1, - JVM_CONSTANT_UnresolvedClass);
   1.152    __ beq(AT, R0, call_ldc);
   1.153 @@ -312,7 +282,7 @@
   1.154  
   1.155    // unresolved class in error (resolution failed) - call into runtime
   1.156    // so that the same error from first resolution attempt is thrown.
   1.157 -  __ daddiu(AT, T1, -JVM_CONSTANT_UnresolvedClassInError); 
   1.158 +  __ daddiu(AT, T1, -JVM_CONSTANT_UnresolvedClassInError);
   1.159    __ beq(AT, R0, call_ldc);
   1.160    __ delayed()->nop();
   1.161  
   1.162 @@ -325,10 +295,8 @@
   1.163  
   1.164    __ move(A1, wide);
   1.165    call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), A1);
   1.166 -  //	__ sw(FSR, SP, - 1 * wordSize);
   1.167 -  __ push(atos);	
   1.168 +  __ push(atos);
   1.169    __ b(Done);
   1.170 -  //	__ delayed()->daddi(SP, SP, - 1 * wordSize);
   1.171    __ delayed()->nop();
   1.172    __ bind(notClass);
   1.173  
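
For context, ldc dispatches on the constant pool tag loaded into T1: both unresolved-class tags and a resolved class go through the runtime call and push atos, while floats and ints are loaded inline in the following hunks. A sketch of the dispatch (illustrative restatement of the daddiu/beq branch chains, not a real switch in the source):

    switch (tag) {
    case JVM_CONSTANT_UnresolvedClass:
    case JVM_CONSTANT_UnresolvedClassInError:
    case JVM_CONSTANT_Class:  /* call InterpreterRuntime::ldc, push atos */ break;
    case JVM_CONSTANT_Float:  /* lwc1 from the pool, push ftos */           break;
    default:                  /* JVM_CONSTANT_Integer: lw, push itos */     break;
    }
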
   1.174 @@ -344,7 +312,7 @@
   1.175  
   1.176    __ bind(notFloat);
   1.177  #ifdef ASSERT
   1.178 -  { 
   1.179 +  {
   1.180      Label L;
   1.181      __ daddiu(AT, T1, -JVM_CONSTANT_Integer);
   1.182      __ beq(AT, R0, L);
   1.183 @@ -358,7 +326,7 @@
   1.184    __ lw(FSR, T0, base_offset);
   1.185    __ push(itos);
   1.186    __ b(Done);
   1.187 -  __ delayed()->nop(); 
   1.188 +  __ delayed()->nop();
   1.189  
   1.190  
   1.191    if (VerifyOops) {
   1.192 @@ -377,8 +345,9 @@
   1.193    int index_size = wide ? sizeof(u2) : sizeof(u1);
   1.194  
   1.195    Label resolved;
   1.196 - // We are resolved if the resolved reference cache entry contains a
   1.197 - // non-null object (String, MethodType, etc.)
   1.198 +
   1.199 +  // We are resolved if the resolved reference cache entry contains a
   1.200 +  // non-null object (String, MethodType, etc.)
   1.201    assert_different_registers(result, tmp);
   1.202    __ get_cache_index_at_bcp(tmp, 1, index_size);
   1.203    __ load_resolved_reference_at_index(result, tmp);
   1.204 @@ -423,7 +392,7 @@
   1.205    __ daddiu(AT, T1, - JVM_CONSTANT_Double);
   1.206    __ bne(AT, R0, Long);
   1.207    __ delayed()->dsll(T2, T2, Address::times_8);
   1.208 -  // dtos	
   1.209 +  // dtos
   1.210    __ daddu(AT, T3, T2);
   1.211    __ ldc1(FSF, AT, base_offset + 0 * wordSize);
   1.212    __ sdc1(FSF, SP, - 2 * wordSize);
   1.213 @@ -432,7 +401,7 @@
   1.214  
   1.215    // ltos
   1.216    __ bind(Long);
   1.217 -  __ dadd(AT, T3, T2);	
   1.218 +  __ dadd(AT, T3, T2);
   1.219    __ ld(FSR, AT, base_offset + 0 * wordSize);
   1.220    __ push(ltos);
   1.221  
   1.222 @@ -448,13 +417,13 @@
   1.223  }
   1.224  
    1.225  // this method will do bytecode folding of these two forms:
   1.226 -// iload iload			iload caload
   1.227 +// iload iload      iload caload
   1.228  // used register : T2, T3
   1.229  // T2 : bytecode
   1.230  // T3 : folded code
   1.231  void TemplateTable::iload() {
   1.232    transition(vtos, itos);
   1.233 -  if (RewriteFrequentPairs) { 
   1.234 +  if (RewriteFrequentPairs) {
   1.235      Label rewrite, done;
   1.236      // get the next bytecode in T2
   1.237      __ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_iload)));
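
The lbu above fetches the bytecode that follows this iload; the remainder of the function (unchanged, so elided from the hunk) applies the usual HotSpot pair-folding rules. As a sketch of the decision, not the emitted code:

    // Illustrative: which quickened bytecode iload becomes, given its successor.
    static Bytecodes::Code fold_iload(Bytecodes::Code next) {
      if (next == Bytecodes::_iload)      return Bytecodes::_iload;       // wait: fold on a later pass
      if (next == Bytecodes::_fast_iload) return Bytecodes::_fast_iload2;
      if (next == Bytecodes::_caload)     return Bytecodes::_fast_icaload;
      return Bytecodes::_fast_iload;                                      // no pair to fold
    }
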
   1.238 @@ -495,14 +464,14 @@
   1.239  // used register T2
   1.240  // T2 : index
   1.241  void TemplateTable::fast_iload2() {
   1.242 -	transition(vtos, itos);
   1.243 -	locals_index(T2);
   1.244 -	__ lw(FSR, T2, 0);
   1.245 -	__ push(itos);
   1.246 -	locals_index(T2, 3);
   1.247 -	__ lw(FSR, T2, 0);
   1.248 +  transition(vtos, itos);
   1.249 +  locals_index(T2);
   1.250 +  __ lw(FSR, T2, 0);
   1.251 +  __ push(itos);
   1.252 +  locals_index(T2, 3);
   1.253 +  __ lw(FSR, T2, 0);
   1.254  }
   1.255 -  
   1.256 +
   1.257  // used register T2
   1.258  // T2 : index
   1.259  void TemplateTable::fast_iload() {
   1.260 @@ -514,7 +483,6 @@
   1.261  // used register T2
   1.262  // T2 : index
   1.263  void TemplateTable::lload() {
   1.264 -
   1.265    transition(vtos, ltos);
   1.266    locals_index(T2);
   1.267    __ ld(FSR, T2, -wordSize);
   1.268 @@ -526,33 +494,21 @@
   1.269  void TemplateTable::fload() {
   1.270    transition(vtos, ftos);
   1.271    locals_index(T2);
   1.272 -//FIXME, aoqi. How should the high 32bits be when store a single float into a 64bits register. 
   1.273 -  //__ mtc1(R0, FSF);
   1.274    __ lwc1(FSF, T2, 0);
   1.275  }
   1.276  
   1.277  // used register T2
   1.278  // T2 : index
   1.279  void TemplateTable::dload() {
   1.280 -
   1.281    transition(vtos, dtos);
   1.282    locals_index(T2);
   1.283 -/*  if (TaggedStackInterpreter) {
   1.284 -    // Get double out of locals array, onto temp stack and load with
   1.285 -    // float instruction into ST0
   1.286 -    __ dsll(AT,T2,Interpreter::stackElementScale());
   1.287 -    __ dadd(AT, LVP, AT);
   1.288 -    __ ldc1(FSF, AT, Interpreter::local_offset_in_bytes(1)); 
   1.289 -  } else {*/
   1.290 -    __ ldc1(FSF, T2, -wordSize);
   1.291 -    __ ldc1(SSF, T2, 0);
   1.292 - // }
   1.293 +  __ ldc1(FSF, T2, -wordSize);
   1.294 +  __ ldc1(SSF, T2, 0);
   1.295  }
   1.296  
   1.297  // used register T2
   1.298  // T2 : index
   1.299 -void TemplateTable::aload() 
   1.300 -{
   1.301 +void TemplateTable::aload() {
   1.302    transition(vtos, atos);
   1.303    locals_index(T2);
   1.304    __ ld(FSR, T2, 0);
   1.305 @@ -568,56 +524,41 @@
   1.306  // used register T2
   1.307  // T2 : index
   1.308  void TemplateTable::wide_iload() {
   1.309 -	transition(vtos, itos);
   1.310 -	locals_index_wide(T2);
   1.311 -	__ ld(FSR, T2, 0);
   1.312 +  transition(vtos, itos);
   1.313 +  locals_index_wide(T2);
   1.314 +  __ ld(FSR, T2, 0);
   1.315  }
   1.316  
   1.317  // used register T2
   1.318  // T2 : index
   1.319  void TemplateTable::wide_lload() {
   1.320 -	transition(vtos, ltos);
   1.321 -	locals_index_wide(T2);
   1.322 -	__ ld(FSR, T2, -4);
   1.323 +  transition(vtos, ltos);
   1.324 +  locals_index_wide(T2);
   1.325 +  __ ld(FSR, T2, -4);
   1.326  }
   1.327  
   1.328  // used register T2
   1.329  // T2 : index
   1.330  void TemplateTable::wide_fload() {
   1.331 -	transition(vtos, ftos);
   1.332 -	locals_index_wide(T2);
   1.333 -	__ lwc1(FSF, T2, 0);
   1.334 +  transition(vtos, ftos);
   1.335 +  locals_index_wide(T2);
   1.336 +  __ lwc1(FSF, T2, 0);
   1.337  }
   1.338  
   1.339  // used register T2
   1.340  // T2 : index
   1.341  void TemplateTable::wide_dload() {
   1.342 -	transition(vtos, dtos);
   1.343 -	locals_index_wide(T2);
   1.344 -/*	if (TaggedStackInterpreter) {
   1.345 -		// Get double out of locals array, onto temp stack and load with
   1.346 -		// float instruction into ST0
   1.347 -		//   __ movl(eax, laddress(ebx));
   1.348 -		//  __ movl(edx, haddress(ebx));
   1.349 -		__ dsll(AT,T2,Interpreter::stackElementScale());
   1.350 -		__ dadd(AT, LVP, AT);
   1.351 -		__ ldc1(FSF, AT, Interpreter::local_offset_in_bytes(1)); 
   1.352 -
   1.353 -		//  __ pushl(edx);  // push hi first
   1.354 -		//  __ pushl(eax);
   1.355 -		//  __ fld_d(Address(esp));
   1.356 -		//  __ addl(esp, 2*wordSize);
   1.357 -	} else {*/
   1.358 -		__ ldc1(FSF, T2, -4);
   1.359 -	//}
   1.360 +  transition(vtos, dtos);
   1.361 +  locals_index_wide(T2);
   1.362 +  __ ldc1(FSF, T2, -4);
   1.363  }
   1.364  
   1.365  // used register T2
   1.366  // T2 : index
   1.367  void TemplateTable::wide_aload() {
   1.368 -	transition(vtos, atos);
   1.369 -	locals_index_wide(T2);
   1.370 -	__ ld(FSR, T2, 0);
   1.371 +  transition(vtos, atos);
   1.372 +  locals_index_wide(T2);
   1.373 +  __ ld(FSR, T2, 0);
   1.374  }
   1.375  
    1.376  // we use A2 as the register for index, BE CAREFUL!
   1.377 @@ -644,10 +585,10 @@
   1.378  #ifndef OPT_RANGECHECK
   1.379    __ sltu(AT, index, AT);
   1.380    __ bne(AT, R0, ok);
   1.381 -  __ delayed()->nop(); 
   1.382 +  __ delayed()->nop();
   1.383  
    1.384    //throw_ArrayIndexOutOfBoundsException assumes the aberrant index is in A2
   1.385 -  if (A2 != index) __ move(A2, index);		
   1.386 +  if (A2 != index) __ move(A2, index);
   1.387    __ jmp(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry);
   1.388    __ delayed()->nop();
   1.389    __ bind(ok);
   1.390 @@ -660,18 +601,17 @@
   1.391  
   1.392  void TemplateTable::iaload() {
   1.393    transition(itos, itos);
   1.394 -  //  __ pop(SSR);
   1.395 -  if(UseBoundCheckInstruction) {  
   1.396 +  if(UseBoundCheckInstruction) {
   1.397      __ pop(SSR); //SSR:array    FSR: index
   1.398      __ dsll(FSR, FSR, 2);
   1.399      __ dadd(FSR, SSR, FSR);
   1.400      __ addi(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_INT));
   1.401 -  
   1.402 +
   1.403      __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes());  //bound
   1.404      __ dsll(AT, AT, 2);
   1.405      __ dadd(AT, SSR, AT);
   1.406      __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_INT));
   1.407 -  
   1.408 +
   1.409      __ gslwle(FSR, FSR, AT);
   1.410    } else {
   1.411      index_check(SSR, FSR);
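
Throughout the array templates, the UseBoundCheckInstruction path computes the element address and a limit address derived from the array length, then lets a Loongson bounded load (gslwle and friends) perform the access and the range check in one instruction. A C++ sketch of the assumed semantics (the exact boundary condition is as per the Loongson ISA manual; raise_address_error is a hypothetical placeholder):

    // Sketch of what gslwle(FSR, FSR, AT) is relied upon to do.
    jint bounded_load_word(jint* addr, jint* limit) {
      if ((uintptr_t)addr > (uintptr_t)limit)
        raise_address_error();  // the VM maps the trap to
                                // ArrayIndexOutOfBoundsException
      return *addr;             // in range: an ordinary load
    }
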
   1.412 @@ -684,19 +624,18 @@
   1.413  
   1.414  void TemplateTable::laload() {
   1.415    transition(itos, ltos);
   1.416 -  //  __ pop(SSR);
   1.417    if(UseBoundCheckInstruction) {
   1.418      __ pop(SSR); //SSR:array    FSR: index
   1.419      __ dsll(FSR, FSR, Address::times_8);
   1.420      __ dadd(FSR, SSR, FSR);
   1.421      __ addi(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize);
   1.422 -  
   1.423 +
   1.424      __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes());  //bound
   1.425      __ dsll(AT, AT, Address::times_8);
   1.426      __ dadd(AT, SSR, AT);
   1.427      __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize);
   1.428 -  
   1.429 -    __ gsldle(FSR, FSR, AT); 
   1.430 +
   1.431 +    __ gsldle(FSR, FSR, AT);
   1.432    } else {
   1.433      index_check(SSR, FSR);
   1.434      __ dsll(AT, FSR, Address::times_8);
   1.435 @@ -707,21 +646,20 @@
   1.436  
   1.437  void TemplateTable::faload() {
   1.438    transition(itos, ftos);
   1.439 -  // __ pop(SSR);
   1.440    if(UseBoundCheckInstruction) {
   1.441      __ pop(SSR); //SSR:array    FSR: index
   1.442      __ shl(FSR, 2);
   1.443      __ dadd(FSR, SSR, FSR);
   1.444      __ addi(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT));
   1.445 -  
   1.446 +
   1.447      __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes());  //bound
   1.448      __ shl(AT, 2);
   1.449      __ dadd(AT, SSR, AT);
   1.450      __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_FLOAT));
   1.451 -  
   1.452 +
   1.453      __ gslwlec1(FSF, FSR, AT);
   1.454    } else {
   1.455 -    index_check(SSR, FSR);  
   1.456 +    index_check(SSR, FSR);
   1.457      __ shl(FSR, 2);
   1.458      __ dadd(FSR, SSR, FSR);
   1.459      __ lwc1(FSF, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT));
   1.460 @@ -730,21 +668,20 @@
   1.461  
   1.462  void TemplateTable::daload() {
   1.463    transition(itos, dtos);
   1.464 -  //__ pop(SSR);
   1.465    if(UseBoundCheckInstruction) {
   1.466      __ pop(SSR); //SSR:array    FSR: index
   1.467      __ dsll(FSR, FSR, 3);
   1.468      __ dadd(FSR, SSR, FSR);
   1.469      __ addi(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize);
   1.470 -  
   1.471 +
   1.472      __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes());  //bound
   1.473      __ dsll(AT, AT, 3);
   1.474      __ dadd(AT, SSR, AT);
   1.475      __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize);
   1.476 -  
   1.477 +
   1.478      __ gsldlec1(FSF, FSR, AT);
   1.479    } else {
   1.480 -    index_check(SSR, FSR);  
   1.481 +    index_check(SSR, FSR);
   1.482      __ dsll(AT, FSR, 3);
   1.483      __ dadd(AT, SSR, AT);
   1.484      __ ldc1(FSF, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize);
   1.485 @@ -753,7 +690,6 @@
   1.486  
   1.487  void TemplateTable::aaload() {
   1.488    transition(itos, atos);
   1.489 -  //__ pop(SSR);
   1.490    index_check(SSR, FSR);
   1.491    __ dsll(FSR, FSR, UseCompressedOops ? Address::times_4 : Address::times_8);
   1.492    __ dadd(FSR, SSR, FSR);
   1.493 @@ -763,19 +699,18 @@
   1.494  
   1.495  void TemplateTable::baload() {
   1.496    transition(itos, itos);
   1.497 -  //__ pop(SSR);
   1.498    if(UseBoundCheckInstruction) {
   1.499      __ pop(SSR); //SSR:array   FSR:index
   1.500      __ dadd(FSR, SSR, FSR);
   1.501      __ addi(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //base
   1.502  
   1.503 -    __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes());  
   1.504 +    __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes());
   1.505      __ dadd(AT, SSR, AT);
   1.506      __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //bound
   1.507  
   1.508      __ gslble(FSR, FSR, AT);
   1.509    } else {
   1.510 -    index_check(SSR, FSR); 
   1.511 +    index_check(SSR, FSR);
   1.512      __ dadd(FSR, SSR, FSR);
   1.513      __ lb(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE));
   1.514    }
   1.515 @@ -783,7 +718,6 @@
   1.516  
   1.517  void TemplateTable::caload() {
   1.518    transition(itos, itos);
   1.519 -  // __ pop(SSR);
   1.520    index_check(SSR, FSR);
   1.521    __ dsll(FSR, FSR, Address::times_2);
   1.522    __ dadd(FSR, SSR, FSR);
   1.523 @@ -798,7 +732,6 @@
   1.524    // load index out of locals
   1.525    locals_index(T2);
   1.526    __ lw(FSR, T2, 0);
   1.527 -  //	__ pop(SSR);
   1.528    index_check(SSR, FSR);
   1.529    __ dsll(FSR, FSR, 1);
   1.530    __ dadd(FSR, SSR, FSR);
   1.531 @@ -807,21 +740,20 @@
   1.532  
   1.533  void TemplateTable::saload() {
   1.534    transition(itos, itos);
   1.535 -  // __ pop(SSR);
   1.536    if(UseBoundCheckInstruction) {
   1.537      __ pop(SSR); //SSR:array    FSR: index
   1.538      __ dsll(FSR, FSR, Address::times_2);
   1.539      __ dadd(FSR, SSR, FSR);
   1.540      __ addi(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT));
   1.541 -  
   1.542 +
   1.543      __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes());  //bound
   1.544      __ dsll(AT, AT, Address::times_2);
   1.545      __ dadd(AT, SSR, AT);
   1.546      __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_SHORT));
   1.547 -  
   1.548 -    __ gslhle(FSR, FSR, AT); 
   1.549 +
   1.550 +    __ gslhle(FSR, FSR, AT);
   1.551    } else {
   1.552 -    index_check(SSR, FSR);  
   1.553 +    index_check(SSR, FSR);
   1.554      __ dsll(FSR, FSR, Address::times_2);
   1.555      __ dadd(FSR, SSR, FSR);
   1.556      __ lh(FSR, FSR,  arrayOopDesc::base_offset_in_bytes(T_SHORT));
   1.557 @@ -829,24 +761,23 @@
   1.558  }
   1.559  
   1.560  void TemplateTable::iload(int n) {
   1.561 -	transition(vtos, itos);
   1.562 -	__ lw(FSR, iaddress(n));
   1.563 +  transition(vtos, itos);
   1.564 +  __ lw(FSR, iaddress(n));
   1.565  }
   1.566  
   1.567  void TemplateTable::lload(int n) {
   1.568 -	transition(vtos, ltos);
   1.569 -	__ ld(FSR, laddress(n));
   1.570 +  transition(vtos, ltos);
   1.571 +  __ ld(FSR, laddress(n));
   1.572  }
   1.573  
   1.574  void TemplateTable::fload(int n) {
   1.575    transition(vtos, ftos);
   1.576 -  //__ mtc1(R0, FSF);
   1.577    __ lwc1(FSF, faddress(n));
   1.578  }
   1.579 -//FIXME here
   1.580 +
   1.581  void TemplateTable::dload(int n) {
   1.582 -	transition(vtos, dtos);
   1.583 -	__ ldc1(FSF, laddress(n));
   1.584 +  transition(vtos, dtos);
   1.585 +  __ ldc1(FSF, laddress(n));
   1.586  }
   1.587  
   1.588  void TemplateTable::aload(int n) {
   1.589 @@ -858,82 +789,88 @@
   1.590  // T2 : bytecode
   1.591  // T3 : folded code
   1.592  void TemplateTable::aload_0() {
   1.593 -	transition(vtos, atos);
   1.594 -	// According to bytecode histograms, the pairs:
   1.595 -	//
   1.596 -	// _aload_0, _fast_igetfield
   1.597 -	// _aload_0, _fast_agetfield
   1.598 -	// _aload_0, _fast_fgetfield
   1.599 -	//
   1.600 -	// occur frequently. If RewriteFrequentPairs is set, the (slow) _aload_0
   1.601 -	// bytecode checks if the next bytecode is either _fast_igetfield, 
   1.602 -	// _fast_agetfield or _fast_fgetfield and then rewrites the
   1.603 -	// current bytecode into a pair bytecode; otherwise it rewrites the current
   1.604 -	// bytecode into _fast_aload_0 that doesn't do the pair check anymore.
   1.605 -	//
   1.606 -	// Note: If the next bytecode is _getfield, the rewrite must be delayed,
   1.607 -	//       otherwise we may miss an opportunity for a pair.
   1.608 -	//
   1.609 -	// Also rewrite frequent pairs
   1.610 -	//   aload_0, aload_1
   1.611 -	//   aload_0, iload_1
   1.612 -	// These bytecodes with a small amount of code are most profitable to rewrite
   1.613 -	if (RewriteFrequentPairs) {
   1.614 -		Label rewrite, done;
   1.615 -		// get the next bytecode in T2
   1.616 -		__ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0)));
   1.617 -
   1.618 -		// do actual aload_0
   1.619 -		aload(0);
   1.620 -
   1.621 -		// if _getfield then wait with rewrite
   1.622 -		__ move(AT, Bytecodes::_getfield);
   1.623 -		__ beq(AT, T2, done);
   1.624 -		__ delayed()->nop();
   1.625 -
   1.626 -		// if _igetfield then reqrite to _fast_iaccess_0
   1.627 -		assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == 
   1.628 -				Bytecodes::_aload_0, "fix bytecode definition");
   1.629 -		__ move(T3, Bytecodes::_fast_iaccess_0);
   1.630 -		__ move(AT, Bytecodes::_fast_igetfield);
   1.631 -		__ beq(AT, T2, rewrite);
   1.632 -		__ delayed()->nop();
   1.633 -
   1.634 -		// if _agetfield then reqrite to _fast_aaccess_0
   1.635 -		assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == 
   1.636 -				Bytecodes::_aload_0, "fix bytecode definition");
   1.637 -		__ move(T3, Bytecodes::_fast_aaccess_0);
   1.638 -		__ move(AT, Bytecodes::_fast_agetfield);
   1.639 -		__ beq(AT, T2, rewrite);
   1.640 -		__ delayed()->nop();
   1.641 -
   1.642 -		// if _fgetfield then reqrite to _fast_faccess_0
   1.643 -		assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == 
   1.644 -				Bytecodes::_aload_0, "fix bytecode definition");
   1.645 -		__ move(T3, Bytecodes::_fast_faccess_0);
   1.646 -		__ move(AT, Bytecodes::_fast_fgetfield);
   1.647 -		__ beq(AT, T2, rewrite);
   1.648 -		__ delayed()->nop();
   1.649 -
   1.650 -		// else rewrite to _fast_aload0
   1.651 -		assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == 
   1.652 -				Bytecodes::_aload_0, "fix bytecode definition");
   1.653 -		__ move(T3, Bytecodes::_fast_aload_0);
   1.654 -
   1.655 -		// rewrite
   1.656 -		__ bind(rewrite);
   1.657 -		patch_bytecode(Bytecodes::_aload_0, T3, T2, false);
   1.658 -
   1.659 -		__ bind(done);
   1.660 -	} else {
   1.661 -		aload(0);
   1.662 -	}
   1.663 +  transition(vtos, atos);
   1.664 +  // According to bytecode histograms, the pairs:
   1.665 +  //
   1.666 +  // _aload_0, _fast_igetfield
   1.667 +  // _aload_0, _fast_agetfield
   1.668 +  // _aload_0, _fast_fgetfield
   1.669 +  //
   1.670 +  // occur frequently. If RewriteFrequentPairs is set, the (slow)
   1.671 +  // _aload_0 bytecode checks if the next bytecode is either
   1.672 +  // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then
   1.673 +  // rewrites the current bytecode into a pair bytecode; otherwise it
   1.674 +  // rewrites the current bytecode into _fast_aload_0 that doesn't do
   1.675 +  // the pair check anymore.
   1.676 +  //
   1.677 +  // Note: If the next bytecode is _getfield, the rewrite must be
   1.678 +  //       delayed, otherwise we may miss an opportunity for a pair.
   1.679 +  //
   1.680 +  // Also rewrite frequent pairs
   1.681 +  //   aload_0, aload_1
   1.682 +  //   aload_0, iload_1
   1.683 +  // These bytecodes with a small amount of code are most profitable
   1.684 +  // to rewrite
   1.685 +  if (RewriteFrequentPairs) {
   1.686 +    Label rewrite, done;
   1.687 +    // get the next bytecode in T2
   1.688 +    __ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0)));
   1.689 +
   1.690 +    // do actual aload_0
   1.691 +    aload(0);
   1.692 +
   1.693 +    // if _getfield then wait with rewrite
   1.694 +    __ move(AT, Bytecodes::_getfield);
   1.695 +    __ beq(AT, T2, done);
   1.696 +    __ delayed()->nop();
   1.697 +
    1.698 +    // if _igetfield then rewrite to _fast_iaccess_0
   1.699 +    assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) ==
   1.700 +        Bytecodes::_aload_0,
   1.701 +        "fix bytecode definition");
   1.702 +    __ move(T3, Bytecodes::_fast_iaccess_0);
   1.703 +    __ move(AT, Bytecodes::_fast_igetfield);
   1.704 +    __ beq(AT, T2, rewrite);
   1.705 +    __ delayed()->nop();
   1.706 +
    1.707 +    // if _agetfield then rewrite to _fast_aaccess_0
   1.708 +    assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) ==
   1.709 +        Bytecodes::_aload_0,
   1.710 +        "fix bytecode definition");
   1.711 +    __ move(T3, Bytecodes::_fast_aaccess_0);
   1.712 +    __ move(AT, Bytecodes::_fast_agetfield);
   1.713 +    __ beq(AT, T2, rewrite);
   1.714 +    __ delayed()->nop();
   1.715 +
    1.716 +    // if _fgetfield then rewrite to _fast_faccess_0
   1.717 +    assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) ==
   1.718 +        Bytecodes::_aload_0,
   1.719 +        "fix bytecode definition");
   1.720 +    __ move(T3, Bytecodes::_fast_faccess_0);
   1.721 +    __ move(AT, Bytecodes::_fast_fgetfield);
   1.722 +    __ beq(AT, T2, rewrite);
   1.723 +    __ delayed()->nop();
   1.724 +
   1.725 +    // else rewrite to _fast_aload0
   1.726 +    assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) ==
   1.727 +        Bytecodes::_aload_0,
   1.728 +        "fix bytecode definition");
   1.729 +    __ move(T3, Bytecodes::_fast_aload_0);
   1.730 +
   1.731 +    // rewrite
   1.732 +    __ bind(rewrite);
   1.733 +    patch_bytecode(Bytecodes::_aload_0, T3, T2, false);
   1.734 +
   1.735 +    __ bind(done);
   1.736 +  } else {
   1.737 +    aload(0);
   1.738 +  }
   1.739  }
   1.740  
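The branch ladder in aload_0 implements a small decision table; restated as a C++ sketch (illustrative only, not code from the change):

    static Bytecodes::Code rewrite_for_aload_0(Bytecodes::Code next) {
      switch (next) {
      case Bytecodes::_getfield:       return Bytecodes::_aload_0;        // wait with rewrite
      case Bytecodes::_fast_igetfield: return Bytecodes::_fast_iaccess_0;
      case Bytecodes::_fast_agetfield: return Bytecodes::_fast_aaccess_0;
      case Bytecodes::_fast_fgetfield: return Bytecodes::_fast_faccess_0;
      default:                         return Bytecodes::_fast_aload_0;
      }
    }
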
   1.741  void TemplateTable::istore() {
   1.742 -	transition(itos, vtos);
   1.743 -	locals_index(T2);
   1.744 -	__ sw(FSR, T2, 0);
   1.745 +  transition(itos, vtos);
   1.746 +  locals_index(T2);
   1.747 +  __ sw(FSR, T2, 0);
   1.748  }
   1.749  
   1.750  void TemplateTable::lstore() {
   1.751 @@ -943,9 +880,9 @@
   1.752  }
   1.753  
   1.754  void TemplateTable::fstore() {
   1.755 -	transition(ftos, vtos);
   1.756 -	locals_index(T2);
   1.757 -	__ swc1(FSF, T2, 0);
   1.758 +  transition(ftos, vtos);
   1.759 +  locals_index(T2);
   1.760 +  __ swc1(FSF, T2, 0);
   1.761  }
   1.762  
   1.763  void TemplateTable::dstore() {
   1.764 @@ -956,42 +893,38 @@
   1.765  
   1.766  void TemplateTable::astore() {
   1.767    transition(vtos, vtos);
   1.768 -  //  __ pop(FSR);
   1.769    __ pop_ptr(FSR);
   1.770    locals_index(T2);
   1.771    __ sd(FSR, T2, 0);
   1.772  }
   1.773  
   1.774  void TemplateTable::wide_istore() {
   1.775 -	transition(vtos, vtos);
   1.776 -	//  __ pop(FSR);
   1.777 -	__ pop_i(FSR);
   1.778 -	locals_index_wide(T2);
   1.779 -	__ sd(FSR, T2, 0);
   1.780 +  transition(vtos, vtos);
   1.781 +  __ pop_i(FSR);
   1.782 +  locals_index_wide(T2);
   1.783 +  __ sd(FSR, T2, 0);
   1.784  }
   1.785  
   1.786  void TemplateTable::wide_lstore() {
   1.787 -	transition(vtos, vtos);
   1.788 -	//__ pop2(FSR, SSR);
   1.789 -	//__ pop_l(FSR, SSR); 
   1.790 -	__ pop_l(FSR); //aoqi:FIXME Is this right?
   1.791 -	locals_index_wide(T2);
   1.792 -	__ sd(FSR, T2, -4);
   1.793 +  transition(vtos, vtos);
   1.794 +  __ pop_l(FSR);
   1.795 +  locals_index_wide(T2);
   1.796 +  __ sd(FSR, T2, -4);
   1.797  }
   1.798  
   1.799  void TemplateTable::wide_fstore() {
   1.800 -	wide_istore();
   1.801 +  wide_istore();
   1.802  }
   1.803  
   1.804  void TemplateTable::wide_dstore() {
   1.805 -	wide_lstore();
   1.806 +  wide_lstore();
   1.807  }
   1.808  
   1.809  void TemplateTable::wide_astore() {
   1.810 -	transition(vtos, vtos);
   1.811 -	__ pop_ptr(FSR);
   1.812 -	locals_index_wide(T2);
   1.813 -	__ sd(FSR, T2, 0);
   1.814 +  transition(vtos, vtos);
   1.815 +  __ pop_ptr(FSR);
   1.816 +  locals_index_wide(T2);
   1.817 +  __ sd(FSR, T2, 0);
   1.818  }
   1.819  
   1.820  // used register : T2
   1.821 @@ -999,16 +932,16 @@
   1.822    transition(itos, vtos);
   1.823    __ pop_i(SSR);   // T2: array  SSR: index
   1.824    if(UseBoundCheckInstruction) {
   1.825 -    __ pop_ptr(T2); 
   1.826 +    __ pop_ptr(T2);
   1.827      __ dsll(SSR, SSR, Address::times_4);
   1.828      __ dadd(SSR, T2, SSR);
   1.829      __ addi(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_INT));  // base
   1.830 -    
   1.831 -    __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes());  
   1.832 +
   1.833 +    __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes());
   1.834      __ dsll(AT, AT, Address::times_4);
   1.835      __ dadd(AT, T2, AT);
   1.836      __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_INT));  //bound
   1.837 -    
   1.838 +
   1.839      __ gsswle(FSR, SSR, AT);
   1.840    } else {
   1.841      index_check(T2, SSR);  // prefer index in ebx
   1.842 @@ -1025,16 +958,16 @@
   1.843    transition(ltos, vtos);
   1.844    __ pop_i (T2);
   1.845    if(UseBoundCheckInstruction) {
   1.846 -    __ pop_ptr(T3); 
   1.847 +    __ pop_ptr(T3);
   1.848      __ dsll(T2, T2, Address::times_8);
   1.849      __ dadd(T2, T3, T2);
   1.850      __ addi(T2, T2, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize);  // base
   1.851 -    
   1.852 -    __ lw(AT, T3, arrayOopDesc::length_offset_in_bytes());  
   1.853 +
   1.854 +    __ lw(AT, T3, arrayOopDesc::length_offset_in_bytes());
   1.855      __ dsll(AT, AT, Address::times_8);
   1.856      __ dadd(AT, T3, AT);
   1.857      __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize);  //bound
   1.858 -    
   1.859 +
   1.860      __ gssdle(FSR, T2, AT);
   1.861    } else {
   1.862      index_check(T3, T2);
   1.863 @@ -1047,21 +980,21 @@
   1.864  // used register T2
   1.865  void TemplateTable::fastore() {
   1.866    transition(ftos, vtos);
   1.867 -  __ pop_i(SSR);	
   1.868 +  __ pop_i(SSR);
   1.869    if(UseBoundCheckInstruction) {
   1.870 -    __ pop_ptr(T2); 
   1.871 +    __ pop_ptr(T2);
   1.872      __ dsll(SSR, SSR, Address::times_4);
   1.873      __ dadd(SSR, T2, SSR);
   1.874      __ addi(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT));  // base
   1.875 -    
   1.876 -    __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes());  
   1.877 +
   1.878 +    __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes());
   1.879      __ dsll(AT, AT, Address::times_4);
   1.880      __ dadd(AT, T2, AT);
   1.881      __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_FLOAT));  //bound
   1.882 -    
   1.883 +
   1.884      __ gsswlec1(FSF, SSR, AT);
   1.885    } else {
   1.886 -    index_check(T2, SSR); 
   1.887 +    index_check(T2, SSR);
   1.888      __ dsll(SSR, SSR, Address::times_4);
   1.889      __ dadd(T2, T2, SSR);
   1.890      __ swc1(FSF, T2, arrayOopDesc::base_offset_in_bytes(T_FLOAT));
   1.891 @@ -1071,21 +1004,21 @@
   1.892  // used register T2, T3
   1.893  void TemplateTable::dastore() {
   1.894    transition(dtos, vtos);
   1.895 -  __ pop_i (T2); 
   1.896 +  __ pop_i (T2);
   1.897    if(UseBoundCheckInstruction) {
   1.898 -    __ pop_ptr(T3); 
   1.899 +    __ pop_ptr(T3);
   1.900      __ dsll(T2, T2, Address::times_8);
   1.901      __ dadd(T2, T3, T2);
   1.902      __ addi(T2, T2, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize);  // base
   1.903 -    
   1.904 -    __ lw(AT, T3, arrayOopDesc::length_offset_in_bytes());  
   1.905 +
   1.906 +    __ lw(AT, T3, arrayOopDesc::length_offset_in_bytes());
   1.907      __ dsll(AT, AT, Address::times_8);
   1.908      __ dadd(AT, T3, AT);
   1.909      __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize);  //bound
   1.910 -    
   1.911 +
   1.912      __ gssdlec1(FSF, T2, AT);
   1.913    } else {
   1.914 -    index_check(T3, T2);  
   1.915 +    index_check(T3, T2);
   1.916      __ dsll(T2, T2, Address::times_8);
   1.917      __ daddu(T3, T3, T2);
   1.918      __ sdc1(FSF, T3, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize);
   1.919 @@ -1111,11 +1044,9 @@
   1.920    __ delayed()->nop();
   1.921  
   1.922    // Move subklass into T3
   1.923 -  //__ ld(T3,  Address(FSR, oopDesc::klass_offset_in_bytes()));
   1.924    //add for compressedoops
   1.925    __ load_klass(T3, FSR);
   1.926    // Move superklass into T8
   1.927 -  //__ ld(T8, Address(T2, oopDesc::klass_offset_in_bytes()));
   1.928    //add for compressedoops
   1.929    __ load_klass(T8, T2);
   1.930    __ ld(T8, Address(T8,  ObjArrayKlass::element_klass_offset()));
   1.931 @@ -1126,7 +1057,7 @@
   1.932  
   1.933    // Generate subtype check.
   1.934    // Superklass in T8.  Subklass in T3.
   1.935 -  __ gen_subtype_check(T8, T3, ok_is_subtype);				// <-- Jin
   1.936 +  __ gen_subtype_check(T8, T3, ok_is_subtype);        // <-- Jin
   1.937    // Come here on failure
   1.938    // object is at FSR
   1.939    __ jmp(Interpreter::_throw_ArrayStoreException_entry);    // <-- Jin
   1.940 @@ -1134,8 +1065,7 @@
   1.941    // Come here on success
   1.942    __ bind(ok_is_subtype);
   1.943    //replace with do_oop_store->store_heap_oop
   1.944 -  //__ sd(FSR, T2, 0);
   1.945 -  __ store_heap_oop(Address(T2, 0), FSR);					// <-- Jin
   1.946 +  __ store_heap_oop(Address(T2, 0), FSR);          // <-- Jin
   1.947    __ store_check(T2);
   1.948    __ b(done);
   1.949    __ delayed()->nop();
   1.950 @@ -1145,8 +1075,7 @@
   1.951    __ profile_null_seen(T9);
   1.952    __ dsll(AT, SSR, UseCompressedOops? Address::times_4 : Address::times_8);
   1.953    __ dadd(T2, T2, AT);
   1.954 -  //__ sd(FSR, T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
   1.955 -  __ store_heap_oop(Address(T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), FSR);	/* FSR is null here */
   1.956 +  __ store_heap_oop(Address(T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), FSR);  /* FSR is null here */
   1.957  
   1.958    __ bind(done);
   1.959    __ daddi(SP, SP, 3 * Interpreter::stackElementSize);
   1.960 @@ -1154,16 +1083,16 @@
   1.961  
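The aastore sequence above performs the full store check: a null value skips the subtype test entirely, while a non-null value is checked against the destination array's element klass before store_heap_oop plus store_check. In outline (throw_ArrayStoreException and card_mark are placeholders, not the real entry names):

    // Semantic outline of aastore; not the generated code.
    void aastore_sketch(oop* slot, oop value, Klass* element_klass) {
      if (value != NULL && !value->klass()->is_subtype_of(element_klass))
        throw_ArrayStoreException();
      *slot = value;        // store_heap_oop
      card_mark(slot);      // store_check: dirty the card for the GC
    }
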
   1.962  void TemplateTable::bastore() {
   1.963    transition(itos, vtos);
   1.964 -  __ pop_i (SSR); 
   1.965 +  __ pop_i(SSR);
   1.966    if(UseBoundCheckInstruction) {
   1.967 -    __ pop_ptr(T2); 
   1.968 +    __ pop_ptr(T2);
   1.969      __ dadd(SSR, T2, SSR);
   1.970      __ addi(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE));  // base
   1.971 -    
   1.972 -    __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes());  
   1.973 +
   1.974 +    __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes());
   1.975      __ dadd(AT, T2, AT);
   1.976      __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_BYTE));  //bound
   1.977 -    
   1.978 +
   1.979      __ gssble(FSR, SSR, AT);
   1.980    } else {
   1.981      index_check(T2, SSR);
   1.982 @@ -1174,21 +1103,21 @@
   1.983  
   1.984  void TemplateTable::castore() {
   1.985    transition(itos, vtos);
   1.986 -  __ pop_i(SSR); 
   1.987 +  __ pop_i(SSR);
   1.988    if(UseBoundCheckInstruction) {
   1.989 -    __ pop_ptr(T2); 
   1.990 +    __ pop_ptr(T2);
   1.991      __ dsll(SSR, SSR, Address::times_2);
   1.992      __ dadd(SSR, T2, SSR);
   1.993      __ addi(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR));  // base
   1.994 -    
   1.995 -    __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes());  
   1.996 +
   1.997 +    __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes());
   1.998      __ dsll(AT, AT, Address::times_2);
   1.999      __ dadd(AT, T2, AT);
  1.1000      __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_CHAR));  //bound
  1.1001 -    
  1.1002 +
  1.1003      __ gsshle(FSR, SSR, AT);
  1.1004    } else {
  1.1005 -    index_check(T2, SSR); 
  1.1006 +    index_check(T2, SSR);
  1.1007      __ dsll(SSR, SSR, Address::times_2);
  1.1008      __ dadd(SSR, T2, SSR);
  1.1009      __ sh(FSR, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR));
  1.1010 @@ -1245,226 +1174,212 @@
  1.1011  
  1.1012  // blows FSR
  1.1013  void TemplateTable::dup_x1() {
  1.1014 -	transition(vtos, vtos);
  1.1015 -	// stack: ..., a, b
  1.1016 -	__ load_ptr(0, FSR);  // load b
  1.1017 -	__ load_ptr(1, A5);  // load a
  1.1018 -	__ store_ptr(1, FSR); // store b
  1.1019 -	__ store_ptr(0, A5); // store a
  1.1020 -	__ push_ptr(FSR);             // push b
  1.1021 -	// stack: ..., b, a, b
  1.1022 +  transition(vtos, vtos);
  1.1023 +  // stack: ..., a, b
  1.1024 +  __ load_ptr(0, FSR);  // load b
  1.1025 +  __ load_ptr(1, A5);  // load a
  1.1026 +  __ store_ptr(1, FSR); // store b
  1.1027 +  __ store_ptr(0, A5); // store a
  1.1028 +  __ push_ptr(FSR);             // push b
  1.1029 +  // stack: ..., b, a, b
  1.1030  }
  1.1031  
  1.1032  // blows FSR
  1.1033  void TemplateTable::dup_x2() {
  1.1034 -	transition(vtos, vtos);
  1.1035 -	// stack: ..., a, b, c
  1.1036 -	__ load_ptr(0, FSR);  // load c
  1.1037 -	__ load_ptr(2, A5);  // load a
  1.1038 -	__ store_ptr(2, FSR); // store c in a
  1.1039 -	__ push_ptr(FSR);             // push c
  1.1040 -	// stack: ..., c, b, c, c
  1.1041 -	__ load_ptr(2, FSR);  // load b
  1.1042 -	__ store_ptr(2, A5); // store a in b
  1.1043 -	// stack: ..., c, a, c, c
  1.1044 -	__ store_ptr(1, FSR); // store b in c
  1.1045 -	// stack: ..., c, a, b, c
  1.1046 +  transition(vtos, vtos);
  1.1047 +  // stack: ..., a, b, c
  1.1048 +  __ load_ptr(0, FSR);  // load c
  1.1049 +  __ load_ptr(2, A5);  // load a
  1.1050 +  __ store_ptr(2, FSR); // store c in a
  1.1051 +  __ push_ptr(FSR);             // push c
  1.1052 +  // stack: ..., c, b, c, c
  1.1053 +  __ load_ptr(2, FSR);  // load b
  1.1054 +  __ store_ptr(2, A5); // store a in b
  1.1055 +  // stack: ..., c, a, c, c
  1.1056 +  __ store_ptr(1, FSR); // store b in c
  1.1057 +  // stack: ..., c, a, b, c
  1.1058  }
  1.1059  
  1.1060  // blows FSR
  1.1061  void TemplateTable::dup2() {
  1.1062 -	transition(vtos, vtos);
  1.1063 -	// stack: ..., a, b
  1.1064 -	__ load_ptr(1, FSR);  // load a
  1.1065 -	__ push_ptr(FSR);             // push a
  1.1066 -	__ load_ptr(1, FSR);  // load b
  1.1067 -	__ push_ptr(FSR);             // push b
  1.1068 -	// stack: ..., a, b, a, b
  1.1069 +  transition(vtos, vtos);
  1.1070 +  // stack: ..., a, b
  1.1071 +  __ load_ptr(1, FSR);  // load a
  1.1072 +  __ push_ptr(FSR);             // push a
  1.1073 +  __ load_ptr(1, FSR);  // load b
  1.1074 +  __ push_ptr(FSR);             // push b
  1.1075 +  // stack: ..., a, b, a, b
  1.1076  }
  1.1077  
  1.1078  // blows FSR
  1.1079  void TemplateTable::dup2_x1() {
  1.1080 -	transition(vtos, vtos);
  1.1081 -	// stack: ..., a, b, c
  1.1082 -	__ load_ptr(0, T2);  // load c
  1.1083 -	__ load_ptr(1, FSR);  // load b
  1.1084 -	__ push_ptr(FSR);             // push b
  1.1085 -	__ push_ptr(T2);             // push c
  1.1086 -	// stack: ..., a, b, c, b, c
  1.1087 -	__ store_ptr(3, T2); // store c in b
  1.1088 -	// stack: ..., a, c, c, b, c
  1.1089 -	__ load_ptr(4, T2);  // load a
  1.1090 -	__ store_ptr(2, T2); // store a in 2nd c
  1.1091 -	// stack: ..., a, c, a, b, c
  1.1092 -	__ store_ptr(4, FSR); // store b in a
  1.1093 -	// stack: ..., b, c, a, b, c
  1.1094 -
  1.1095 -	// stack: ..., b, c, a, b, c
  1.1096 +  transition(vtos, vtos);
  1.1097 +  // stack: ..., a, b, c
  1.1098 +  __ load_ptr(0, T2);  // load c
  1.1099 +  __ load_ptr(1, FSR);  // load b
  1.1100 +  __ push_ptr(FSR);             // push b
  1.1101 +  __ push_ptr(T2);             // push c
  1.1102 +  // stack: ..., a, b, c, b, c
  1.1103 +  __ store_ptr(3, T2); // store c in b
  1.1104 +  // stack: ..., a, c, c, b, c
  1.1105 +  __ load_ptr(4, T2);  // load a
  1.1106 +  __ store_ptr(2, T2); // store a in 2nd c
  1.1107 +  // stack: ..., a, c, a, b, c
  1.1108 +  __ store_ptr(4, FSR); // store b in a
  1.1109 +  // stack: ..., b, c, a, b, c
  1.1110 +
  1.1111 +  // stack: ..., b, c, a, b, c
  1.1112  }
  1.1113  
  1.1114  // blows FSR, SSR
  1.1115  void TemplateTable::dup2_x2() {
  1.1116 -	transition(vtos, vtos);
  1.1117 -	// stack: ..., a, b, c, d
  1.1118 -	// stack: ..., a, b, c, d
  1.1119 -	__ load_ptr(0, T2);  // load d
  1.1120 -	__ load_ptr(1, FSR);  // load c
  1.1121 -	__ push_ptr(FSR);             // push c
  1.1122 -	__ push_ptr(T2);             // push d
  1.1123 -	// stack: ..., a, b, c, d, c, d
  1.1124 -	__ load_ptr(4, FSR);  // load b
  1.1125 -	__ store_ptr(2, FSR); // store b in d
  1.1126 -	__ store_ptr(4, T2); // store d in b
  1.1127 -	// stack: ..., a, d, c, b, c, d
  1.1128 -	__ load_ptr(5, T2);  // load a
  1.1129 -	__ load_ptr(3, FSR);  // load c
  1.1130 -	__ store_ptr(3, T2); // store a in c
  1.1131 -	__ store_ptr(5, FSR); // store c in a
  1.1132 -	// stack: ..., c, d, a, b, c, d
  1.1133 -
  1.1134 -	// stack: ..., c, d, a, b, c, d
  1.1135 +  transition(vtos, vtos);
  1.1136 +  // stack: ..., a, b, c, d
  1.1137 +  // stack: ..., a, b, c, d
  1.1138 +  __ load_ptr(0, T2);  // load d
  1.1139 +  __ load_ptr(1, FSR);  // load c
  1.1140 +  __ push_ptr(FSR);             // push c
  1.1141 +  __ push_ptr(T2);             // push d
  1.1142 +  // stack: ..., a, b, c, d, c, d
  1.1143 +  __ load_ptr(4, FSR);  // load b
  1.1144 +  __ store_ptr(2, FSR); // store b in d
  1.1145 +  __ store_ptr(4, T2); // store d in b
  1.1146 +  // stack: ..., a, d, c, b, c, d
  1.1147 +  __ load_ptr(5, T2);  // load a
  1.1148 +  __ load_ptr(3, FSR);  // load c
  1.1149 +  __ store_ptr(3, T2); // store a in c
  1.1150 +  __ store_ptr(5, FSR); // store c in a
  1.1151 +  // stack: ..., c, d, a, b, c, d
  1.1152 +
  1.1153 +  // stack: ..., c, d, a, b, c, d
  1.1154  }
  1.1155  
  1.1156  // blows FSR
  1.1157  void TemplateTable::swap() {
  1.1158 -	transition(vtos, vtos);
  1.1159 -	// stack: ..., a, b
  1.1160 -
  1.1161 -	__ load_ptr(1, A5);  // load a
  1.1162 -	__ load_ptr(0, FSR);  // load b
  1.1163 -	__ store_ptr(0, A5); // store a in b
  1.1164 -	__ store_ptr(1, FSR); // store b in a
  1.1165 -
  1.1166 -	// stack: ..., b, a
  1.1167 +  transition(vtos, vtos);
  1.1168 +  // stack: ..., a, b
  1.1169 +
  1.1170 +  __ load_ptr(1, A5);  // load a
  1.1171 +  __ load_ptr(0, FSR);  // load b
  1.1172 +  __ store_ptr(0, A5); // store a in b
  1.1173 +  __ store_ptr(1, FSR); // store b in a
  1.1174 +
  1.1175 +  // stack: ..., b, a
  1.1176  }
  1.1177  
  1.1178  void TemplateTable::iop2(Operation op) {
  1.1179 -	transition(itos, itos);
  1.1180 -	switch (op) {
  1.1181 -		case add  :                    
  1.1182 -			__ pop_i(SSR); 
  1.1183 -			__ addu32(FSR, SSR, FSR); 
  1.1184 -			break;
  1.1185 -		case sub  :  
  1.1186 -			__ pop_i(SSR); 
  1.1187 -			__ subu32(FSR, SSR, FSR); 
  1.1188 -			break;
  1.1189 -		case mul  :                    
  1.1190 -			__ lw(SSR, SP, 0);
  1.1191 -			__ daddi(SP, SP, wordSize);
  1.1192 +  transition(itos, itos);
  1.1193 +  switch (op) {
  1.1194 +    case add  :
  1.1195 +      __ pop_i(SSR);
  1.1196 +      __ addu32(FSR, SSR, FSR);
  1.1197 +      break;
  1.1198 +    case sub  :
  1.1199 +      __ pop_i(SSR);
  1.1200 +      __ subu32(FSR, SSR, FSR);
  1.1201 +      break;
  1.1202 +    case mul  :
  1.1203 +      __ lw(SSR, SP, 0);
  1.1204 +      __ daddi(SP, SP, wordSize);
   1.1205                        __ mul(FSR, SSR, FSR);
  1.1206 -			break;
  1.1207 -		case _and :                    
  1.1208 -			__ pop_i(SSR); 
  1.1209 -			__ andr(FSR, SSR, FSR); 
  1.1210 -			break;
  1.1211 -		case _or  :                    
  1.1212 -			__ pop_i(SSR); 
  1.1213 -			__ orr(FSR, SSR, FSR); 
  1.1214 -			break;
  1.1215 -		case _xor :                    
  1.1216 -			__ pop_i(SSR); 
  1.1217 -			__ xorr(FSR, SSR, FSR); 
  1.1218 -			break;
  1.1219 -		case shl  : 
  1.1220 -			__ pop_i(SSR); 
  1.1221 -			__ sllv(FSR, SSR, FSR);      
  1.1222 -			break; // implicit masking of lower 5 bits by Intel shift instr. mips also
  1.1223 -		case shr  : 
  1.1224 -			__ pop_i(SSR); 
  1.1225 -			__ srav(FSR, SSR, FSR);      
  1.1226 -			break; // implicit masking of lower 5 bits by Intel shift instr. mips also
  1.1227 -		case ushr : 
  1.1228 -			__ pop_i(SSR); 
  1.1229 -			__ srlv(FSR, SSR, FSR);     
  1.1230 -			break; // implicit masking of lower 5 bits by Intel shift instr. mips also
  1.1231 -		default   : ShouldNotReachHere();
  1.1232 -	}
  1.1233 +      break;
  1.1234 +    case _and :
  1.1235 +      __ pop_i(SSR);
  1.1236 +      __ andr(FSR, SSR, FSR);
  1.1237 +      break;
  1.1238 +    case _or  :
  1.1239 +      __ pop_i(SSR);
  1.1240 +      __ orr(FSR, SSR, FSR);
  1.1241 +      break;
  1.1242 +    case _xor :
  1.1243 +      __ pop_i(SSR);
  1.1244 +      __ xorr(FSR, SSR, FSR);
  1.1245 +      break;
  1.1246 +    case shl  :
  1.1247 +      __ pop_i(SSR);
  1.1248 +      __ sllv(FSR, SSR, FSR);
   1.1249 +      break; // the low 5 bits of the shift count are masked implicitly, on MIPS as on x86
  1.1250 +    case shr  :
  1.1251 +      __ pop_i(SSR);
  1.1252 +      __ srav(FSR, SSR, FSR);
   1.1253 +      break; // the low 5 bits of the shift count are masked implicitly, on MIPS as on x86
  1.1254 +    case ushr :
  1.1255 +      __ pop_i(SSR);
  1.1256 +      __ srlv(FSR, SSR, FSR);
   1.1257 +      break; // the low 5 bits of the shift count are masked implicitly, on MIPS as on x86
  1.1258 +    default   : ShouldNotReachHere();
  1.1259 +  }
  1.1260  }
  1.1261  
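In iop2, the shl/shr/ushr cases rely on the hardware mask: MIPS sllv, srav and srlv read only the low five bits of the shift register, which is exactly the masking JLS 15.19 mandates for int shifts, so no explicit andi is needed. Equivalent C++ for the shl case:

    jint java_ishl(jint value, jint shift) {
      return value << (shift & 0x1f);   // what sllv(FSR, SSR, FSR) computes
    }
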
  1.1262  // the result stored in FSR, SSR,
  1.1263  // used registers : T2, T3
  1.1264 -//FIXME, aoqi
  1.1265  void TemplateTable::lop2(Operation op) {
  1.1266    transition(ltos, ltos);
  1.1267 -  //__ pop2(T2, T3);
  1.1268    __ pop_l(T2, T3);
  1.1269  #ifdef ASSERT
  1.1270    {
  1.1271      Label  L;
  1.1272      __ beq(T3, R0, L);
  1.1273      __ delayed()->nop();
  1.1274 -    // FIXME: stack verification required
  1.1275 -//    __ stop("lop2, wrong stack");  // <--- Fu 20130930
  1.1276      __ bind(L);
  1.1277    }
  1.1278  #endif
  1.1279    switch (op) {
  1.1280 -    case add : 
  1.1281 +    case add :
  1.1282        __ daddu(FSR, T2, FSR);
  1.1283 -      //__ sltu(AT, FSR, T2);
  1.1284 -      //__ daddu(SSR, T3, SSR);
  1.1285 -      //__ daddu(SSR, SSR, AT); 
  1.1286        break;
  1.1287      case sub :
  1.1288        __ dsubu(FSR, T2, FSR);
  1.1289 -      //__ sltu(AT, T2, FSR);
  1.1290 -      //__ dsubu(SSR, T3, SSR);
  1.1291 -      //__ dsubu(SSR, SSR, AT);
  1.1292        break;
  1.1293 -    case _and: 
  1.1294 -      __ andr(FSR, T2, FSR); 
  1.1295 -      //__ andr(SSR, T3, SSR); 
  1.1296 +    case _and:
  1.1297 +      __ andr(FSR, T2, FSR);
  1.1298        break;
  1.1299 -    case _or : 
  1.1300 -      __ orr(FSR, T2, FSR); 
  1.1301 -      //__ orr(SSR, T3, SSR); 
  1.1302 +    case _or :
  1.1303 +      __ orr(FSR, T2, FSR);
  1.1304        break;
  1.1305 -    case _xor: 
  1.1306 -      __ xorr(FSR, T2, FSR); 
  1.1307 -      //__ xorr(SSR, T3, SSR); 
  1.1308 +    case _xor:
  1.1309 +      __ xorr(FSR, T2, FSR);
  1.1310        break;
  1.1311      default : ShouldNotReachHere();
  1.1312    }
  1.1313  }
  1.1314  
  1.1315 -// java require this bytecode could handle 0x80000000/-1, dont cause a overflow exception, 
   1.1316 +// Java requires this bytecode to handle 0x80000000/-1 without raising an overflow exception;
  1.1317  // the result is 0x80000000
   1.1318  // the godson2 cpu does the same, so we need not handle this specially as x86 does
  1.1319  void TemplateTable::idiv() {
  1.1320 -	transition(itos, itos);
  1.1321 -	Label not_zero;
  1.1322 -
  1.1323 -	__ bne(FSR, R0, not_zero);
  1.1324 -	__ delayed()->nop();
  1.1325 -	__ jmp(Interpreter::_throw_ArithmeticException_entry); 
  1.1326 -	__ delayed()->nop();
  1.1327 -	__ bind(not_zero);
  1.1328 -
  1.1329 -	__ pop_i(SSR);
  1.1330 -        if (UseLoongsonISA) {
  1.1331 -          __ gsdiv(FSR, SSR, FSR);
  1.1332 -        } else {
  1.1333 -	  __ div(SSR, FSR);
  1.1334 -	  __ mflo(FSR);
  1.1335 -        }
  1.1336 +  transition(itos, itos);
  1.1337 +  Label not_zero;
  1.1338 +
  1.1339 +  __ bne(FSR, R0, not_zero);
  1.1340 +  __ delayed()->nop();
  1.1341 +  __ jmp(Interpreter::_throw_ArithmeticException_entry);
  1.1342 +  __ delayed()->nop();
  1.1343 +  __ bind(not_zero);
  1.1344 +
  1.1345 +  __ pop_i(SSR);
  1.1346 +  if (UseLoongsonISA) {
  1.1347 +    __ gsdiv(FSR, SSR, FSR);
  1.1348 +  } else {
  1.1349 +    __ div(SSR, FSR);
  1.1350 +    __ mflo(FSR);
  1.1351 +  }
  1.1352  }
  1.1353  
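The case the idiv comment refers to: Java defines Integer.MIN_VALUE / -1 to wrap back to Integer.MIN_VALUE (JLS 15.17.2), and the godson2/Loongson divider already produces that result, so only the divide-by-zero path needs a branch. A portable C++ rendering would need the guard spelled out, since the overflowing quotient is undefined behavior in C++:

    jint java_idiv(jint a, jint b) {          // b == 0 handled by the branch above
      if (a == (jint)0x80000000 && b == -1)
        return a;                             // wraps to Integer.MIN_VALUE
      return a / b;
    }
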
  1.1354  void TemplateTable::irem() {
  1.1355 -	transition(itos, itos);
  1.1356 -	Label not_zero;
  1.1357 -	//__ pop(SSR);
  1.1358 -	__ pop_i(SSR);
  1.1359 -	__ div(SSR, FSR);
  1.1360 -
  1.1361 -	__ bne(FSR, R0, not_zero);
  1.1362 -	__ delayed()->nop();
  1.1363 -	//__ brk(7);
  1.1364 -	__ jmp(Interpreter::_throw_ArithmeticException_entry);
  1.1365 -	__ delayed()->nop();
  1.1366 -
  1.1367 -	__ bind(not_zero);
  1.1368 -	__ mfhi(FSR);
  1.1369 +  transition(itos, itos);
  1.1370 +  Label not_zero;
  1.1371 +  __ pop_i(SSR);
  1.1372 +  __ div(SSR, FSR);
  1.1373 +
  1.1374 +  __ bne(FSR, R0, not_zero);
  1.1375 +  __ delayed()->nop();
  1.1376 +  //__ brk(7);
  1.1377 +  __ jmp(Interpreter::_throw_ArithmeticException_entry);
  1.1378 +  __ delayed()->nop();
  1.1379 +
  1.1380 +  __ bind(not_zero);
  1.1381 +  __ mfhi(FSR);
  1.1382  }
  1.1383  
  1.1384  void TemplateTable::lmul() {
  1.1385 @@ -1473,10 +1388,10 @@
  1.1386    if(UseLoongsonISA){
  1.1387      __ gsdmult(FSR, T2, FSR);
  1.1388    } else {
  1.1389 -      __ dmult(T2, FSR);
  1.1390 -      __ mflo(FSR);
  1.1391 -  } 
  1.1392 -} 
  1.1393 +    __ dmult(T2, FSR);
  1.1394 +    __ mflo(FSR);
  1.1395 +  }
  1.1396 +}
  1.1397  
  1.1398  // NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry
  1.1399  void TemplateTable::ldiv() {
  1.1400 @@ -1486,14 +1401,14 @@
  1.1401    __ bne(FSR, R0, normal);
  1.1402    __ delayed()->nop();
  1.1403  
  1.1404 -  //__ brk(7);		//generate FPE
  1.1405 +  //__ brk(7);    //generate FPE
  1.1406    __ jmp(Interpreter::_throw_ArithmeticException_entry);
  1.1407    __ delayed()->nop();
  1.1408  
  1.1409    __ bind(normal);
  1.1410    __ pop_l(A2, A3);
  1.1411    if (UseLoongsonISA) {
  1.1412 -    __ gsddiv(FSR, A2, FSR);	
  1.1413 +    __ gsddiv(FSR, A2, FSR);
  1.1414    } else {
  1.1415      __ ddiv(A2, FSR);
  1.1416      __ mflo(FSR);
  1.1417 @@ -1512,11 +1427,11 @@
  1.1418    __ delayed()->nop();
  1.1419  
  1.1420    __ bind(normal);
  1.1421 -  __ pop_l (A2, A3); 
  1.1422 +  __ pop_l (A2, A3);
  1.1423  
  1.1424    if(UseLoongsonISA){
  1.1425      __ gsdmod(FSR, A2, FSR);
  1.1426 -  } else { 
  1.1427 +  } else {
  1.1428      __ ddiv(A2, FSR);
  1.1429      __ mfhi(FSR);
  1.1430    }
  1.1431 @@ -1526,24 +1441,24 @@
  1.1432  // used registers : T0
  1.1433  void TemplateTable::lshl() {
  1.1434    transition(itos, ltos);
  1.1435 -  __ pop_l(T0, T1);	
  1.1436 +  __ pop_l(T0, T1);
  1.1437  #ifdef ASSERT
  1.1438    {
  1.1439      Label  L;
  1.1440      __ beq(T1, R0, L);
  1.1441      __ delayed()->nop();
  1.1442 -    //__ stop("lshl, wrong stack");  // <-- Fu 20130930 
  1.1443 +    //__ stop("lshl, wrong stack");  // <-- Fu 20130930
  1.1444      __ bind(L);
  1.1445    }
  1.1446  #endif
  1.1447 -  __ andi(FSR, FSR, 0x3f);	      // the bit to be shifted
   1.1448 +  __ andi(FSR, FSR, 0x3f);        // mask the shift count to 6 bits
  1.1449    __ dsllv(FSR, T0, FSR);
  1.1450  }
  1.1451  
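Long shifts mask the count to six bits rather than five (JLS 15.19 for long operands); the andi(FSR, FSR, 0x3f) makes this explicit before dsllv, which itself consumes only the low six bits of the shift register, so the andi is defensive rather than strictly required. Equivalent C++:

    jlong java_lshl(jlong value, jint shift) {
      return value << (shift & 0x3f);   // the andi + dsllv pair above
    }
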
  1.1452  // used registers : T0
  1.1453  void TemplateTable::lshr() {
  1.1454    transition(itos, ltos);
  1.1455 -  __ pop_l(T0, T1);	
  1.1456 +  __ pop_l(T0, T1);
  1.1457  #ifdef ASSERT
  1.1458    {
  1.1459      Label  L;
  1.1460 @@ -1553,14 +1468,14 @@
  1.1461      __ bind(L);
  1.1462    }
  1.1463  #endif
  1.1464 -  __ andi(FSR, FSR, 0x3f);				// the bit to be shifted
   1.1465 +  __ andi(FSR, FSR, 0x3f);        // mask the shift count to 6 bits
  1.1466    __ dsrav(FSR, T0, FSR);
  1.1467  }
  1.1468  
  1.1469  // used registers : T0
  1.1470  void TemplateTable::lushr() {
  1.1471    transition(itos, ltos);
  1.1472 -  __ pop_l(T0, T1);	
  1.1473 +  __ pop_l(T0, T1);
  1.1474  #ifdef ASSERT
  1.1475    {
  1.1476      Label  L;
  1.1477 @@ -1570,135 +1485,119 @@
  1.1478      __ bind(L);
  1.1479    }
  1.1480  #endif
  1.1481 -  __ andi(FSR, FSR, 0x3f);				// the bit to be shifted
   1.1482 +  __ andi(FSR, FSR, 0x3f);        // mask the shift count to 6 bits
  1.1483    __ dsrlv(FSR, T0, FSR);
  1.1484  }
  1.1485  
  1.1486  // result in FSF
  1.1487  void TemplateTable::fop2(Operation op) {
  1.1488 -	transition(ftos, ftos);
  1.1489 -	__ pop_ftos_to_esp();  // pop ftos into esp
  1.1490 -	switch (op) {
  1.1491 -		case add:
  1.1492 -			__ lwc1(FTF, at_sp());
  1.1493 -			__ add_s(FSF, FTF, FSF);
  1.1494 -			break;
  1.1495 -		case sub: 
  1.1496 -			__ lwc1(FTF, at_sp());
  1.1497 -			__ sub_s(FSF, FTF, FSF);
  1.1498 -			break;
  1.1499 -		case mul: 
  1.1500 -			__ lwc1(FTF, at_sp());
  1.1501 -			__ mul_s(FSF, FTF, FSF);
  1.1502 -			break;
  1.1503 -		case div: 
  1.1504 -			__ lwc1(FTF, at_sp());
  1.1505 -			__ div_s(FSF, FTF, FSF);
  1.1506 -			break;
  1.1507 -		case rem: 
  1.1508 -            __ mov_s(F13, FSF);
  1.1509 -			__ lwc1(F12, at_sp());
  1.1510 -            __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2);
  1.1511 -			break;
  1.1512 -		default : ShouldNotReachHere();
  1.1513 -	}
  1.1514 -
  1.1515 -	__ daddi(SP, SP, 1 * wordSize);
  1.1516 +  transition(ftos, ftos);
  1.1517 +  switch (op) {
  1.1518 +    case add:
  1.1519 +      __ lwc1(FTF, at_sp());
  1.1520 +      __ add_s(FSF, FTF, FSF);
  1.1521 +      break;
  1.1522 +    case sub:
  1.1523 +      __ lwc1(FTF, at_sp());
  1.1524 +      __ sub_s(FSF, FTF, FSF);
  1.1525 +      break;
  1.1526 +    case mul:
  1.1527 +      __ lwc1(FTF, at_sp());
  1.1528 +      __ mul_s(FSF, FTF, FSF);
  1.1529 +      break;
  1.1530 +    case div:
  1.1531 +      __ lwc1(FTF, at_sp());
  1.1532 +      __ div_s(FSF, FTF, FSF);
  1.1533 +      break;
  1.1534 +    case rem:
  1.1535 +      __ mov_s(F13, FSF);
  1.1536 +      __ lwc1(F12, at_sp());
   1.1537 +      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2);
  1.1538 +      break;
  1.1539 +    default : ShouldNotReachHere();
  1.1540 +  }
  1.1541 +
  1.1542 +  __ daddi(SP, SP, 1 * wordSize);
  1.1543  }
  1.1544  
  1.1545  // result in SSF||FSF
   1.1546  // note: the strictfp flags are not handled here
  1.1547  void TemplateTable::dop2(Operation op) {
  1.1548 -	transition(dtos, dtos);
  1.1549 -	__ pop_dtos_to_esp();  // pop dtos into esp
  1.1550 -	switch (op) {
  1.1551 -		case add: 
  1.1552 -			__ ldc1(FTF, at_sp());
  1.1553 -			__ add_d(FSF, FTF, FSF);
  1.1554 -			break;
  1.1555 -		case sub: 
  1.1556 -			__ ldc1(FTF, at_sp());
  1.1557 -			__ sub_d(FSF, FTF, FSF);
  1.1558 -			break;
  1.1559 -		case mul: 
  1.1560 -			__ ldc1(FTF, at_sp());
  1.1561 -			__ mul_d(FSF, FTF, FSF);
  1.1562 -			break;
  1.1563 -		case div:
  1.1564 -			__ ldc1(FTF, at_sp());
  1.1565 -			__ div_d(FSF, FTF, FSF);
  1.1566 -			break;
  1.1567 -		case rem:
  1.1568 -            __ mov_d(F13, FSF);
  1.1569 -            __ ldc1(F12, at_sp());
  1.1570 -            __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2);
  1.1571 -			break;
  1.1572 -		default : ShouldNotReachHere();
  1.1573 -	}
  1.1574 -
  1.1575 -	__ daddi(SP, SP, 2 * wordSize);
  1.1576 +  transition(dtos, dtos);
  1.1577 +  switch (op) {
  1.1578 +    case add:
  1.1579 +      __ ldc1(FTF, at_sp());
  1.1580 +      __ add_d(FSF, FTF, FSF);
  1.1581 +      break;
  1.1582 +    case sub:
  1.1583 +      __ ldc1(FTF, at_sp());
  1.1584 +      __ sub_d(FSF, FTF, FSF);
  1.1585 +      break;
  1.1586 +    case mul:
  1.1587 +      __ ldc1(FTF, at_sp());
  1.1588 +      __ mul_d(FSF, FTF, FSF);
  1.1589 +      break;
  1.1590 +    case div:
  1.1591 +      __ ldc1(FTF, at_sp());
  1.1592 +      __ div_d(FSF, FTF, FSF);
  1.1593 +      break;
  1.1594 +    case rem:
  1.1595 +      __ mov_d(F13, FSF);
  1.1596 +      __ ldc1(F12, at_sp());
  1.1597 +      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2);
  1.1598 +      break;
  1.1599 +    default : ShouldNotReachHere();
  1.1600 +  }
  1.1601 +
  1.1602 +  __ daddi(SP, SP, 2 * wordSize);
  1.1603  }
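
Both rem cases hand off to the runtime instead of inlining an FPU loop: the TOS divisor is moved to F13 and the next stack slot (the dividend) is loaded into F12, the first two floating-point argument registers, before the leaf call. Java's frem/drem is the truncating remainder, i.e. the same result C's fmod family produces, so a behavioral sketch (an assumption about semantics only; the real entry points live in SharedRuntime) is:

    #include <cmath>

    // behavioral sketch of the two leaf calls above, not HotSpot's code
    static float  frem_sketch(float  dividend, float  divisor) { return fmodf(dividend, divisor); }
    static double drem_sketch(double dividend, double divisor) { return fmod (dividend, divisor); }
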
  1.1604  
  1.1605  void TemplateTable::ineg() {
  1.1606 -	transition(itos, itos);
  1.1607 -	__ neg(FSR);
  1.1608 +  transition(itos, itos);
  1.1609 +  __ neg(FSR);
  1.1610  }
  1.1611  
  1.1612  void TemplateTable::lneg() {
  1.1613 -	transition(ltos, ltos);
  1.1614 -	__ dsubu(FSR, R0, FSR);
  1.1615 +  transition(ltos, ltos);
  1.1616 +  __ dsubu(FSR, R0, FSR);
  1.1617  }
  1.1618 -/*
  1.1619 -// Note: 'double' and 'long long' have 32-bits alignment on x86.
  1.1620 -static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  1.1621 -  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  1.1622 -  // of 128-bits operands for SSE instructions.
  1.1623 -  jlong *operand = (jlong*)(((intptr_t)adr)&((intptr_t)(~0xF)));
  1.1624 -  // Store the value to a 128-bits operand.
  1.1625 -  operand[0] = lo;
  1.1626 -  operand[1] = hi;
  1.1627 -  return operand;
  1.1628 +
  1.1629 +void TemplateTable::fneg() {
  1.1630 +  transition(ftos, ftos);
  1.1631 +  __ neg_s(FSF, FSF);
  1.1632  }
  1.1633  
  1.1634 -// Buffer for 128-bits masks used by SSE instructions.
  1.1635 -static jlong float_signflip_pool[2*2];
  1.1636 -static jlong double_signflip_pool[2*2];
  1.1637 -*/
  1.1638 -void TemplateTable::fneg() {
  1.1639 -	transition(ftos, ftos);
  1.1640 -	__ neg_s(FSF, FSF);
  1.1641 -}
  1.1642 -
  1.1643  void TemplateTable::dneg() {
  1.1644 -	transition(dtos, dtos);
  1.1645 -	__ neg_d(FSF, FSF);
  1.1646 +  transition(dtos, dtos);
  1.1647 +  __ neg_d(FSF, FSF);
  1.1648  }
  1.1649  
  1.1650  // used registers : T2
  1.1651  void TemplateTable::iinc() {
  1.1652 -	transition(vtos, vtos);
  1.1653 -	locals_index(T2);
  1.1654 -	__ lw(FSR, T2, 0);
  1.1655 -	__ lb(AT, at_bcp(2));           // get constant
  1.1656 -	__ daddu(FSR, FSR, AT);
  1.1657 -	__ sw(FSR, T2, 0);
  1.1658 +  transition(vtos, vtos);
  1.1659 +  locals_index(T2);
  1.1660 +  __ lw(FSR, T2, 0);
  1.1661 +  __ lb(AT, at_bcp(2));           // get constant
  1.1662 +  __ daddu(FSR, FSR, AT);
  1.1663 +  __ sw(FSR, T2, 0);
  1.1664  }
  1.1665  
  1.1666  // used register : T2
  1.1667  void TemplateTable::wide_iinc() {
  1.1668 -	transition(vtos, vtos);
  1.1669 -	locals_index_wide(T2);
  1.1670 -	__ get_2_byte_integer_at_bcp(FSR, AT, 4);
  1.1671 -	__ hswap(FSR);
  1.1672 -	__ lw(AT, T2, 0);
  1.1673 -	__ daddu(FSR, AT, FSR);
  1.1674 -	__ sw(FSR, T2, 0);
  1.1675 +  transition(vtos, vtos);
  1.1676 +  locals_index_wide(T2);
  1.1677 +  __ get_2_byte_integer_at_bcp(FSR, AT, 4);
  1.1678 +  __ hswap(FSR);
  1.1679 +  __ lw(AT, T2, 0);
  1.1680 +  __ daddu(FSR, AT, FSR);
  1.1681 +  __ sw(FSR, T2, 0);
  1.1682  }
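
For reference, the operand layout the two templates decode: iinc carries a one-byte unsigned local index and a signed one-byte constant, while the wide form carries two-byte versions of both, big-endian in the class file, which is why the value fetched at bcp+4 is hswap()ed on this little-endian target. A C++ sketch with hypothetical helpers (and ignoring that the interpreter actually indexes locals downward from LVP):

    #include <cstdint>

    void iinc_sketch(const uint8_t* bcp, int32_t* locals) {
      uint8_t index = bcp[1];                                // unsigned local index
      int8_t  con   = (int8_t)bcp[2];                        // signed 8-bit increment
      locals[index] += con;
    }

    void wide_iinc_sketch(const uint8_t* bcp, int32_t* locals) {  // bcp -> the wide prefix
      uint16_t index = (uint16_t)((bcp[2] << 8) | bcp[3]);        // big-endian 16-bit index
      int16_t  con   = (int16_t)((bcp[4] << 8) | bcp[5]);         // big-endian, sign-extended
      locals[index] += con;
    }
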
  1.1683  
  1.1684  void TemplateTable::convert() {
  1.1685    // Checking
  1.1686  #ifdef ASSERT
  1.1687 -  { TosState tos_in  = ilgl;
  1.1688 +  {
  1.1689 +    TosState tos_in  = ilgl;
  1.1690      TosState tos_out = ilgl;
  1.1691      switch (bytecode()) {
  1.1692        case Bytecodes::_i2l: // fall through
  1.1693 @@ -1744,7 +1643,6 @@
  1.1694    // (Note: use pushl(ecx)/popl(ecx) for 1/2-word stack-ptr manipulation)
  1.1695    switch (bytecode()) {
  1.1696      case Bytecodes::_i2l:
  1.1697 -      //__ extend_sign(SSR, FSR);
  1.1698        __ sll(FSR, FSR, 0);
  1.1699        break;
  1.1700      case Bytecodes::_i2f:
  1.1701 @@ -1766,112 +1664,108 @@
  1.1702        break;
  1.1703      case Bytecodes::_l2i:
  1.1704        __ sll(FSR, FSR, 0);
  1.1705 -      //__ dsll32(FSR, FSR, 0);
  1.1706 -      //__ dsra32(FSR, FSR, 0);
  1.1707        break;
  1.1708      case Bytecodes::_l2f:
  1.1709        __ dmtc1(FSR, FSF);
  1.1710 -      //__ mtc1(SSR, SSF);
  1.1711        __ cvt_s_l(FSF, FSF);
  1.1712        break;
  1.1713      case Bytecodes::_l2d:
  1.1714        __ dmtc1(FSR, FSF);
  1.1715 -      //__ mtc1(SSR, SSF);
  1.1716        __ cvt_d_l(FSF, FSF);
  1.1717        break;
  1.1718      case Bytecodes::_f2i:
  1.1719 -      {
  1.1720 -	Label L;
  1.1721 -
  1.1722 -    __ trunc_w_s(F12, FSF);
  1.1723 -    __ move(AT, 0x7fffffff);
  1.1724 -    __ mfc1(FSR, F12);
  1.1725 -    __ c_un_s(FSF, FSF);    //NaN?
  1.1726 -    __ movt(FSR, R0);
  1.1727 -    
  1.1728 -    __ bne(AT, FSR, L);
  1.1729 -    __ delayed()->lui(T9, 0x8000);
  1.1730 -
  1.1731 -    __ mfc1(AT, FSF);
  1.1732 -    __ andr(AT, AT, T9);
  1.1733 -
  1.1734 -    __ movn(FSR, T9, AT);
  1.1735 -    
  1.1736 -    __ bind(L);
  1.1737 -      }
  1.1738 +    {
  1.1739 +      Label L;
  1.1740 +
  1.1741 +      __ trunc_w_s(F12, FSF);
  1.1742 +      __ move(AT, 0x7fffffff);
  1.1743 +      __ mfc1(FSR, F12);
  1.1744 +      __ c_un_s(FSF, FSF);    //NaN?
  1.1745 +      __ movt(FSR, R0);
  1.1746 +
  1.1747 +      __ bne(AT, FSR, L);
  1.1748 +      __ delayed()->lui(T9, 0x8000);
  1.1749 +
  1.1750 +      __ mfc1(AT, FSF);
  1.1751 +      __ andr(AT, AT, T9);
  1.1752 +
  1.1753 +      __ movn(FSR, T9, AT);
  1.1754 +
  1.1755 +      __ bind(L);
  1.1756 +    }
  1.1757        break;
  1.1758      case Bytecodes::_f2l:
  1.1759 -      {
  1.1760 -	Label L;
  1.1761 -
  1.1762 -    __ trunc_l_s(F12, FSF);
  1.1763 -    __ daddiu(AT, R0, -1);
  1.1764 -    __ dsrl(AT, AT, 1);
  1.1765 -    __ dmfc1(FSR, F12);
  1.1766 -    __ c_un_s(FSF, FSF);    //NaN?
  1.1767 -    __ movt(FSR, R0);
  1.1768 -    
  1.1769 -    __ bne(AT, FSR, L);
  1.1770 -    __ delayed()->lui(T9, 0x8000);
  1.1771 -
  1.1772 -    __ mfc1(AT, FSF);
  1.1773 -    __ andr(AT, AT, T9);
  1.1774 -
  1.1775 -    __ dsll32(T9, T9, 0);
  1.1776 -    __ movn(FSR, T9, AT);
  1.1777 -
  1.1778 -	__ bind(L);
  1.1779 -      }
  1.1780 +    {
  1.1781 +      Label L;
  1.1782 +
  1.1783 +      __ trunc_l_s(F12, FSF);
  1.1784 +      __ daddiu(AT, R0, -1);
  1.1785 +      __ dsrl(AT, AT, 1);
  1.1786 +      __ dmfc1(FSR, F12);
  1.1787 +      __ c_un_s(FSF, FSF);    //NaN?
  1.1788 +      __ movt(FSR, R0);
  1.1789 +
  1.1790 +      __ bne(AT, FSR, L);
  1.1791 +      __ delayed()->lui(T9, 0x8000);
  1.1792 +
  1.1793 +      __ mfc1(AT, FSF);
  1.1794 +      __ andr(AT, AT, T9);
  1.1795 +
  1.1796 +      __ dsll32(T9, T9, 0);
  1.1797 +      __ movn(FSR, T9, AT);
  1.1798 +
  1.1799 +      __ bind(L);
  1.1800 +    }
  1.1801        break;
  1.1802      case Bytecodes::_f2d:
  1.1803        __ cvt_d_s(FSF, FSF);
  1.1804        break;
  1.1805      case Bytecodes::_d2i:
  1.1806 -      {
  1.1807 -	Label L;
  1.1808 -
  1.1809 -    __ trunc_w_d(F12, FSF);
  1.1810 -    __ move(AT, 0x7fffffff);
  1.1811 -    __ mfc1(FSR, F12);
  1.1812 -    
  1.1813 -    __ bne(FSR, AT, L);
  1.1814 -    __ delayed()->mtc1(R0, F12);
  1.1815 -
  1.1816 -    __ cvt_d_w(F12, F12); 
  1.1817 -    __ c_ult_d(FSF, F12);
  1.1818 -    __ bc1f(L);
  1.1819 -    __ delayed()->addiu(T9, R0, -1);
  1.1820 -
  1.1821 -    __ c_un_d(FSF, FSF);    //NaN?
  1.1822 -    __ subu32(FSR, T9, AT);
  1.1823 -    __ movt(FSR, R0);
  1.1824 -
  1.1825 -	__ bind(L);
  1.1826 -      }
  1.1827 +    {
  1.1828 +      Label L;
  1.1829 +
  1.1830 +      __ trunc_w_d(F12, FSF);
  1.1831 +      __ move(AT, 0x7fffffff);
  1.1832 +      __ mfc1(FSR, F12);
  1.1833 +
  1.1834 +      __ bne(FSR, AT, L);
  1.1835 +      __ delayed()->mtc1(R0, F12);
  1.1836 +
  1.1837 +      __ cvt_d_w(F12, F12);
  1.1838 +      __ c_ult_d(FSF, F12);
  1.1839 +      __ bc1f(L);
  1.1840 +      __ delayed()->addiu(T9, R0, -1);
  1.1841 +
  1.1842 +      __ c_un_d(FSF, FSF);    //NaN?
  1.1843 +      __ subu32(FSR, T9, AT);
  1.1844 +      __ movt(FSR, R0);
  1.1845 +
  1.1846 +      __ bind(L);
  1.1847 +    }
  1.1848        break;
  1.1849      case Bytecodes::_d2l:
  1.1850 -      {
  1.1851 -	Label L;
  1.1852 -
  1.1853 -    __ trunc_l_d(F12, FSF);
  1.1854 -    __ daddiu(AT, R0, -1);
  1.1855 -    __ dsrl(AT, AT, 1);
  1.1856 -    __ dmfc1(FSR, F12);
  1.1857 -
  1.1858 -    __ bne(FSR, AT, L);
  1.1859 -    __ delayed()->mtc1(R0, F12);
  1.1860 -
  1.1861 -    __ cvt_d_w(F12, F12);
  1.1862 -    __ c_ult_d(FSF, F12);
  1.1863 -    __ bc1f(L);
  1.1864 -    __ delayed()->daddiu(T9, R0, -1);
  1.1865 -
  1.1866 -    __ c_un_d(FSF, FSF);    //NaN?
  1.1867 -    __ subu(FSR, T9, AT);
  1.1868 -    __ movt(FSR, R0);
  1.1869 -
  1.1870 -	__ bind(L);
  1.1871 -      }
  1.1872 +    {
  1.1873 +      Label L;
  1.1874 +
  1.1875 +      __ trunc_l_d(F12, FSF);
  1.1876 +      __ daddiu(AT, R0, -1);
  1.1877 +      __ dsrl(AT, AT, 1);
  1.1878 +      __ dmfc1(FSR, F12);
  1.1879 +
  1.1880 +      __ bne(FSR, AT, L);
  1.1881 +      __ delayed()->mtc1(R0, F12);
  1.1882 +
  1.1883 +      __ cvt_d_w(F12, F12);
  1.1884 +      __ c_ult_d(FSF, F12);
  1.1885 +      __ bc1f(L);
  1.1886 +      __ delayed()->daddiu(T9, R0, -1);
  1.1887 +
  1.1888 +      __ c_un_d(FSF, FSF);    //NaN?
  1.1889 +      __ subu(FSR, T9, AT);
  1.1890 +      __ movt(FSR, R0);
  1.1891 +
   1.1892 +      __ bind(L);
  1.1893 +    }
  1.1894        break;
  1.1895      case Bytecodes::_d2f:
  1.1896        __ cvt_s_d(FSF, FSF);
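
All four truncating conversions follow one pattern: trunc to an integer register, then patch up the two cases the hardware result differs from Java's rules, NaN (detected with c.un, result forced to 0 via movt) and out-of-range inputs (detected by comparing the trunc result against the saturation constant, result forced to MIN/MAX). The required semantics, as a C++ sketch of f2i (the l/d variants differ only in widths):

    #include <cstdint>
    #include <cmath>

    int32_t f2i_sketch(float f) {                    // JLS 5.1.3 narrowing conversion
      if (std::isnan(f)) return 0;                   // NaN -> 0
      if (f >=  2147483648.0f) return INT32_MAX;     // clamp on positive overflow
      if (f <= -2147483648.0f) return INT32_MIN;     // clamp on negative overflow
      return (int32_t)f;                             // otherwise truncate toward zero
    }
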
  1.1897 @@ -1912,41 +1806,39 @@
  1.1898  }
  1.1899  
  1.1900  void TemplateTable::float_cmp(bool is_float, int unordered_result) {
  1.1901 -	Label less, done;
  1.1902 -
  1.1903 -	__ move(FSR, R0);
  1.1904 -
  1.1905 -	if (is_float) {
  1.1906 -		__ pop_ftos_to_esp();
  1.1907 -		__ lwc1(FTF, at_sp());
  1.1908 -		__ c_eq_s(FTF, FSF);
  1.1909 -		__ bc1t(done);
  1.1910 -		__ delayed()->daddi(SP, SP, 1 * wordSize);
  1.1911 -
  1.1912 -		if (unordered_result<0)
  1.1913 -			__ c_ult_s(FTF, FSF);
  1.1914 -		else
  1.1915 -			__ c_olt_s(FTF, FSF);
  1.1916 -	} else {
  1.1917 -		__ pop_dtos_to_esp();
  1.1918 -		__ ldc1(FTF, at_sp());
  1.1919 -		__ c_eq_d(FTF, FSF);
  1.1920 -		__ bc1t(done);
  1.1921 -		__ delayed()->daddi(SP, SP, 2 * wordSize);
  1.1922 -
  1.1923 -		if (unordered_result<0)
  1.1924 -			__ c_ult_d(FTF, FSF);
  1.1925 -		else
  1.1926 -			__ c_olt_d(FTF, FSF);
  1.1927 -	}
  1.1928 -	__ bc1t(less);
  1.1929 -	__ delayed()->nop();
  1.1930 -	__ move(FSR, 1);
  1.1931 -	__ b(done);
  1.1932 -	__ delayed()->nop();
  1.1933 -	__ bind(less);
  1.1934 -	__ move(FSR, -1);
  1.1935 -	__ bind(done);
  1.1936 +  Label less, done;
  1.1937 +
  1.1938 +  __ move(FSR, R0);
  1.1939 +
  1.1940 +  if (is_float) {
  1.1941 +    __ lwc1(FTF, at_sp());
  1.1942 +    __ c_eq_s(FTF, FSF);
  1.1943 +    __ bc1t(done);
  1.1944 +    __ delayed()->daddi(SP, SP, 1 * wordSize);
  1.1945 +
  1.1946 +    if (unordered_result<0)
  1.1947 +      __ c_ult_s(FTF, FSF);
  1.1948 +    else
  1.1949 +      __ c_olt_s(FTF, FSF);
  1.1950 +  } else {
  1.1951 +    __ ldc1(FTF, at_sp());
  1.1952 +    __ c_eq_d(FTF, FSF);
  1.1953 +    __ bc1t(done);
  1.1954 +    __ delayed()->daddi(SP, SP, 2 * wordSize);
  1.1955 +
  1.1956 +    if (unordered_result<0)
  1.1957 +      __ c_ult_d(FTF, FSF);
  1.1958 +    else
  1.1959 +      __ c_olt_d(FTF, FSF);
  1.1960 +  }
  1.1961 +  __ bc1t(less);
  1.1962 +  __ delayed()->nop();
  1.1963 +  __ move(FSR, 1);
  1.1964 +  __ b(done);
  1.1965 +  __ delayed()->nop();
  1.1966 +  __ bind(less);
  1.1967 +  __ move(FSR, -1);
  1.1968 +  __ bind(done);
  1.1969  }
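
The unordered_result parameter is what distinguishes fcmpl/dcmpl (-1) from fcmpg/dcmpg (+1): it decides which way a NaN operand pushes the comparison, and is why the code selects c.ult (unordered-or-less) versus c.olt (ordered-less). A sketch of the value left in FSR, with a = the value below TOS (loaded into FTF) and b = TOS (FSF):

    int float_cmp_sketch(float a, float b, int unordered_result) {
      if (a == b) return 0;
      if (a <  b) return -1;
      if (a >  b) return  1;
      return unordered_result;   // at least one operand is NaN
    }
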
  1.1970  
  1.1971  
  1.1972 @@ -1958,14 +1850,13 @@
  1.1973  // Rnext : next bytecode, this is required by dispatch_base
  1.1974  void TemplateTable::branch(bool is_jsr, bool is_wide) {
  1.1975    __ get_method(T3);
  1.1976 -  __ profile_taken_branch(A7, T2);		// only C2 meaningful 
   1.1977 +  __ profile_taken_branch(A7, T2);    // only meaningful for C2
  1.1978  
  1.1979  #ifndef CORE
  1.1980 -  const ByteSize be_offset = MethodCounters::backedge_counter_offset() 
  1.1981 -    + InvocationCounter::counter_offset();
  1.1982 -  const ByteSize inv_offset = MethodCounters::invocation_counter_offset() 
  1.1983 -    + InvocationCounter::counter_offset();
  1.1984 -  const int method_offset = frame::interpreter_frame_method_offset * wordSize;
  1.1985 +  const ByteSize be_offset = MethodCounters::backedge_counter_offset() +
  1.1986 +                             InvocationCounter::counter_offset();
  1.1987 +  const ByteSize inv_offset = MethodCounters::invocation_counter_offset() +
  1.1988 +                              InvocationCounter::counter_offset();
  1.1989  #endif // CORE
  1.1990  
  1.1991    // Load up T4 with the branch displacement
  1.1992 @@ -1978,8 +1869,8 @@
  1.1993    }
  1.1994  
  1.1995    // Handle all the JSR stuff here, then exit.
  1.1996 -  // It's much shorter and cleaner than intermingling with the
  1.1997 -  // non-JSR normal-branch stuff occuring below.
  1.1998 +  // It's much shorter and cleaner than intermingling with the non-JSR
   1.1999 +  // normal-branch stuff occurring below.
  1.2000    if (is_jsr) {
  1.2001      // Pre-load the next target bytecode into Rnext
  1.2002      __ dadd(AT, BCP, A7);
  1.2003 @@ -1992,7 +1883,6 @@
  1.2004      // Adjust the bcp in BCP by the displacement in A7
  1.2005      __ dadd(BCP, BCP, A7);
  1.2006      // jsr returns atos that is not an oop
  1.2007 -    // __ dispatch_only_noverify(atos);
  1.2008      // Push return address
  1.2009      __ push_i(FSR);
  1.2010      // jsr returns vtos
  1.2011 @@ -2024,12 +1914,12 @@
  1.2012      // T4: target offset
  1.2013      // BCP: target bcp
  1.2014      // LVP: locals pointer
  1.2015 -    __ bgtz(A7, dispatch);	// check if forward or backward branch
  1.2016 +    __ bgtz(A7, dispatch);  // check if forward or backward branch
  1.2017      __ delayed()->nop();
  1.2018  
  1.2019      // check if MethodCounters exists
  1.2020      Label has_counters;
  1.2021 -    __ ld(AT, T3, in_bytes(Method::method_counters_offset()));  // use AT as MDO, TEMP 
  1.2022 +    __ ld(AT, T3, in_bytes(Method::method_counters_offset()));  // use AT as MDO, TEMP
  1.2023      __ bne(AT, R0, has_counters);
  1.2024      __ nop();
  1.2025      __ push(T3);
  1.2026 @@ -2043,7 +1933,7 @@
  1.2027      __ nop();
  1.2028      __ bind(has_counters);
  1.2029  
  1.2030 -    // increment back edge counter 
  1.2031 +    // increment back edge counter
  1.2032      __ ld(T1, T3, in_bytes(Method::method_counters_offset()));
  1.2033      __ lw(T0, T1, in_bytes(be_offset));
  1.2034      __ increment(T0, InvocationCounter::count_increment);
  1.2035 @@ -2052,9 +1942,6 @@
  1.2036      // load invocation counter
  1.2037      __ lw(T1, T1, in_bytes(inv_offset));
   1.2038      // buffer bit added, so the mask is not needed
  1.2039 -    // by yjl 10/24/2005
  1.2040 -    //__ move(AT, InvocationCounter::count_mask_value);
  1.2041 -    //__ andr(T1, T1, AT);
  1.2042  
  1.2043      // dadd backedge counter & invocation counter
  1.2044      __ dadd(T1, T1, T0);
  1.2045 @@ -2074,38 +1961,34 @@
  1.2046        __ test_method_data_pointer(T1, profile_method);
  1.2047  
  1.2048        if (UseOnStackReplacement) {
  1.2049 -	// check for overflow against ebx which is the MDO taken count
  1.2050 -	//__ lui(AT, Assembler::split_high(int(&InvocationCounter::InterpreterBackwardBranchLimit)));
  1.2051 -	//__ lw(AT, AT, Assembler::split_low(int(&InvocationCounter::InterpreterBackwardBranchLimit)));
  1.2052 -	__ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit);
  1.2053 -	__ lw(AT, AT, 0);
  1.2054 -	// the value Rnext Is get from the beginning profile_taken_branch
  1.2055 -	__ slt(AT, T2, AT);
  1.2056 -	__ bne(AT, R0, dispatch);
  1.2057 -	__ delayed()->nop();
  1.2058 -
  1.2059 -	// When ProfileInterpreter is on, the backedge_count comes 
  1.2060 -	// from the methodDataOop, which value does not get reset on 
  1.2061 -	// the call to  frequency_counter_overflow().  
  1.2062 -	// To avoid excessive calls to the overflow routine while 
  1.2063 -	// the method is being compiled, dadd a second test to make 
  1.2064 -	// sure the overflow function is called only once every 
  1.2065 -	// overflow_frequency.
  1.2066 -	const int overflow_frequency = 1024;
  1.2067 -	__ andi(AT, T2, overflow_frequency-1);
  1.2068 -	__ beq(AT, R0, backedge_counter_overflow);
  1.2069 -	__ delayed()->nop();
   1.2070 +        // check for overflow against T2, which holds the MDO taken count
  1.2071 +        __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit);
  1.2072 +        __ lw(AT, AT, 0);
   1.2073 +        // T2 was loaded by profile_taken_branch at the top of this method
  1.2074 +        __ slt(AT, T2, AT);
  1.2075 +        __ bne(AT, R0, dispatch);
  1.2076 +        __ delayed()->nop();
  1.2077 +
   1.2078 +        // When ProfileInterpreter is on, the backedge_count comes
   1.2079 +        // from the methodDataOop, whose value does not get reset on
   1.2080 +        // the call to frequency_counter_overflow(). To avoid
   1.2081 +        // excessive calls to the overflow routine while the method
   1.2082 +        // is being compiled, add a second test to make sure the
   1.2083 +        // overflow function is called only once every
   1.2084 +        // overflow_frequency.
  1.2085 +        const int overflow_frequency = 1024;
  1.2086 +        __ andi(AT, T2, overflow_frequency-1);
  1.2087 +        __ beq(AT, R0, backedge_counter_overflow);
  1.2088 +        __ delayed()->nop();
  1.2089        }
  1.2090      } else {
  1.2091        if (UseOnStackReplacement) {
  1.2092 -	// check for overflow against eax, which is the sum of the counters
  1.2093 -	//__ lui(AT, Assembler::split_high(int(&InvocationCounter::InterpreterBackwardBranchLimit)));
  1.2094 -	//__ lw(AT, AT, Assembler::split_low(int(&InvocationCounter::InterpreterBackwardBranchLimit)));
  1.2095 -	__ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit);
  1.2096 -	__ lw(AT, AT, 0);
  1.2097 -	__ slt(AT, T1, AT);
  1.2098 -	__ beq(AT, R0, backedge_counter_overflow);
  1.2099 -	__ delayed()->nop();
   1.2100 +        // check for overflow against T1, which holds the sum of the counters
  1.2101 +        __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit);
  1.2102 +        __ lw(AT, AT, 0);
  1.2103 +        __ slt(AT, T1, AT);
  1.2104 +        __ beq(AT, R0, backedge_counter_overflow);
  1.2105 +        __ delayed()->nop();
  1.2106        }
  1.2107      }
  1.2108      __ bind(dispatch);
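
The overflow_frequency test a few lines up is a cheap power-of-two modulus: because 1024 is a power of two, `andi(AT, T2, 1023)` leaves zero exactly once every 1024 bumps of the taken count, which is the throttle the comment describes. Sketch:

    bool should_call_overflow_sketch(unsigned taken_count) {
      const unsigned overflow_frequency = 1024;          // must remain a power of two
      return (taken_count & (overflow_frequency - 1)) == 0;
    }
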
  1.2109 @@ -2126,18 +2009,7 @@
  1.2110        __ bind(profile_method);
  1.2111        __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method));
  1.2112        __ lbu(Rnext, BCP, 0);
  1.2113 -     
  1.2114        __ set_method_data_pointer_for_bcp();
  1.2115 -/*
  1.2116 -      __ ld(T3, FP, method_offset);
  1.2117 -      __ lw(T3, T3, in_bytes(Method::method_data_offset()));
  1.2118 -      __ sw(T3, FP, frame::interpreter_frame_mdx_offset * wordSize);
  1.2119 -      __ test_method_data_pointer(T3, dispatch);
  1.2120 -      // offset non-null mdp by MDO::data_offset() + IR::profile_method()
  1.2121 -      __ daddi(T3, T3, in_bytes(MethodData::data_offset()));
  1.2122 -      __ dadd(T3, T3, T1);
  1.2123 -      __ sw(T3, FP, frame::interpreter_frame_mdx_offset * wordSize);
  1.2124 -*/
  1.2125        __ b(dispatch);
  1.2126        __ delayed()->nop();
  1.2127      }
  1.2128 @@ -2145,9 +2017,9 @@
  1.2129      if (UseOnStackReplacement) {
  1.2130        // invocation counter overflow
  1.2131        __ bind(backedge_counter_overflow);
  1.2132 -      __ sub(A7, BCP, A7);	// branch bcp
  1.2133 -      call_VM(NOREG, CAST_FROM_FN_PTR(address, 
  1.2134 -	    InterpreterRuntime::frequency_counter_overflow), A7);
  1.2135 +      __ sub(A7, BCP, A7);  // branch bcp
  1.2136 +      call_VM(NOREG, CAST_FROM_FN_PTR(address,
   1.2137 +              InterpreterRuntime::frequency_counter_overflow), A7);
  1.2138        __ lbu(Rnext, BCP, 0);
  1.2139  
  1.2140        // V0: osr nmethod (osr ok) or NULL (osr not possible)
  1.2141 @@ -2169,97 +2041,39 @@
  1.2142        //edx  Rnext: target bytecode
  1.2143        //edi  LVP: locals pointer
  1.2144        //esi  BCP: bcp
  1.2145 -      __ move(BCP, V0); 
  1.2146 +      __ move(BCP, V0);
  1.2147        // const Register thread = ecx;
  1.2148        const Register thread = TREG;
  1.2149  #ifndef OPT_THREAD
  1.2150        __ get_thread(thread);
  1.2151  #endif
  1.2152 -      call_VM(noreg, CAST_FROM_FN_PTR(address, 
  1.2153 -	    SharedRuntime::OSR_migration_begin));
  1.2154 +      call_VM(noreg, CAST_FROM_FN_PTR(address,
   1.2155 +              SharedRuntime::OSR_migration_begin));
   1.2156        // V0 is the OSR buffer; move it to the expected parameter location
  1.2157 -      //refer to osrBufferPointer in c1_LIRAssembler_mips.cpp	
  1.2158 +      //refer to osrBufferPointer in c1_LIRAssembler_mips.cpp
  1.2159        __ move(T0, V0);
  1.2160  
  1.2161        // pop the interpreter frame
  1.2162 -      //  __ movl(edx, Address(ebp, frame::interpreter_frame_sender_sp_offset 
  1.2163 -      //  * wordSize)); // get sender sp
  1.2164 -      __ ld(A7, Address(FP, 
  1.2165 -	    frame::interpreter_frame_sender_sp_offset * wordSize)); 
  1.2166 -      //FIXME, shall we keep the return address on the stack?	
  1.2167 +      __ ld(A7, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
  1.2168 +      //FIXME, shall we keep the return address on the stack?
  1.2169        __ leave();                                // remove frame anchor
  1.2170 -      // __ popl(edi);                         // get return address
  1.2171 -      //__ daddi(SP, SP, wordSize);               // get return address
  1.2172 -      //   __ pop(LVP);	
  1.2173 -      __ move(LVP, RA);	
  1.2174 -      // __ movl(esp, edx);                         // set sp to sender sp
  1.2175 +      __ move(LVP, RA);
  1.2176        __ move(SP, A7);
  1.2177  
  1.2178 -      Label skip;
  1.2179 -      Label chkint;
  1.2180 -
  1.2181 -      // The interpreter frame we have removed may be returning to
  1.2182 -      // either the callstub or the interpreter. Since we will
  1.2183 -      // now be returning from a compiled (OSR) nmethod we must
  1.2184 -      // adjust the return to the return were it can handler compiled
  1.2185 -      // results and clean the fpu stack. This is very similar to
  1.2186 -      // what a i2c adapter must do.
  1.2187 -
  1.2188 -      // Are we returning to the call stub?
  1.2189 -#if 0	
  1.2190 -      // __ cmpl(edi, (int)StubRoutines::_call_stub_return_address);
  1.2191 -      __ daddi(AT, LVP, -(int)StubRoutines::_call_stub_return_address); 
  1.2192 -      //  __ jcc(Assembler::notEqual, chkint);
  1.2193 -      __ bne(AT, R0, chkint);
  1.2194 -      __ delayed()->nop();      
  1.2195 -      // yes adjust to the specialized call stub  return.
  1.2196 -      // assert(StubRoutines::i486::get_call_stub_compiled_return() != NULL,
  1.2197 -      // "must be set");
  1.2198 -      assert(StubRoutines::gs2::get_call_stub_compiled_return() != NULL, 
  1.2199 -	  "must be set");
  1.2200 -      // __ movl(edi, (intptr_t) StubRoutines::i486::get_call_stub_compiled_return());
  1.2201 -      __ move(LVP, (intptr_t) StubRoutines::gs2::get_call_stub_compiled_return()); 
  1.2202 -      //  __ jmp(skip);
  1.2203 -      __ b(skip);
   1.2204 +      // align the stack pointer for compiled code (the caller
   1.2205 +      // is responsible for undoing this fixup)
   1.2206 +      __ move(AT, -(StackAlignmentInBytes));
   1.2207 +      __ andr(SP, SP, AT);
   1.2208 +      // and begin the OSR nmethod (see osr_entry in c1_LIRAssembler_mips.cpp)
   1.2209 +      __ ld(AT, BCP, nmethod::osr_entry_point_offset());
   1.2210 +      __ jr(AT);
  1.2211        __ delayed()->nop();
  1.2212 -      __ bind(chkint);
  1.2213 -
  1.2214 -      // Are we returning to the interpreter? Look for sentinel
  1.2215 -
  1.2216 -      //__ cmpl(Address(edi, -8), Interpreter::return_sentinel);
  1.2217 -      __ lw(AT, LVP , -8); 
  1.2218 -      __ daddi(AT, AT, -Interpreter::return_sentinel); 
  1.2219 -      //__ jcc(Assembler::notEqual, skip);
  1.2220 -      __ bne(AT, R0, skip);
  1.2221 -      __ delayed()->nop(); 
  1.2222 -      // Adjust to compiled return back to interpreter
  1.2223 -
  1.2224 -      // __ movl(edi, Address(edi, -4));
  1.2225 -      __ lw(LVP, LVP, -4); 
  1.2226 -
  1.2227 -      __ bind(skip);
  1.2228 -#endif
  1.2229 -      // Align stack pointer for compiled code (note that caller is
  1.2230 -      // responsible for undoing this fixup by remembering the old SP
  1.2231 -      // in an ebp-relative location)
  1.2232 -      //  __ andl(esp, -(StackAlignmentInBytes));
  1.2233 -      __ move(AT, -(StackAlignmentInBytes));	
  1.2234 -      __ andr(SP , SP , AT);
  1.2235 -      // push the (possibly adjusted) return address
  1.2236 -      //  __ pushl(edi);
  1.2237 -      //__ push(LVP);
  1.2238 -      //			__ move(RA, LVP);	
  1.2239 -      // and begin the OSR nmethod
  1.2240 -      //  __ jmp(Address(esi, nmethod::osr_entry_point_offset()));
  1.2241 -      //refer to osr_entry in c1_LIRAssembler_mips.cpp	
  1.2242 -      __ ld(AT, BCP, nmethod::osr_entry_point_offset()); 
  1.2243 -      __ jr(AT); 
  1.2244 -      __ delayed()->nop(); 
  1.2245      }
  1.2246    }
  1.2247  #endif // not CORE
  1.2248  }
  1.2249  
  1.2250 +
  1.2251  void TemplateTable::if_0cmp(Condition cc) {
  1.2252    transition(itos, vtos);
  1.2253    // assume branch is more often taken than not (loops use backward branches)
  1.2254 @@ -2292,13 +2106,12 @@
  1.2255    __ profile_not_taken_branch(FSR);
  1.2256  }
  1.2257  
  1.2258 -
  1.2259  void TemplateTable::if_icmp(Condition cc) {
  1.2260    transition(itos, vtos);
  1.2261    // assume branch is more often taken than not (loops use backward branches)
  1.2262    Label not_taken;
  1.2263  
  1.2264 -  __ pop_i(SSR);	
  1.2265 +  __ pop_i(SSR);
  1.2266    switch(cc) {
  1.2267      case not_equal:
  1.2268        __ beq(SSR, FSR, not_taken);
  1.2269 @@ -2326,12 +2139,10 @@
  1.2270    __ delayed()->nop();
  1.2271  
  1.2272    branch(false, false);
  1.2273 -
  1.2274    __ bind(not_taken);
  1.2275    __ profile_not_taken_branch(FSR);
  1.2276  }
  1.2277  
  1.2278 -
  1.2279  void TemplateTable::if_nullcmp(Condition cc) {
  1.2280    transition(atos, vtos);
  1.2281    // assume branch is more often taken than not (loops use backward branches)
  1.2282 @@ -2349,71 +2160,69 @@
  1.2283    __ delayed()->nop();
  1.2284  
  1.2285    branch(false, false);
  1.2286 -
  1.2287    __ bind(not_taken);
  1.2288    __ profile_not_taken_branch(FSR);
  1.2289  }
  1.2290  
  1.2291  
  1.2292  void TemplateTable::if_acmp(Condition cc) {
  1.2293 -	transition(atos, vtos);
  1.2294 -	// assume branch is more often taken than not (loops use backward branches)
  1.2295 -	Label not_taken;
  1.2296 -	//	__ lw(SSR, SP, 0);
  1.2297 -	__ pop_ptr(SSR);
  1.2298 -	switch(cc) {
  1.2299 -		case not_equal:
  1.2300 -			__ beq(SSR, FSR, not_taken);
  1.2301 -			break;
  1.2302 -		case equal:
  1.2303 -			__ bne(SSR, FSR, not_taken);
  1.2304 -			break;
  1.2305 -		default:
  1.2306 -			ShouldNotReachHere();
  1.2307 -	}
  1.2308 -	//	__ delayed()->daddi(SP, SP, 4);
  1.2309 -	__ delayed()->nop();
  1.2310 -
  1.2311 -	branch(false, false);
  1.2312 -
  1.2313 -	__ bind(not_taken);
  1.2314 -	__ profile_not_taken_branch(FSR);
  1.2315 +  transition(atos, vtos);
  1.2316 +  // assume branch is more often taken than not (loops use backward branches)
  1.2317 +  Label not_taken;
  1.2318 +  //  __ lw(SSR, SP, 0);
  1.2319 +  __ pop_ptr(SSR);
  1.2320 +  switch(cc) {
  1.2321 +    case not_equal:
  1.2322 +      __ beq(SSR, FSR, not_taken);
  1.2323 +      break;
  1.2324 +    case equal:
  1.2325 +      __ bne(SSR, FSR, not_taken);
  1.2326 +      break;
  1.2327 +    default:
  1.2328 +      ShouldNotReachHere();
  1.2329 +  }
  1.2330 +  __ delayed()->nop();
  1.2331 +
  1.2332 +  branch(false, false);
  1.2333 +
  1.2334 +  __ bind(not_taken);
  1.2335 +  __ profile_not_taken_branch(FSR);
  1.2336  }
  1.2337  
  1.2338  // used registers : T1, T2, T3
  1.2339  // T1 : method
   1.2340  // T2 : return bci
  1.2341  void TemplateTable::ret() {
  1.2342 -	transition(vtos, vtos);
  1.2343 -
  1.2344 -	locals_index(T2);
  1.2345 -	__ ld(T2, T2, 0);
  1.2346 -	__ profile_ret(T2, T3);
  1.2347 -
  1.2348 -	__ get_method(T1);
  1.2349 -	__ ld(BCP, T1, in_bytes(Method::const_offset()));
  1.2350 -	__ dadd(BCP, BCP, T2);
  1.2351 -	__ daddi(BCP, BCP, in_bytes(ConstMethod::codes_offset()));
  1.2352 -
  1.2353 -	__ dispatch_next(vtos);
  1.2354 +  transition(vtos, vtos);
  1.2355 +
  1.2356 +  locals_index(T2);
  1.2357 +  __ ld(T2, T2, 0);
  1.2358 +  __ profile_ret(T2, T3);
  1.2359 +
  1.2360 +  __ get_method(T1);
  1.2361 +  __ ld(BCP, T1, in_bytes(Method::const_offset()));
  1.2362 +  __ dadd(BCP, BCP, T2);
  1.2363 +  __ daddi(BCP, BCP, in_bytes(ConstMethod::codes_offset()));
  1.2364 +
  1.2365 +  __ dispatch_next(vtos);
  1.2366  }
  1.2367  
  1.2368  // used registers : T1, T2, T3
  1.2369  // T1 : method
   1.2370  // T2 : return bci
  1.2371  void TemplateTable::wide_ret() {
  1.2372 -	transition(vtos, vtos);
  1.2373 -
  1.2374 -	locals_index_wide(T2);
  1.2375 -	__ ld(T2, T2, 0);                   // get return bci, compute return bcp
  1.2376 -	__ profile_ret(T2, T3);
  1.2377 -
  1.2378 -	__ get_method(T1);
  1.2379 -	__ ld(BCP, T1, in_bytes(Method::const_offset()));
  1.2380 -	__ dadd(BCP, BCP, T2);
  1.2381 -	__ daddi(BCP, BCP, in_bytes(ConstMethod::codes_offset()));
  1.2382 -
  1.2383 -	__ dispatch_next(vtos);
  1.2384 +  transition(vtos, vtos);
  1.2385 +
  1.2386 +  locals_index_wide(T2);
  1.2387 +  __ ld(T2, T2, 0);                   // get return bci, compute return bcp
  1.2388 +  __ profile_ret(T2, T3);
  1.2389 +
  1.2390 +  __ get_method(T1);
  1.2391 +  __ ld(BCP, T1, in_bytes(Method::const_offset()));
  1.2392 +  __ dadd(BCP, BCP, T2);
  1.2393 +  __ daddi(BCP, BCP, in_bytes(ConstMethod::codes_offset()));
  1.2394 +
  1.2395 +  __ dispatch_next(vtos);
  1.2396  }
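
Both ret variants rebuild BCP from scratch: the saved return bci comes out of the local variable, and the new BCP is that bci offset into the method's bytecode array. The address arithmetic, as a sketch using the HotSpot accessors named in the code above:

    // sketch; Method/ConstMethod are the HotSpot metadata types
    address ret_target_sketch(Method* m, intptr_t return_bci) {
      return (address)m->constMethod()
           + in_bytes(ConstMethod::codes_offset())   // start of the bytecodes
           + return_bci;                             // bci stored by jsr/jsr_w
    }
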
  1.2397  
   1.2398  // used registers T2, T3, A7, Rnext
  1.2399 @@ -2422,53 +2231,53 @@
  1.2400  // A7 : high
  1.2401  // Rnext : dest bytecode, required by dispatch_base
  1.2402  void TemplateTable::tableswitch() {
  1.2403 -	Label default_case, continue_execution;
  1.2404 -	transition(itos, vtos);
  1.2405 -
  1.2406 -	// align BCP
  1.2407 -	__ daddi(T2, BCP, BytesPerInt);
  1.2408 -	__ li(AT, -BytesPerInt);
  1.2409 -	__ andr(T2, T2, AT);
  1.2410 -
  1.2411 -	// load lo & hi
  1.2412 -	__ lw(T3, T2, 1 * BytesPerInt);
  1.2413 -	__ swap(T3);
  1.2414 -	__ lw(A7, T2, 2 * BytesPerInt);
  1.2415 -	__ swap(A7);
  1.2416 -
  1.2417 -	// check against lo & hi
  1.2418 -	__ slt(AT, FSR, T3);
  1.2419 -	__ bne(AT, R0, default_case);
  1.2420 -	__ delayed()->nop();
  1.2421 -
  1.2422 -	__ slt(AT, A7, FSR);
  1.2423 -	__ bne(AT, R0, default_case);
  1.2424 -	__ delayed()->nop();
  1.2425 -
  1.2426 -	// lookup dispatch offset, in A7 big endian
  1.2427 -	__ dsub(FSR, FSR, T3);
  1.2428 -	__ dsll(AT, FSR, Address::times_4);
  1.2429 -	__ dadd(AT, T2, AT);
  1.2430 -	__ lw(A7, AT, 3 * BytesPerInt);
  1.2431 -	__ profile_switch_case(FSR, T9, T3);
  1.2432 -
  1.2433 -	__ bind(continue_execution);
  1.2434 -	__ swap(A7);
  1.2435 -	__ dadd(BCP, BCP, A7);
  1.2436 -	__ lbu(Rnext, BCP, 0);
  1.2437 -	__ dispatch_only(vtos);
  1.2438 -
  1.2439 -	// handle default
  1.2440 -	__ bind(default_case);
  1.2441 -	__ profile_switch_default(FSR);
  1.2442 -	__ lw(A7, T2, 0);
  1.2443 -	__ b(continue_execution);
  1.2444 -	__ delayed()->nop();
  1.2445 +  Label default_case, continue_execution;
  1.2446 +  transition(itos, vtos);
  1.2447 +
  1.2448 +  // align BCP
  1.2449 +  __ daddi(T2, BCP, BytesPerInt);
  1.2450 +  __ li(AT, -BytesPerInt);
  1.2451 +  __ andr(T2, T2, AT);
  1.2452 +
  1.2453 +  // load lo & hi
  1.2454 +  __ lw(T3, T2, 1 * BytesPerInt);
  1.2455 +  __ swap(T3);
  1.2456 +  __ lw(A7, T2, 2 * BytesPerInt);
  1.2457 +  __ swap(A7);
  1.2458 +
  1.2459 +  // check against lo & hi
  1.2460 +  __ slt(AT, FSR, T3);
  1.2461 +  __ bne(AT, R0, default_case);
  1.2462 +  __ delayed()->nop();
  1.2463 +
  1.2464 +  __ slt(AT, A7, FSR);
  1.2465 +  __ bne(AT, R0, default_case);
  1.2466 +  __ delayed()->nop();
  1.2467 +
  1.2468 +  // lookup dispatch offset, in A7 big endian
  1.2469 +  __ dsub(FSR, FSR, T3);
  1.2470 +  __ dsll(AT, FSR, Address::times_4);
  1.2471 +  __ dadd(AT, T2, AT);
  1.2472 +  __ lw(A7, AT, 3 * BytesPerInt);
  1.2473 +  __ profile_switch_case(FSR, T9, T3);
  1.2474 +
  1.2475 +  __ bind(continue_execution);
  1.2476 +  __ swap(A7);
  1.2477 +  __ dadd(BCP, BCP, A7);
  1.2478 +  __ lbu(Rnext, BCP, 0);
  1.2479 +  __ dispatch_only(vtos);
  1.2480 +
  1.2481 +  // handle default
  1.2482 +  __ bind(default_case);
  1.2483 +  __ profile_switch_default(FSR);
  1.2484 +  __ lw(A7, T2, 0);
  1.2485 +  __ b(continue_execution);
  1.2486 +  __ delayed()->nop();
  1.2487  }
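
The alignment and the three swapped words at the start of tableswitch mirror the bytecode's layout: after the opcode the stream pads to a 4-byte boundary, then holds default, lo and hi as big-endian ints, followed by hi-lo+1 big-endian branch offsets, all relative to the opcode address. A sketch of the target computation the template performs:

    #include <cstdint>

    static int32_t be32(const uint8_t* p) {            // class files are big-endian
      return (int32_t)(((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
                       ((uint32_t)p[2] <<  8) |  (uint32_t)p[3]);
    }

    const uint8_t* tableswitch_target_sketch(const uint8_t* bcp, int32_t key) {
      const uint8_t* t = (const uint8_t*)(((uintptr_t)bcp + 4) & ~(uintptr_t)3);  // align, as daddi/andr do
      int32_t def = be32(t), lo = be32(t + 4), hi = be32(t + 8);
      int32_t off = (key < lo || key > hi) ? def : be32(t + 12 + 4 * (key - lo));
      return bcp + off;                                // offsets are relative to the opcode
    }
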
  1.2488  
  1.2489  void TemplateTable::lookupswitch() {
  1.2490 -	transition(itos, itos);
  1.2491 -	__ stop("lookupswitch bytecode should have been rewritten");
  1.2492 +  transition(itos, itos);
  1.2493 +  __ stop("lookupswitch bytecode should have been rewritten");
  1.2494  }
  1.2495  
  1.2496  // used registers : T2, T3, A7, Rnext
  1.2497 @@ -2480,7 +2289,7 @@
  1.2498  // see Rewriter::rewrite_method for more information
  1.2499  void TemplateTable::fast_linearswitch() {
  1.2500    transition(itos, vtos);
  1.2501 -  Label loop_entry, loop, found, continue_execution;  
  1.2502 +  Label loop_entry, loop, found, continue_execution;
  1.2503  
   1.2504    // swap FSR so we can avoid swapping the table entries
  1.2505    __ swap(FSR);
  1.2506 @@ -2525,7 +2334,7 @@
  1.2507    __ profile_switch_case(T3, FSR, T2);
  1.2508  
  1.2509    // continue execution
  1.2510 -  __ bind(continue_execution);  
  1.2511 +  __ bind(continue_execution);
  1.2512    __ swap(A7);
  1.2513    __ dadd(BCP, BCP, A7);
  1.2514    __ lbu(Rnext, BCP, 0);
  1.2515 @@ -2579,7 +2388,7 @@
  1.2516    // initialize i & j
  1.2517    __ move(i, R0);
  1.2518    __ lw(j, array, - 1 * BytesPerInt);
  1.2519 -  // Convert j into native byteordering  
  1.2520 +  // Convert j into native byteordering
  1.2521    __ swap(j);
  1.2522  
  1.2523    // and start
  1.2524 @@ -2588,7 +2397,7 @@
  1.2525    __ delayed()->nop();
  1.2526  
  1.2527    // binary search loop
  1.2528 -  { 
  1.2529 +  {
  1.2530      Label loop;
  1.2531      __ bind(loop);
  1.2532      // int h = (i + j) >> 1;
  1.2533 @@ -2609,7 +2418,7 @@
  1.2534        Label set_i, end_of_if;
  1.2535        __ slt(AT, key, temp);
  1.2536        __ beq(AT, R0, set_i);
  1.2537 -      __ delayed()->nop(); 
  1.2538 +      __ delayed()->nop();
  1.2539  
  1.2540        __ b(end_of_if);
  1.2541        __ delayed(); __ move(j, h);
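
The loop generated here is the classic interpreter binary search over sorted (match, offset) pairs; byte-swapping the key once up front, rather than every table entry, is the same trick fast_linearswitch notes above. In C++, with i/j/h mirroring the registers of the same names:

    #include <cstdint>

    // sketch: pairs[] holds 2 ints per entry (match, offset), sorted by match
    int binary_search_sketch(const int32_t* pairs, int n, int32_t key) {
      int i = 0, j = n;
      while (i + 1 < j) {
        int h = (int)(((unsigned)i + (unsigned)j) >> 1);   // i < h < j
        if (key < pairs[2 * h]) j = h; else i = h;
      }
      return i;             // the caller still compares pairs[2*i] against key
    }
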
  1.2542 @@ -2660,20 +2469,21 @@
  1.2543  
  1.2544  void TemplateTable::_return(TosState state) {
  1.2545    transition(state, state);
  1.2546 -  assert(_desc->calls_vm(), "inconsistent calls_vm information"); // call in remove_activation
  1.2547 +  assert(_desc->calls_vm(),
  1.2548 +      "inconsistent calls_vm information"); // call in remove_activation
  1.2549 +
  1.2550    if (_desc->bytecode() == Bytecodes::_return_register_finalizer) {
  1.2551      assert(state == vtos, "only valid state");
  1.2552      __ ld(T1, aaddress(0));
  1.2553 -    //__ ld(LVP, T1, oopDesc::klass_offset_in_bytes());
  1.2554      __ load_klass(LVP, T1);
  1.2555      __ lw(LVP, LVP, in_bytes(Klass::access_flags_offset()));
  1.2556 -    __ move(AT, JVM_ACC_HAS_FINALIZER); 
  1.2557 +    __ move(AT, JVM_ACC_HAS_FINALIZER);
   1.2558 +    __ andr(AT, AT, LVP);
  1.2559      Label skip_register_finalizer;
  1.2560      __ beq(AT, R0, skip_register_finalizer);
  1.2561 -    __ delayed()->nop(); 
  1.2562 -    __ call_VM(noreg, CAST_FROM_FN_PTR(address, 
  1.2563 -	  InterpreterRuntime::register_finalizer), T1);
  1.2564 +    __ delayed()->nop();
  1.2565 +    __ call_VM(noreg, CAST_FROM_FN_PTR(address,
   1.2566 +               InterpreterRuntime::register_finalizer), T1);
  1.2567      __ bind(skip_register_finalizer);
  1.2568    }
  1.2569    __ remove_activation(state, T9);
  1.2570 @@ -2718,25 +2528,25 @@
  1.2571    //if (os::is_MP()) { // Not needed on single CPU
  1.2572    //  __ membar(order_constraint);
  1.2573    //}
  1.2574 -	if( !os::is_MP() ) return;	// Not needed on single CPU
  1.2575 -	__ sync();
   1.2576 +  if (!os::is_MP()) return;   // Not needed on single CPU
  1.2577 +  __ sync();
  1.2578  }
  1.2579  
   1.2580  // We do not shift the index left by 2 bits in get_cache_and_index_at_bcp,
  1.2581 -// for we always need shift the index we use it. the ConstantPoolCacheEntry 
  1.2582 -// is 16-byte long, index is the index in 
  1.2583 -// ConstantPoolCache, so cache + base_offset() + index * 16 is 
   1.2584 +// because we always shift it at each use. A ConstantPoolCacheEntry
   1.2585 +// is 16 bytes long, and index is the index into the
   1.2586 +// ConstantPoolCache, so cache + base_offset() + index * 16 is
  1.2587  // the corresponding ConstantPoolCacheEntry
  1.2588  // used registers : T2
   1.2589  // NOTE : the returned index also needs to be shifted left by 4 to get the address!
  1.2590  void TemplateTable::resolve_cache_and_index(int byte_no,
  1.2591                                              Register Rcache,
  1.2592 -					    Register index,
  1.2593 +                                            Register index,
  1.2594                                              size_t index_size) {
  1.2595    assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
  1.2596    const Register temp = A1;
  1.2597    assert_different_registers(Rcache, index);
  1.2598 -  const int shift_count = (1 + byte_no)*BitsPerByte;
  1.2599 +
  1.2600    Label resolved;
  1.2601    __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size);
  1.2602    // is resolved?
  1.2603 @@ -2750,14 +2560,14 @@
  1.2604      case Bytecodes::_getstatic      : // fall through
  1.2605      case Bytecodes::_putstatic      : // fall through
  1.2606      case Bytecodes::_getfield       : // fall through
  1.2607 -    case Bytecodes::_putfield       : 
  1.2608 -      entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put); 
  1.2609 +    case Bytecodes::_putfield       :
  1.2610 +      entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put);
  1.2611        break;
  1.2612      case Bytecodes::_invokevirtual  : // fall through
  1.2613      case Bytecodes::_invokespecial  : // fall through
  1.2614      case Bytecodes::_invokestatic   : // fall through
  1.2615 -    case Bytecodes::_invokeinterface: 
  1.2616 -      entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke);  
  1.2617 +    case Bytecodes::_invokeinterface:
  1.2618 +      entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke);
  1.2619        break;
  1.2620      case Bytecodes::_invokehandle:
  1.2621        entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokehandle);
  1.2622 @@ -2765,8 +2575,9 @@
  1.2623      case Bytecodes::_invokedynamic:
  1.2624        entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic);
  1.2625        break;
  1.2626 -    default                      		: 
  1.2627 +    default                          :
  1.2628        fatal(err_msg("unexpected bytecode: %s", Bytecodes::name(bytecode())));
  1.2629 +      break;
  1.2630    }
  1.2631  
  1.2632    __ move(temp, i);
  1.2633 @@ -2785,21 +2596,22 @@
  1.2634                                                Register flags,
  1.2635                                                bool is_static = false) {
  1.2636    assert_different_registers(cache, index, flags, off);
  1.2637 +
  1.2638    ByteSize cp_base_offset = ConstantPoolCache::base_offset();
  1.2639    // Field offset
  1.2640    __ dsll(AT, index, Address::times_ptr);
  1.2641    __ dadd(AT, cache, AT);
  1.2642    __ ld(off, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f2_offset()));
  1.2643 -  // Flags    
  1.2644 +  // Flags
  1.2645    __ ld(flags, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset()));
  1.2646  
  1.2647 -  // klass     overwrite register
  1.2648 +  // klass overwrite register
  1.2649    if (is_static) {
  1.2650 -    __ ld(obj, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f1_offset())); 
  1.2651 +    __ ld(obj, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f1_offset()));
  1.2652      const int mirror_offset = in_bytes(Klass::java_mirror_offset());
  1.2653      __ ld(obj, Address(obj, mirror_offset));
  1.2654  
  1.2655 -    __ verify_oop(obj);	
  1.2656 +    __ verify_oop(obj);
  1.2657    }
  1.2658  }
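
The two extra loads in the is_static path are worth spelling out: f1 of a resolved field entry holds the field holder's Klass*, and the actual static storage lives inside that class's java.lang.Class mirror object. A sketch using the JDK 8 accessor names (f1_ord/java_mirror; treating that vintage as an assumption):

    // sketch, not the generated code
    oop static_field_base_sketch(ConstantPoolCacheEntry* e) {
      Klass* holder = (Klass*)e->f1_ord();   // f1: field holder Klass*
      return holder->java_mirror();          // statics live inside the mirror oop
    }
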
  1.2659  
  1.2660 @@ -2821,35 +2633,32 @@
  1.2661    assert(is_invokevirtual == (byte_no == f2_byte), "is invokevirtual flag redundant");
  1.2662    // determine constant pool cache field offsets
  1.2663    const int method_offset = in_bytes(
  1.2664 -      ConstantPoolCache::base_offset() +
  1.2665 +    ConstantPoolCache::base_offset() +
  1.2666        ((byte_no == f2_byte)
  1.2667         ? ConstantPoolCacheEntry::f2_offset()
  1.2668 -       : ConstantPoolCacheEntry::f1_offset()
  1.2669 -      )
  1.2670 -      );
  1.2671 +       : ConstantPoolCacheEntry::f1_offset()));
  1.2672    const int flags_offset = in_bytes(ConstantPoolCache::base_offset() +
  1.2673 -      ConstantPoolCacheEntry::flags_offset());
  1.2674 +                                    ConstantPoolCacheEntry::flags_offset());
  1.2675    // access constant pool cache fields
  1.2676    const int index_offset = in_bytes(ConstantPoolCache::base_offset() +
  1.2677 -      ConstantPoolCacheEntry::f2_offset());
  1.2678 +                                    ConstantPoolCacheEntry::f2_offset());
  1.2679 +
  1.2680    size_t index_size = (is_invokedynamic ? sizeof(u4): sizeof(u2));
  1.2681    resolve_cache_and_index(byte_no, cache, index, index_size);
  1.2682  
  1.2683    //assert(wordSize == 8, "adjust code below");
  1.2684 -  // note we shift 4 not 2, for we get is the true inde 
   1.2685 +  // note we shift by 4, not 2, because what we get is the true index
   1.2686  // of the ConstantPoolCacheEntry, not the 2-bit-shifted index used by the x86 version
  1.2687    __ dsll(AT, index, Address::times_ptr);
  1.2688    __ dadd(AT, cache, AT);
  1.2689    __ ld(method, AT, method_offset);
  1.2690  
  1.2691 -
  1.2692    if (itable_index != NOREG) {
  1.2693      __ ld(itable_index, AT, index_offset);
  1.2694    }
  1.2695    __ ld(flags, AT, flags_offset);
  1.2696  }
  1.2697  
  1.2698 -
  1.2699  // The registers cache and index expected to be set before call.
  1.2700  // Correct values of the cache and index registers are preserved.
  1.2701  void TemplateTable::jvmti_post_field_access(Register cache, Register index,
  1.2702 @@ -2857,35 +2666,34 @@
  1.2703    // do the JVMTI work here to avoid disturbing the register state below
  1.2704    // We use c_rarg registers here because we want to use the register used in
  1.2705    // the call to the VM
  1.2706 -	if (JvmtiExport::can_post_field_access()) {
  1.2707 -		// Check to see if a field access watch has been set before we take
  1.2708 -		// the time to call into the VM.
  1.2709 -		Label L1;
  1.2710 -		assert_different_registers(cache, index, FSR);
  1.2711 -		__ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr());
  1.2712 -		__ lw(FSR, AT, 0);
  1.2713 -		__ beq(FSR, R0, L1);
  1.2714 -		__ delayed()->nop();
  1.2715 -
  1.2716 -		// We rely on the bytecode being resolved and the cpCache entry filled in.
  1.2717 -		// cache entry pointer
  1.2718 -		//__ get_cache_and_index_at_bcp(c_rarg2, c_rarg3, 1);
  1.2719 -		__ daddi(cache, cache, in_bytes(ConstantPoolCache::base_offset()));
  1.2720 -		__ shl(index, 4);
  1.2721 -		__ dadd(cache, cache, index);
  1.2722 -		if (is_static) {
  1.2723 -			__ move(FSR, R0);
  1.2724 -		} else {
  1.2725 -			__ lw(FSR, SP, 0);
  1.2726 -			__ verify_oop(FSR);
  1.2727 -		}
  1.2728 -		// FSR: object pointer or NULL
  1.2729 -		// cache: cache entry pointer
  1.2730 -		__ call_VM(NOREG, CAST_FROM_FN_PTR(address, 
  1.2731 -					InterpreterRuntime::post_field_access), FSR, cache);
  1.2732 -		__ get_cache_and_index_at_bcp(cache, index, 1);
  1.2733 -		__ bind(L1);
  1.2734 -	} 
  1.2735 +  if (JvmtiExport::can_post_field_access()) {
  1.2736 +    // Check to see if a field access watch has been set before we
  1.2737 +    // take the time to call into the VM.
  1.2738 +    Label L1;
  1.2739 +    assert_different_registers(cache, index, FSR);
  1.2740 +    __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr());
  1.2741 +    __ lw(FSR, AT, 0);
  1.2742 +    __ beq(FSR, R0, L1);
  1.2743 +    __ delayed()->nop();
  1.2744 +
  1.2745 +    // We rely on the bytecode being resolved and the cpCache entry filled in.
  1.2746 +    // cache entry pointer
  1.2747 +    __ daddi(cache, cache, in_bytes(ConstantPoolCache::base_offset()));
  1.2748 +    __ shl(index, 4);
  1.2749 +    __ dadd(cache, cache, index);
  1.2750 +    if (is_static) {
  1.2751 +      __ move(FSR, R0);
  1.2752 +    } else {
  1.2753 +      __ lw(FSR, SP, 0);
  1.2754 +      __ verify_oop(FSR);
  1.2755 +    }
  1.2756 +    // FSR: object pointer or NULL
  1.2757 +    // cache: cache entry pointer
  1.2758 +    __ call_VM(NOREG, CAST_FROM_FN_PTR(address,
  1.2759 +                                       InterpreterRuntime::post_field_access), FSR, cache);
  1.2760 +    __ get_cache_and_index_at_bcp(cache, index, 1);
  1.2761 +    __ bind(L1);
  1.2762 +  }
  1.2763  }
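
The shape of the guard just generated: a single global counter load decides whether the interpreter pays for a full VM transition, so the common no-agent case costs one load and one branch. Sketch (the two runtime entry points are the ones the code calls; the int cast on the counter address is an assumption):

    void post_field_access_sketch(JavaThread* thread, oop obj,
                                  ConstantPoolCacheEntry* entry) {
      if (*(int*)JvmtiExport::get_field_access_count_addr() == 0) {
        return;                                   // no field-access watch set: fast path
      }
      InterpreterRuntime::post_field_access(thread, obj, entry);  // slow path
    }
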
  1.2764  
  1.2765  void TemplateTable::pop_and_check_object(Register r) {
  1.2766 @@ -2922,14 +2730,14 @@
  1.2767    const Register flags = T1;
  1.2768    resolve_cache_and_index(byte_no, cache, index, sizeof(u2));
  1.2769    //jvmti_post_field_access(cache, index, is_static, false);
  1.2770 -
  1.2771    load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
  1.2772  
  1.2773    if (!is_static) pop_and_check_object(obj);
  1.2774    __ dadd(index, obj, off);
  1.2775  
  1.2776  
  1.2777 -  Label Done, notByte, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble;
  1.2778 +  Label Done, notByte, notInt, notShort, notChar,
  1.2779 +              notLong, notFloat, notObj, notDouble;
  1.2780  
  1.2781    assert(btos == 0, "change code, btos != 0");
  1.2782    __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift);
  1.2783 @@ -2938,7 +2746,7 @@
  1.2784    __ delayed()->nop();
  1.2785  
  1.2786    // btos
  1.2787 -  __ lb(FSR, index, 0);	
  1.2788 +  __ lb(FSR, index, 0);
  1.2789    __ sd(FSR, SP, - wordSize);
  1.2790  
  1.2791    // Rewrite bytecode to be faster
  1.2792 @@ -3007,7 +2815,6 @@
  1.2793    __ sd(FSR, SP, - wordSize);
  1.2794  
  1.2795    if (!is_static) {
  1.2796 -    // patch_bytecode(Bytecodes::_fast_sgetfield, T3, T2);
  1.2797      patch_bytecode(Bytecodes::_fast_sgetfield, T3, T2);
  1.2798    }
  1.2799    __ b(Done);
  1.2800 @@ -3066,6 +2873,7 @@
  1.2801    __ bind(Done);
  1.2802  }
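
Every branch of the big tos dispatch above keys off the same flags word from the cache entry. For orientation, a sketch of the two decodings used (28 and 21 are the JDK 8 values of ConstantPoolCacheEntry::tos_state_shift and is_volatile_shift; verify against cpCache.hpp):

    #include <cstdint>

    uint32_t tos_state_of(uint32_t flags) { return flags >> 28; }              // btos == 0 is tested first
    bool     is_volatile (uint32_t flags) { return ((flags >> 21) & 1) != 0; } // checked by putfield
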
  1.2803  
  1.2804 +
  1.2805  void TemplateTable::getfield(int byte_no) {
  1.2806    getfield_or_static(byte_no, false);
  1.2807  }
  1.2808 @@ -3073,95 +2881,81 @@
  1.2809  void TemplateTable::getstatic(int byte_no) {
  1.2810    getfield_or_static(byte_no, true);
  1.2811  }
  1.2812 -/*
  1.2813 -// used registers : T1, T2, T3, T1
  1.2814 -// T1 : cache & cp entry
  1.2815 -// T2 : obj
  1.2816 -// T3 : flags & value pointer
  1.2817 -// T1 : index
  1.2818 -// see ConstantPoolCacheEntry::set_field for more info
  1.2819 -void TemplateTable::jvmti_post_field_mod(int byte_no, bool is_static) {
  1.2820 - */
  1.2821  
  1.2822  // The registers cache and index expected to be set before call.
  1.2823  // The function may destroy various registers, just not the cache and index registers.
  1.2824  void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) {
  1.2825 -	ByteSize cp_base_offset = ConstantPoolCache::base_offset();
  1.2826 -
  1.2827 -	if (JvmtiExport::can_post_field_modification()) {
  1.2828 -		// Check to see if a field modification watch has been set before we take
  1.2829 -		// the time to call into the VM.
  1.2830 -		Label L1;
  1.2831 -		assert_different_registers(cache, index, AT);
  1.2832 -
  1.2833 -		//__ lui(AT, Assembler::split_high((int)JvmtiExport::get_field_modification_count_addr()));
  1.2834 -		//__ lw(FSR, AT, Assembler::split_low((int)JvmtiExport::get_field_modification_count_addr()));
  1.2835 -		__ li(AT, JvmtiExport::get_field_modification_count_addr());
  1.2836 -		__ lw(FSR, AT, 0);
  1.2837 -		__ beq(FSR, R0, L1);
  1.2838 -		__ delayed()->nop();
  1.2839 -
  1.2840 -		/* // We rely on the bytecode being resolved and the cpCache entry filled in.
  1.2841 -		   resolve_cache_and_index(byte_no, T1, T1);
  1.2842 -		   */
  1.2843 -		// The cache and index registers have been already set.
  1.2844 -		// This allows to eliminate this call but the cache and index
  1.2845 -		// registers have to be correspondingly used after this line.
  1.2846 -		// __ get_cache_and_index_at_bcp(eax, edx, 1);
  1.2847 -		__ get_cache_and_index_at_bcp(T1, T9, 1);
  1.2848 -
  1.2849 -		if (is_static) {
  1.2850 -			__ move(T2, R0);
  1.2851 -		} else {
  1.2852 -			// Life is harder. The stack holds the value on top, 
  1.2853 -			// followed by the object.
  1.2854 -			// We don't know the size of the value, though; 
  1.2855 -			// it could be one or two words
  1.2856 -			// depending on its type. As a result, we must find 
  1.2857 -			// the type to determine where the object is.
  1.2858 -			Label two_word, valsize_known;
  1.2859 -			__ dsll(AT, T1, 4); 
  1.2860 -			__ dadd(AT, T1, AT);
  1.2861 -			__ lw(T3, AT, in_bytes(cp_base_offset 
  1.2862 -						+ ConstantPoolCacheEntry::flags_offset()));
  1.2863 -			__ move(T2, SP);
  1.2864 -			__ shr(T3, ConstantPoolCacheEntry::tos_state_shift);
  1.2865 -
  1.2866 -			// Make sure we don't need to mask ecx for tos_state_shift 
  1.2867 -			// after the above shift
  1.2868 -			ConstantPoolCacheEntry::verify_tos_state_shift();
  1.2869 -			__ move(AT, ltos);
  1.2870 -			__ beq(T3, AT, two_word);
  1.2871 -			__ delayed()->nop();
  1.2872 -			__ move(AT, dtos);
  1.2873 -			__ beq(T3, AT, two_word);
  1.2874 -			__ delayed()->nop();
  1.2875 -			__ b(valsize_known);
  1.2876 -			//__ delayed()->daddi(T2, T2, wordSize*1);
  1.2877 -			__ delayed()->daddi(T2, T2,Interpreter::expr_offset_in_bytes(1) );
  1.2878 -
  1.2879 -			__ bind(two_word);
  1.2880 -			//	__ daddi(T2, T2, wordSize*2);
  1.2881 -			__ daddi(T2, T2,Interpreter::expr_offset_in_bytes(2));
  1.2882 -
  1.2883 -			__ bind(valsize_known);
  1.2884 -			// setup object pointer
  1.2885 -			__ lw(T2, T2, 0*wordSize);
  1.2886 -		}
  1.2887 -		// cache entry pointer
  1.2888 -		__ daddi(T1, T1, in_bytes(cp_base_offset));
  1.2889 -		__ shl(T1, 4); 
  1.2890 -		__ daddu(T1, T1, T1);
  1.2891 -		// object (tos)
  1.2892 -		__ move(T3, SP);
  1.2893 -		// T2: object pointer set up above (NULL if static)
  1.2894 -		// T1: cache entry pointer
  1.2895 -		// T3: jvalue object on the stack
  1.2896 -		__ call_VM(NOREG, CAST_FROM_FN_PTR(address, 
  1.2897 -				InterpreterRuntime::post_field_modification), T2, T1, T3);
  1.2898 -		__ get_cache_and_index_at_bcp(cache, index, 1);
  1.2899 -		__ bind(L1);
  1.2900 -	}
  1.2901 +  ByteSize cp_base_offset = ConstantPoolCache::base_offset();
  1.2902 +
  1.2903 +  if (JvmtiExport::can_post_field_modification()) {
  1.2904 +    // Check to see if a field modification watch has been set before we take
  1.2905 +    // the time to call into the VM.
  1.2906 +    Label L1;
  1.2907 +    assert_different_registers(cache, index, AT);
  1.2908 +
  1.2909 +    __ li(AT, JvmtiExport::get_field_modification_count_addr());
  1.2910 +    __ lw(FSR, AT, 0);
  1.2911 +    __ beq(FSR, R0, L1);
  1.2912 +    __ delayed()->nop();
  1.2913 +
   1.2914 +    // We rely on the bytecode being resolved and the cpCache entry
   1.2915 +    // filled in.
   1.2916 +    //
   1.2917 +    // The cache and index registers have already been set, which would
   1.2918 +    // allow eliminating the call below, but then the cache and index
   1.2919 +    // registers have to be used consistently after this line.
  1.2920 +    __ get_cache_and_index_at_bcp(T1, T9, 1);
  1.2921 +
  1.2922 +    if (is_static) {
  1.2923 +      __ move(T2, R0);
  1.2924 +    } else {
  1.2925 +      // Life is harder. The stack holds the value on top,
  1.2926 +      // followed by the object.
  1.2927 +      // We don't know the size of the value, though;
  1.2928 +      // it could be one or two words
  1.2929 +      // depending on its type. As a result, we must find
  1.2930 +      // the type to determine where the object is.
  1.2931 +      Label two_word, valsize_known;
  1.2932 +      __ dsll(AT, T1, 4);
  1.2933 +      __ dadd(AT, T1, AT);
  1.2934 +      __ lw(T3, AT, in_bytes(cp_base_offset
  1.2935 +            + ConstantPoolCacheEntry::flags_offset()));
  1.2936 +      __ move(T2, SP);
  1.2937 +      __ shr(T3, ConstantPoolCacheEntry::tos_state_shift);
  1.2938 +
  1.2939 +      // Make sure we don't need to mask ecx for tos_state_shift
  1.2940 +      // after the above shift
  1.2941 +      ConstantPoolCacheEntry::verify_tos_state_shift();
  1.2942 +      __ move(AT, ltos);
  1.2943 +      __ beq(T3, AT, two_word);
  1.2944 +      __ delayed()->nop();
  1.2945 +      __ move(AT, dtos);
  1.2946 +      __ beq(T3, AT, two_word);
  1.2947 +      __ delayed()->nop();
  1.2948 +      __ b(valsize_known);
   1.2949 +      __ delayed()->daddi(T2, T2, Interpreter::expr_offset_in_bytes(1));
  1.2950 +
  1.2951 +      __ bind(two_word);
   1.2952 +      __ daddi(T2, T2, Interpreter::expr_offset_in_bytes(2));
  1.2953 +
  1.2954 +      __ bind(valsize_known);
  1.2955 +      // setup object pointer
  1.2956 +      __ lw(T2, T2, 0*wordSize);
  1.2957 +    }
  1.2958 +    // cache entry pointer
  1.2959 +    __ daddi(T1, T1, in_bytes(cp_base_offset));
  1.2960 +    __ shl(T1, 4);
  1.2961 +    __ daddu(T1, T1, T1);
  1.2962 +    // object (tos)
  1.2963 +    __ move(T3, SP);
  1.2964 +    // T2: object pointer set up above (NULL if static)
  1.2965 +    // T1: cache entry pointer
  1.2966 +    // T3: jvalue object on the stack
  1.2967 +    __ call_VM(NOREG, CAST_FROM_FN_PTR(address,
  1.2968 +               InterpreterRuntime::post_field_modification), T2, T1, T3);
  1.2969 +    __ get_cache_and_index_at_bcp(cache, index, 1);
  1.2970 +    __ bind(L1);
  1.2971 +  }
  1.2972  }
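
// A minimal sketch in pseudo-C (not emitted code) of what the
// two_word/valsize_known logic above computes: ltos/dtos values occupy
// two expression-stack slots, everything else one, so the object sits
// one or two slots below SP:
//
//   int value_slots = (tos_state == ltos || tos_state == dtos) ? 2 : 1;
//   obj = *(oop*)(SP + Interpreter::expr_offset_in_bytes(value_slots));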
  1.2973  
  1.2974  // used registers : T0, T1, T2, T3, T8
  1.2975 @@ -3181,18 +2975,15 @@
  1.2976    const Register bc    = T3;
  1.2977  
  1.2978    resolve_cache_and_index(byte_no, cache, index, sizeof(u2));
  1.2979 -  //TODO: LEE
  1.2980    //jvmti_post_field_mod(cache, index, is_static);
  1.2981    load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
  1.2982 -  // Doug Lea believes this is not needed with current Sparcs (TSO) and Intel (PSO).
  1.2983 -  // volatile_barrier( );
  1.2984  
  1.2985    Label notVolatile, Done;
  1.2986    __ move(AT, 1<<ConstantPoolCacheEntry::is_volatile_shift);
  1.2987    __ andr(T8, flags, AT);
  1.2988  
  1.2989    Label notByte, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble;
  1.2990 -  
  1.2991 +
  1.2992    assert(btos == 0, "change code, btos != 0");
  1.2993    // btos
  1.2994    __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift);
  1.2995 @@ -3202,7 +2993,7 @@
  1.2996  
  1.2997    __ pop(btos);
  1.2998    if (!is_static) {
  1.2999 -    pop_and_check_object(obj); 
  1.3000 +    pop_and_check_object(obj);
  1.3001    }
  1.3002    __ dadd(AT, obj, off);
  1.3003    __ sb(FSR, AT, 0);
  1.3004 @@ -3221,7 +3012,7 @@
  1.3005  
  1.3006    __ pop(itos);
  1.3007    if (!is_static) {
  1.3008 -    pop_and_check_object(obj); 
  1.3009 +    pop_and_check_object(obj);
  1.3010    }
  1.3011    __ dadd(AT, obj, off);
  1.3012    __ sw(FSR, AT, 0);
  1.3013 @@ -3230,7 +3021,7 @@
  1.3014      patch_bytecode(Bytecodes::_fast_iputfield, bc, off, true, byte_no);
  1.3015    }
  1.3016    __ b(Done);
  1.3017 -  __ delayed()->nop();  
  1.3018 +  __ delayed()->nop();
  1.3019    __ bind(notInt);
  1.3020    // atos
  1.3021    __ move(AT, atos);
  1.3022 @@ -3239,11 +3030,10 @@
  1.3023  
  1.3024    __ pop(atos);
  1.3025    if (!is_static) {
  1.3026 -    pop_and_check_object(obj); 
  1.3027 +    pop_and_check_object(obj);
  1.3028    }
  1.3029  
  1.3030    __ dadd(AT, obj, off);
  1.3031 -  //__ sd(FSR, AT, 0);
  1.3032    __ store_heap_oop(Address(AT, 0), FSR);
  1.3033    __ store_check(obj);
  1.3034  
  1.3035 @@ -3260,7 +3050,7 @@
  1.3036  
  1.3037    __ pop(ctos);
  1.3038    if (!is_static) {
  1.3039 -    pop_and_check_object(obj); 
  1.3040 +    pop_and_check_object(obj);
  1.3041    }
  1.3042    __ dadd(AT, obj, off);
  1.3043    __ sh(FSR, AT, 0);
  1.3044 @@ -3277,7 +3067,7 @@
  1.3045  
  1.3046    __ pop(stos);
  1.3047    if (!is_static) {
  1.3048 -    pop_and_check_object(obj); 
  1.3049 +    pop_and_check_object(obj);
  1.3050    }
  1.3051    __ dadd(AT, obj, off);
  1.3052    __ sh(FSR, AT, 0);
  1.3053 @@ -3303,9 +3093,9 @@
  1.3054    //__ lw(FSR, SP, 0);
  1.3055    //__ lw(SSR, SP, 1 * wordSize);
  1.3056    //if (!is_static) {
  1.3057 -  //	__ lw(T3, SP, addent);
  1.3058 -  //	addent += 1 * wordSize;
  1.3059 -  //	__ verify_oop(T3);
  1.3060 +  //  __ lw(T3, SP, addent);
  1.3061 +  //  addent += 1 * wordSize;
  1.3062 +  //  __ verify_oop(T3);
  1.3063    //}
  1.3064  
  1.3065    //__ daddu(AT, T3, T2);
  1.3066 @@ -3313,7 +3103,7 @@
  1.3067    // Replace with real volatile test
  1.3068    // NOTE : we assume that sdc1&ldc1 operate in 32-bit, this is true for Godson2 even in 64-bit kernel
  1.3069    // last modified by yjl 7/12/2005
  1.3070 -  //__ ldc1(FSF, SP, 0); 
  1.3071 +  //__ ldc1(FSF, SP, 0);
  1.3072    //__ sdc1(FSF, AT, 0);
  1.3073    //volatile_barrier();
  1.3074  
  1.3075 @@ -3324,12 +3114,12 @@
  1.3076    //__ bind(notVolatileLong);
  1.3077  
  1.3078    //__ pop(ltos);  // overwrites edx
  1.3079 -  //	__ lw(FSR, SP, 0 * wordSize);
  1.3080 -  //	__ lw(SSR, SP, 1 * wordSize);
  1.3081 -  //	__ daddi(SP, SP, 2*wordSize);
  1.3082 +  //  __ lw(FSR, SP, 0 * wordSize);
  1.3083 +  //  __ lw(SSR, SP, 1 * wordSize);
  1.3084 +  //  __ daddi(SP, SP, 2*wordSize);
  1.3085    __ pop(ltos);
  1.3086    if (!is_static) {
  1.3087 -    pop_and_check_object(obj); 
  1.3088 +    pop_and_check_object(obj);
  1.3089    }
  1.3090    __ dadd(AT, obj, off);
  1.3091    __ sd(FSR, AT, 0);
  1.3092 @@ -3347,7 +3137,7 @@
  1.3093  
  1.3094    __ pop(ftos);
  1.3095    if (!is_static) {
  1.3096 -    pop_and_check_object(obj); 
  1.3097 +    pop_and_check_object(obj);
  1.3098    }
  1.3099    __ dadd(AT, obj, off);
  1.3100    __ swc1(FSF, AT, 0);
  1.3101 @@ -3364,18 +3154,21 @@
  1.3102  
  1.3103    __ pop(dtos);
  1.3104    if (!is_static) {
  1.3105 -    pop_and_check_object(obj); 
  1.3106 +    pop_and_check_object(obj);
  1.3107    }
  1.3108    __ dadd(AT, obj, off);
  1.3109    __ sdc1(FSF, AT, 0);
  1.3110    if (!is_static) {
  1.3111      patch_bytecode(Bytecodes::_fast_dputfield, bc, off, true, byte_no);
  1.3112    }
  1.3113 +
  1.3114 +#ifdef ASSERT
  1.3115    __ b(Done);
  1.3116    __ delayed()->nop();
  1.3117 +
  1.3118    __ bind(notDouble);
  1.3119 -
  1.3120    __ stop("Bad state");
  1.3121 +#endif
  1.3122  
  1.3123    __ bind(Done);
  1.3124  
  1.3125 @@ -3399,78 +3192,68 @@
  1.3126  // T2 : obj
  1.3127  // T3 : value pointer
  1.3128  void TemplateTable::jvmti_post_fast_field_mod() {
  1.3129 -	if (JvmtiExport::can_post_field_modification()) {
  1.3130 -		// Check to see if a field modification watch has been set before we take
  1.3131 -		// the time to call into the VM.
  1.3132 -		Label L2;
  1.3133 -		//__ lui(AT, Assembler::split_high((intptr_t)JvmtiExport::get_field_modification_count_addr()));
  1.3134 -		//__ lw(T3, AT, Assembler::split_low((intptr_t)JvmtiExport::get_field_modification_count_addr()));
  1.3135 -		__ li(AT, JvmtiExport::get_field_modification_count_addr());
  1.3136 -		__ lw(T3, AT, 0);
  1.3137 -		__ beq(T3, R0, L2);
  1.3138 -		__ delayed()->nop();
  1.3139 -		//__ pop(T2);
  1.3140 -		__ pop_ptr(T2);
  1.3141 -		//__ lw(T2, SP, 0);
  1.3142 -		__ verify_oop(T2);
  1.3143 -		__ push_ptr(T2);	
  1.3144 -		__ li(AT, -sizeof(jvalue));
  1.3145 -		__ daddu(SP, SP, AT);
  1.3146 -		__ move(T3, SP);
  1.3147 -		//__ push(T2);
  1.3148 -		//__ move(T2, R0);
  1.3149 -
  1.3150 -		switch (bytecode()) {          // load values into the jvalue object
  1.3151 -			case Bytecodes::_fast_bputfield: 
  1.3152 -				__ sb(FSR, SP, 0); 
  1.3153 -				break;
  1.3154 -			case Bytecodes::_fast_sputfield: 
  1.3155 -				__ sh(FSR, SP, 0);
  1.3156 -				break;
  1.3157 -			case Bytecodes::_fast_cputfield: 
  1.3158 -				__ sh(FSR, SP, 0);
  1.3159 -				break;
  1.3160 -			case Bytecodes::_fast_iputfield: 
  1.3161 -				__ sw(FSR, SP, 0);
  1.3162 -				break;							 
  1.3163 -			case Bytecodes::_fast_lputfield: 
  1.3164 -				__ sd(FSR, SP, 0);
  1.3165 -				break;
  1.3166 -			case Bytecodes::_fast_fputfield: 
  1.3167 -				__ swc1(FSF, SP, 0);
  1.3168 -				break;
  1.3169 -			case Bytecodes::_fast_dputfield: 
  1.3170 -				__ sdc1(FSF, SP, 0);
  1.3171 -				break;
  1.3172 -			case Bytecodes::_fast_aputfield: 
  1.3173 -				__ sd(FSR, SP, 0);
  1.3174 -				break;
  1.3175 -			default:  ShouldNotReachHere();
  1.3176 -		}
  1.3177 -
  1.3178 -		//__ pop(T2);  // restore copy of object pointer
  1.3179 -
  1.3180 -		// Save eax and sometimes edx because call_VM() will clobber them,
  1.3181 -		// then use them for JVM/DI purposes
  1.3182 -		__ push(FSR);
  1.3183 -		if (bytecode() == Bytecodes::_fast_lputfield) __ push(SSR);
  1.3184 -		// access constant pool cache entry
  1.3185 -		__ get_cache_entry_pointer_at_bcp(T1, T2, 1);
  1.3186 -		// no need, verified ahead
  1.3187 -		__ verify_oop(T2);
  1.3188 -
  1.3189 -		// ebx: object pointer copied above
  1.3190 -		// eax: cache entry pointer
  1.3191 -		// ecx: jvalue object on the stack
  1.3192 -		__ call_VM(NOREG, CAST_FROM_FN_PTR(address, 
  1.3193 -					InterpreterRuntime::post_field_modification), T2, T1, T3);
  1.3194 -		if (bytecode() == Bytecodes::_fast_lputfield) __ pop(SSR);  // restore high value
  1.3195 -		//__ pop(FSR);     // restore lower value   
  1.3196 -		//__ daddi(SP, SP, sizeof(jvalue));  // release jvalue object space
  1.3197 -		__ lw(FSR, SP, 0);
  1.3198 -		__ daddiu(SP, SP, sizeof(jvalue) + 1 * wordSize);
  1.3199 -		__ bind(L2);
  1.3200 -	}
  1.3201 +  if (JvmtiExport::can_post_field_modification()) {
  1.3202 +    // Check to see if a field modification watch has been set before
  1.3203 +    // we take the time to call into the VM.
  1.3204 +    Label L2;
  1.3205 +    __ li(AT, JvmtiExport::get_field_modification_count_addr());
  1.3206 +    __ lw(T3, AT, 0);
  1.3207 +    __ beq(T3, R0, L2);
  1.3208 +    __ delayed()->nop();
  1.3209 +    __ pop_ptr(T2);
  1.3210 +    __ verify_oop(T2);
  1.3211 +    __ push_ptr(T2);
  1.3212 +    __ li(AT, -sizeof(jvalue));
  1.3213 +    __ daddu(SP, SP, AT);
  1.3214 +    __ move(T3, SP);
  1.3215 +
  1.3216 +    switch (bytecode()) {          // load values into the jvalue object
  1.3217 +      case Bytecodes::_fast_bputfield:
  1.3218 +        __ sb(FSR, SP, 0);
  1.3219 +        break;
  1.3220 +      case Bytecodes::_fast_sputfield:
  1.3221 +        __ sh(FSR, SP, 0);
  1.3222 +        break;
  1.3223 +      case Bytecodes::_fast_cputfield:
  1.3224 +        __ sh(FSR, SP, 0);
  1.3225 +        break;
  1.3226 +      case Bytecodes::_fast_iputfield:
  1.3227 +        __ sw(FSR, SP, 0);
  1.3228 +        break;
  1.3229 +      case Bytecodes::_fast_lputfield:
  1.3230 +        __ sd(FSR, SP, 0);
  1.3231 +        break;
  1.3232 +      case Bytecodes::_fast_fputfield:
  1.3233 +        __ swc1(FSF, SP, 0);
  1.3234 +        break;
  1.3235 +      case Bytecodes::_fast_dputfield:
  1.3236 +        __ sdc1(FSF, SP, 0);
  1.3237 +        break;
  1.3238 +      case Bytecodes::_fast_aputfield:
  1.3239 +        __ sd(FSR, SP, 0);
  1.3240 +        break;
  1.3241 +      default:  ShouldNotReachHere();
  1.3242 +    }
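
// In outline (a pseudo-C sketch, not the emitted code): the code above
// reserves sizeof(jvalue) bytes on the expression stack and stores the
// raw tos value into that slot, so the JVMTI callback can read it as a
// jvalue:
//
//   sp -= sizeof(jvalue);      // reserve the jvalue slot (T3 = sp)
//   *(jlong*)sp = raw_value;   // store width depends on the bytecode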
  1.3243 +
  1.3244 +    // Save FSR and sometimes SSR because call_VM() will clobber them,
  1.3245 +    // then use them for JVM/DI purposes
  1.3246 +    __ push(FSR);
  1.3247 +    if (bytecode() == Bytecodes::_fast_lputfield) __ push(SSR);
  1.3248 +    // access constant pool cache entry
  1.3249 +    __ get_cache_entry_pointer_at_bcp(T1, T2, 1);
  1.3250 +    // not strictly needed, T2 was verified above
  1.3251 +    __ verify_oop(T2);
  1.3252 +
  1.3253 +    // T2: object pointer copied above
  1.3254 +    // T1: cache entry pointer
  1.3255 +    // T3: jvalue object on the stack
  1.3256 +    __ call_VM(NOREG, CAST_FROM_FN_PTR(address,
  1.3257 +                                       InterpreterRuntime::post_field_modification), T2, T1, T3);
  1.3258 +    if (bytecode() == Bytecodes::_fast_lputfield) __ pop(SSR);  // restore high value
  1.3259 +    __ lw(FSR, SP, 0);
  1.3260 +    __ daddiu(SP, SP, sizeof(jvalue) + 1 * wordSize);
  1.3261 +    __ bind(L2);
  1.3262 +  }
  1.3263  }
  1.3264  
  1.3265  // used registers : T2, T3, T1
  1.3266 @@ -3488,7 +3271,7 @@
  1.3267    __ get_cache_and_index_at_bcp(T3, T2, 1);
  1.3268  
  1.3269    // test for volatile with edx but edx is tos register for lputfield.
  1.3270 -  __ dsll(AT, T2, Address::times_8); 
  1.3271 +  __ dsll(AT, T2, Address::times_8);
  1.3272    __ dadd(AT, T3, AT);
  1.3273    __ ld(T1, AT, in_bytes(base + ConstantPoolCacheEntry::flags_offset()));
  1.3274  
  1.3275 @@ -3507,35 +3290,33 @@
  1.3276  
  1.3277  
  1.3278    // Get object from stack
  1.3279 -  // NOTE : the value in FSR/FSF now
  1.3280 -  //	__ pop(T3);
  1.3281 -  //	__ verify_oop(T3);
  1.3282    pop_and_check_object(T3);
  1.3283 -  // field addresses
  1.3284 +
  1.3285 +  // field address
  1.3286    __ dadd(T2, T3, T2);
  1.3287  
  1.3288    // access field
  1.3289    switch (bytecode()) {
  1.3290 -    case Bytecodes::_fast_bputfield: 
  1.3291 +    case Bytecodes::_fast_bputfield:
  1.3292        __ sb(FSR, T2, 0);
  1.3293        break;
  1.3294      case Bytecodes::_fast_sputfield: // fall through
  1.3295 -    case Bytecodes::_fast_cputfield: 
  1.3296 +    case Bytecodes::_fast_cputfield:
  1.3297        __ sh(FSR, T2, 0);
  1.3298        break;
  1.3299 -    case Bytecodes::_fast_iputfield: 
  1.3300 +    case Bytecodes::_fast_iputfield:
  1.3301        __ sw(FSR, T2, 0);
  1.3302        break;
  1.3303 -    case Bytecodes::_fast_lputfield: 
  1.3304 +    case Bytecodes::_fast_lputfield:
  1.3305        __ sd(FSR, T2, 0 * wordSize);
  1.3306        break;
  1.3307 -    case Bytecodes::_fast_fputfield: 
  1.3308 +    case Bytecodes::_fast_fputfield:
  1.3309        __ swc1(FSF, T2, 0);
  1.3310        break;
  1.3311 -    case Bytecodes::_fast_dputfield: 
  1.3312 +    case Bytecodes::_fast_dputfield:
  1.3313        __ sdc1(FSF, T2, 0 * wordSize);
  1.3314        break;
  1.3315 -    case Bytecodes::_fast_aputfield: 
  1.3316 +    case Bytecodes::_fast_aputfield:
  1.3317        __ store_heap_oop(Address(T2, 0), FSR);
  1.3318        __ store_check(T3);
  1.3319        break;
  1.3320 @@ -3550,36 +3331,32 @@
  1.3321  
  1.3322    // Same code as above, but don't need edx to test for volatile.
  1.3323    __ bind(notVolatile);
  1.3324 -
  1.3325 -  // Get object from stack
  1.3326 -  //	__ pop(T3);
  1.3327 -  //	__ verify_oop(T3);
  1.3328    pop_and_check_object(T3);
  1.3329    //get the field address
  1.3330    __ dadd(T2, T3, T2);
  1.3331  
  1.3332    // access field
  1.3333    switch (bytecode()) {
  1.3334 -    case Bytecodes::_fast_bputfield: 
  1.3335 -      __ sb(FSR, T2, 0); 
  1.3336 +    case Bytecodes::_fast_bputfield:
  1.3337 +      __ sb(FSR, T2, 0);
  1.3338        break;
  1.3339      case Bytecodes::_fast_sputfield: // fall through
  1.3340 -    case Bytecodes::_fast_cputfield: 
  1.3341 +    case Bytecodes::_fast_cputfield:
  1.3342        __ sh(FSR, T2, 0);
  1.3343        break;
  1.3344 -    case Bytecodes::_fast_iputfield: 
  1.3345 +    case Bytecodes::_fast_iputfield:
  1.3346        __ sw(FSR, T2, 0);
  1.3347        break;
  1.3348 -    case Bytecodes::_fast_lputfield: 
  1.3349 +    case Bytecodes::_fast_lputfield:
  1.3350        __ sd(FSR, T2, 0 * wordSize);
  1.3351        break;
  1.3352 -    case Bytecodes::_fast_fputfield: 
  1.3353 +    case Bytecodes::_fast_fputfield:
  1.3354        __ swc1(FSF, T2, 0);
  1.3355        break;
  1.3356 -    case Bytecodes::_fast_dputfield: 
  1.3357 +    case Bytecodes::_fast_dputfield:
  1.3358        __ sdc1(FSF, T2, 0 * wordSize);
  1.3359        break;
  1.3360 -    case Bytecodes::_fast_aputfield: 
  1.3361 +    case Bytecodes::_fast_aputfield:
  1.3362        //add for compressedoops
  1.3363        __ store_heap_oop(Address(T2, 0), FSR);
  1.3364        __ store_check(T3);
  1.3365 @@ -3611,8 +3388,9 @@
  1.3366      __ verify_oop(FSR);
  1.3367      // FSR: object pointer copied above
  1.3368      // T3: cache entry pointer
  1.3369 -    __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access),
  1.3370 -	FSR, T3);
  1.3371 +    __ call_VM(NOREG,
  1.3372 +               CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access),
  1.3373 +               FSR, T3);
  1.3374      __ move(FSR, TSR);
  1.3375      __ bind(L1);
  1.3376    }
  1.3377 @@ -3621,39 +3399,37 @@
  1.3378    __ get_cache_and_index_at_bcp(T3, T2, 1);
  1.3379    // replace index with field offset from cache entry
  1.3380    __ dsll(AT, T2, Address::times_8);
  1.3381 -  //__ dsll(AT, T2, 4);
  1.3382    __ dadd(AT, T3, AT);
  1.3383 -  __ ld(T2, AT, in_bytes(ConstantPoolCache::base_offset() 
  1.3384 -	+ ConstantPoolCacheEntry::f2_offset()));
  1.3385 +  __ ld(T2, AT, in_bytes(ConstantPoolCache::base_offset()
  1.3386 +                         + ConstantPoolCacheEntry::f2_offset()));
  1.3387  
  1.3388    // eax: object
  1.3389    __ verify_oop(FSR);
  1.3390 -  // __ null_check(FSR, 0);
  1.3391    __ null_check(FSR);
  1.3392    // field addresses
  1.3393    __ dadd(FSR, FSR, T2);
  1.3394  
  1.3395    // access field
  1.3396    switch (bytecode()) {
  1.3397 -    case Bytecodes::_fast_bgetfield: 
  1.3398 +    case Bytecodes::_fast_bgetfield:
  1.3399        __ lb(FSR, FSR, 0);
  1.3400        break;
  1.3401 -    case Bytecodes::_fast_sgetfield: 
  1.3402 +    case Bytecodes::_fast_sgetfield:
  1.3403        __ lh(FSR, FSR, 0);
  1.3404        break;
  1.3405 -    case Bytecodes::_fast_cgetfield: 
  1.3406 +    case Bytecodes::_fast_cgetfield:
  1.3407        __ lhu(FSR, FSR, 0);
  1.3408        break;
  1.3409      case Bytecodes::_fast_igetfield:
  1.3410        __ lw(FSR, FSR, 0);
  1.3411        break;
  1.3412 -    case Bytecodes::_fast_lgetfield: 
  1.3413 -      __ stop("should not be rewritten");  
  1.3414 +    case Bytecodes::_fast_lgetfield:
  1.3415 +      __ stop("should not be rewritten");
  1.3416        break;
  1.3417 -    case Bytecodes::_fast_fgetfield: 
  1.3418 +    case Bytecodes::_fast_fgetfield:
  1.3419        __ lwc1(FSF, FSR, 0);
  1.3420        break;
  1.3421 -    case Bytecodes::_fast_dgetfield: 
  1.3422 +    case Bytecodes::_fast_dgetfield:
  1.3423        __ ldc1(FSF, FSR, 0);
  1.3424        break;
  1.3425      case Bytecodes::_fast_agetfield:
  1.3426 @@ -3677,25 +3453,24 @@
  1.3427  // T1 : index
  1.3428  void TemplateTable::fast_xaccess(TosState state) {
  1.3429    transition(vtos, state);
  1.3430 +
  1.3431    // get receiver
  1.3432    __ ld(T1, aaddress(0));
  1.3433    // access constant pool cache
  1.3434    __ get_cache_and_index_at_bcp(T3, T2, 2);
  1.3435    __ dsll(AT, T2, Address::times_8);
  1.3436    __ dadd(AT, T3, AT);
  1.3437 -  __ ld(T2, AT, in_bytes(ConstantPoolCache::base_offset() 
  1.3438 -	+ ConstantPoolCacheEntry::f2_offset()));
  1.3439 -
  1.3440 -  // make sure exception is reported in correct bcp range (getfield is next instruction)
  1.3441 +  __ ld(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset()));
  1.3442 +
  1.3443 +  // make sure exception is reported in correct bcp range (getfield is
  1.3444 +  // next instruction)
  1.3445    __ daddi(BCP, BCP, 1);
  1.3446 -  //	__ null_check(T1, 0);
  1.3447    __ null_check(T1);
  1.3448    __ dadd(T1, T1, T2);
  1.3449  
  1.3450    if (state == itos) {
  1.3451      __ lw(FSR, T1, 0);
  1.3452    } else if (state == atos) {
  1.3453 -    //__ ld(FSR, T1, 0);
  1.3454      __ load_heap_oop(FSR, Address(T1, 0));
  1.3455      __ verify_oop(FSR);
  1.3456    } else if (state == ftos) {
  1.3457 @@ -3706,28 +3481,29 @@
  1.3458    __ daddi(BCP, BCP, -1);
  1.3459  }
  1.3460  
  1.3461 -//---------------------------------------------------
  1.3462 -//-------------------------------------------------
  1.3463 +
  1.3464 +
  1.3465 +//-----------------------------------------------------------------------------
  1.3466  // Calls
  1.3467  
  1.3468 -void TemplateTable::count_calls(Register method, Register temp) {  
  1.3469 -	// implemented elsewhere
  1.3470 -	ShouldNotReachHere();
  1.3471 +void TemplateTable::count_calls(Register method, Register temp) {
  1.3472 +  // implemented elsewhere
  1.3473 +  ShouldNotReachHere();
  1.3474  }
  1.3475  
  1.3476  // method, index, recv, flags: T1, T2, T3, T1
  1.3477  // byte_no = 2 for _invokevirtual, 1 else
  1.3478  // T0 : return address
  1.3479 -// get the method & index of the invoke, and push the return address of 
  1.3480 +// get the method & index of the invoke, and push the return address of
  1.3481  // the invoke(first word in the frame)
  1.3482  // this address is where the return code jmp to.
  1.3483  // NOTE : this method will set T3&T1 as recv&flags
  1.3484  void TemplateTable::prepare_invoke(int byte_no,
  1.3485 -                                   Register method, //linked method (or i-klass)
  1.3486 -                                   Register index, //itable index, MethodType ,etc.
  1.3487 -                                   Register recv, // if caller wants to see it
  1.3488 -                                   Register flags // if caller wants to test it
  1.3489 -		                   ) {
  1.3490 +                                   Register method,  // linked method (or i-klass)
  1.3491 +                                   Register index,   // itable index, MethodType, etc.
  1.3492 +                                   Register recv,    // if caller wants to see it
  1.3493 +                                   Register flags    // if caller wants to test it
  1.3494 +                                   ) {
  1.3495    // determine flags
  1.3496    const Bytecodes::Code code = bytecode();
  1.3497    const bool is_invokeinterface  = code == Bytecodes::_invokeinterface;
  1.3498 @@ -3741,16 +3517,17 @@
  1.3499    assert(save_flags    == (is_invokeinterface || is_invokevirtual), "need flags for vfinal");
  1.3500    assert(flags == noreg || flags == T1, "error flags reg.");
  1.3501    assert(recv  == noreg || recv  == T3, "error recv reg.");
  1.3502 +
  1.3503    // setup registers & access constant pool cache
  1.3504    if(recv == noreg) recv  = T3;
  1.3505    if(flags == noreg) flags  = T1;
  1.3506 -
  1.3507    assert_different_registers(method, index, recv, flags);
  1.3508  
  1.3509    // save 'interpreter return address'
  1.3510    __ save_bcp();
  1.3511  
  1.3512    load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic);
  1.3513 +
  1.3514    if (is_invokedynamic || is_invokehandle) {
  1.3515     Label L_no_push;
  1.3516       __ move(AT, (1 << ConstantPoolCacheEntry::has_appendix_shift));
  1.3517 @@ -3768,26 +3545,24 @@
  1.3518       __ pop(tmp);
  1.3519       __ push(index);  // push appendix (MethodType, CallSite, etc.)
  1.3520       __ bind(L_no_push);
  1.3521 -
  1.3522    }
  1.3523  
  1.3524 -// load receiver if needed (after appendix is pushed so parameter size is correct)
  1.3525 -// Note: no return address pushed yet
  1.3526 +  // load receiver if needed (after appendix is pushed so parameter size is correct)
  1.3527 +  // Note: no return address pushed yet
  1.3528    if (load_receiver) {
  1.3529 -	 __ move(AT, ConstantPoolCacheEntry::parameter_size_mask);
  1.3530 -	 __ andr(recv, flags, AT);
  1.3531 -         // 2014/07/31 Fu: Since we won't push RA on stack, no_return_pc_pushed_yet should be 0.
  1.3532 -	 const int no_return_pc_pushed_yet = 0;  // argument slot correction before we push return address
  1.3533 -	 const int receiver_is_at_end      = -1;  // back off one slot to get receiver
  1.3534 -	 Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end);
  1.3535 -
  1.3536 -	 __ ld(recv, recv_addr);
  1.3537 -	 __ verify_oop(recv);	
  1.3538 +    __ move(AT, ConstantPoolCacheEntry::parameter_size_mask);
  1.3539 +    __ andr(recv, flags, AT);
  1.3540 +    // 2014/07/31 Fu: Since we won't push RA on stack, no_return_pc_pushed_yet should be 0.
  1.3541 +    const int no_return_pc_pushed_yet = 0;  // argument slot correction before we push return address
  1.3542 +    const int receiver_is_at_end      = -1;  // back off one slot to get receiver
  1.3543 +    Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end);
  1.3544 +    __ ld(recv, recv_addr);
  1.3545 +    __ verify_oop(recv);
  1.3546    }
  1.3547    if(save_flags) {
  1.3548 -    //__ movl(r13, flags);
  1.3549      __ move(BCP, flags);
  1.3550    }
  1.3551 +
  1.3552    // compute return type
  1.3553    __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift);
  1.3554    __ andi(flags, flags, 0xf);
  1.3555 @@ -3795,14 +3570,14 @@
  1.3556    // Make sure we don't need to mask flags for tos_state_shift after the above shift
  1.3557    ConstantPoolCacheEntry::verify_tos_state_shift();
  1.3558    // load return address
  1.3559 -  { 
  1.3560 +  {
  1.3561      const address table = (address) Interpreter::invoke_return_entry_table_for(code);
  1.3562      __ li(AT, (long)table);
  1.3563      __ dsll(flags, flags, LogBytesPerWord);
  1.3564      __ dadd(AT, AT, flags);
  1.3565      __ ld(RA, AT, 0);
  1.3566    }
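// In pseudo-C, the lookup above is simply (a sketch, using the names
// from this function; flags holds the 4-bit tos state at this point):
//
//   RA = ((address*)table)[flags];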
  1.3567 - 
  1.3568 +
  1.3569    if (save_flags) {
  1.3570      __ move(flags, BCP);
  1.3571      __ restore_bcp();
  1.3572 @@ -3813,8 +3588,9 @@
  1.3573  // T3 : recv, this two register using convention is by prepare_invoke
  1.3574  // T1 : flags, klass
  1.3575  // Rmethod : method, index must be Rmethod
  1.3576 -void TemplateTable::invokevirtual_helper(Register index, Register recv,
  1.3577 -		Register flags) {
  1.3578 +void TemplateTable::invokevirtual_helper(Register index,
  1.3579 +                                         Register recv,
  1.3580 +                                         Register flags) {
  1.3581  
  1.3582    assert_different_registers(index, recv, flags, T2);
  1.3583  
  1.3584 @@ -3829,7 +3605,7 @@
  1.3585    assert(method == Rmethod, "methodOop must be Rmethod for interpreter calling convention");
  1.3586  
  1.3587    // do the call - the index is actually the method to call
  1.3588 -  // the index is indeed methodOop, for this is vfinal, 
  1.3589 +  // the index is actually the methodOop, because this is vfinal;
  1.3590    // see ConstantPoolCacheEntry::set_method for more info
  1.3591  
  1.3592    __ verify_oop(method);
  1.3593 @@ -3840,33 +3616,32 @@
  1.3594    // profile this call
  1.3595    __ profile_final_call(T2);
  1.3596  
  1.3597 -  // 2014/11/24 Fu 
  1.3598 +  // 2014/11/24 Fu
  1.3599    // T2: tmp, used for mdp
  1.3600    // method: callee
  1.3601    // T9: tmp
  1.3602 -  // is_virtual: true 
  1.3603 +  // is_virtual: true
  1.3604    __ profile_arguments_type(T2, method, T9, true);
  1.3605  
  1.3606 -//  __ move(T0, recv);
  1.3607    __ jump_from_interpreted(method, T2);
  1.3608  
  1.3609    __ bind(notFinal);
  1.3610  
  1.3611    // get receiver klass
  1.3612    __ null_check(recv, oopDesc::klass_offset_in_bytes());
  1.3613 -  // Keep recv in ecx for callee expects it there
  1.3614    __ load_klass(T2, recv);
  1.3615    __ verify_oop(T2);
  1.3616 +
  1.3617    // profile this call
  1.3618    __ profile_virtual_call(T2, T0, T1);
  1.3619  
  1.3620    // get target methodOop & entry point
  1.3621 -  const int base = InstanceKlass::vtable_start_offset() * wordSize;    
  1.3622 +  const int base = InstanceKlass::vtable_start_offset() * wordSize;
  1.3623    assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
  1.3624    __ dsll(AT, index, Address::times_ptr);
  1.3625    // T2: receiver
  1.3626    __ dadd(AT, T2, AT);
  1.3627 -  //this is a ualign read 
  1.3628 +  // this is an unaligned read
  1.3629    __ ld(method, AT, base + vtableEntry::method_offset_in_bytes());
  1.3630    __ profile_arguments_type(T2, method, T9, true);
  1.3631    __ jump_from_interpreted(method, T2);
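
// The vtable load above is, in pseudo-C (sketch only):
//
//   method = *(Method**)(T2 + base + index * wordSize
//                        + vtableEntry::method_offset_in_bytes());
//
// i.e. entry 'index' of the vtable that starts at offset 'base' inside
// the receiver's klass (T2).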
  1.3632 @@ -3892,11 +3667,11 @@
  1.3633    __ null_check(T3);
  1.3634    __ profile_call(T9);
  1.3635  
  1.3636 -  // 2014/11/24 Fu 
  1.3637 +  // 2014/11/24 Fu
  1.3638    // T8: tmp, used for mdp
  1.3639    // Rmethod: callee
  1.3640    // T9: tmp
  1.3641 -  // is_virtual: false 
  1.3642 +  // is_virtual: false
  1.3643    __ profile_arguments_type(T8, Rmethod, T9, false);
  1.3644  
  1.3645    __ jump_from_interpreted(Rmethod, T9);
  1.3646 @@ -3911,21 +3686,21 @@
  1.3647  
  1.3648    __ profile_call(T9);
  1.3649  
  1.3650 -  // 2014/11/24 Fu 
  1.3651 +  // 2014/11/24 Fu
  1.3652    // T8: tmp, used for mdp
  1.3653    // Rmethod: callee
  1.3654    // T9: tmp
  1.3655 -  // is_virtual: false 
  1.3656 +  // is_virtual: false
  1.3657    __ profile_arguments_type(T8, Rmethod, T9, false);
  1.3658  
  1.3659    __ jump_from_interpreted(Rmethod, T9);
  1.3660  }
  1.3661  
  1.3662 -// i have no idea what to do here, now. for future change. FIXME. 
  1.3663 +// not implemented on MIPS yet; left for a future change. FIXME.
  1.3664  void TemplateTable::fast_invokevfinal(int byte_no) {
  1.3665 -	transition(vtos, vtos);
  1.3666 -	assert(byte_no == f2_byte, "use this argument");
  1.3667 -	__ stop("fast_invokevfinal not used on x86");
  1.3668 +  transition(vtos, vtos);
  1.3669 +  assert(byte_no == f2_byte, "use this argument");
  1.3670 +  __ stop("fast_invokevfinal not used on mips64");
  1.3671  }
  1.3672  
  1.3673  // used registers : T0, T1, T2, T3, T1, A7
  1.3674 @@ -3941,26 +3716,23 @@
  1.3675    prepare_invoke(byte_no, T2, Rmethod, T3, T1);
  1.3676    // T2: Interface
  1.3677    // Rmethod: index
  1.3678 -  // T3: receiver    
  1.3679 +  // T3: receiver
  1.3680    // T1: flags
  1.3681 +
  1.3682 +  // Special case of invokeinterface called for virtual method of
  1.3683 +  // java.lang.Object.  See cpCacheOop.cpp for details.
  1.3684 +  // This code isn't produced by javac, but could be produced by
  1.3685 +  // another compliant java compiler.
  1.3686    Label notMethod;
  1.3687    __ move(AT, (1 << ConstantPoolCacheEntry::is_forced_virtual_shift));
  1.3688    __ andr(AT, T1, AT);
  1.3689    __ beq(AT, R0, notMethod);
  1.3690    __ delayed()->nop();
  1.3691  
  1.3692 -  // Special case of invokeinterface called for virtual method of
  1.3693 -  // java.lang.Object.  See cpCacheOop.cpp for details.
  1.3694 -  // This code isn't produced by javac, but could be produced by
  1.3695 -  // another compliant java compiler.
  1.3696    invokevirtual_helper(Rmethod, T3, T1);
  1.3697 -
  1.3698    __ bind(notMethod);
  1.3699    // Get receiver klass into T1 - also a null check
  1.3700 -  //__ ld(T1, T3, oopDesc::klass_offset_in_bytes());
  1.3701    //add for compressedoops
  1.3702 -  //__ restore_locals();
  1.3703 -  //__ null_check(T3, oopDesc::klass_offset_in_bytes());
  1.3704    __ load_klass(T1, T3);
  1.3705    __ verify_oop(T1);
  1.3706  
  1.3707 @@ -3969,7 +3741,7 @@
  1.3708  
  1.3709    // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  1.3710    // TODO: x86 add a new method lookup_interface_method  // LEE
  1.3711 -  const int base = InstanceKlass::vtable_start_offset() * wordSize;    
  1.3712 +  const int base = InstanceKlass::vtable_start_offset() * wordSize;
  1.3713    assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below");
  1.3714    __ lw(AT, T1, InstanceKlass::vtable_length_offset() * wordSize);
  1.3715    __ dsll(AT, AT, Address::times_8);
  1.3716 @@ -3983,7 +3755,7 @@
  1.3717  
  1.3718    Label entry, search, interface_ok;
  1.3719  
  1.3720 -  ///__ jmp(entry);   
  1.3721 +  ///__ jmp(entry);
  1.3722    __ b(entry);
  1.3723    __ delayed()->nop();
  1.3724  
  1.3725 @@ -4001,16 +3773,15 @@
  1.3726    // throw exception
  1.3727    // the call_VM checks for exception, so we should never return here.
  1.3728  
  1.3729 -  //__ pop();//FIXME here,			
  1.3730 -  // pop return address (pushed by prepare_invoke). 
  1.3731 +  // x86 would pop the return address (pushed by prepare_invoke) here;
  1.3733    // no need now, we just save the value in RA now
  1.3734  
  1.3735    __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_IncompatibleClassChangeError));
  1.3736    __ should_not_reach_here();
  1.3737  
  1.3738    __ bind(interface_ok);
  1.3739 -  //NOTICE here, no pop as x86 do	
  1.3740 -  //__ lw(AT, T0, itableOffsetEntry::interface_offset_in_bytes());
  1.3741 +  // NOTICE: no pop here, unlike x86
  1.3742    __ bne(AT, T2, search);
  1.3743    __ delayed()->nop();
  1.3744  
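// Roughly, the search above walks the itableOffsetEntry list that
// follows the vtable (a pseudo-C sketch; the real loop is split across
// the entry/search/interface_ok labels and the hunks elided here):
//
//   for (e = first_itable_entry; ; e++) {
//     if (e->interface() == NULL)       // end of list
//       throw IncompatibleClassChangeError;
//     if (e->interface() == T2) break;  // found: interface_ok
//   }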
  1.3745 @@ -4028,10 +3799,8 @@
  1.3746    // Note: This should be done more efficiently via a throw_abstract_method_error
  1.3747    //       interpreter entry point and a conditional jump to it in case of a null
  1.3748    //       method.
  1.3749 -  { 
  1.3750 +  {
  1.3751      Label L;
  1.3752 -    ///__ testl(ebx, ebx);
  1.3753 -    ///__ jcc(Assembler::notZero, L);
  1.3754      __ bne(Rmethod, R0, L);
  1.3755      __ delayed()->nop();
  1.3756  
  1.3757 @@ -4039,19 +3808,19 @@
  1.3758      // note: must restore interpreter registers to canonical
  1.3759      //       state for exception handling to work correctly!
  1.3760      ///__ popl(ebx);          // pop return address (pushed by prepare_invoke)
  1.3761 -    //__ restore_bcp();      // esi must be correct for exception handler   
  1.3762 +    //__ restore_bcp();      // esi must be correct for exception handler
  1.3763      //(was destroyed)
  1.3764 -    //__ restore_locals();   // make sure locals pointer 
  1.3765 +    //__ restore_locals();   // make sure locals pointer
  1.3766      //is correct as well (was destroyed)
  1.3767 -    ///__ call_VM(noreg, CAST_FROM_FN_PTR(address, 
  1.3768 +    ///__ call_VM(noreg, CAST_FROM_FN_PTR(address,
  1.3769      //InterpreterRuntime::throw_AbstractMethodError));
  1.3770      __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError));
  1.3771      // the call_VM checks for exception, so we should never return here.
  1.3772      __ should_not_reach_here();
  1.3773      __ bind(L);
  1.3774    }
  1.3775 -  
  1.3776 -  // 2014/11/24 Fu 
  1.3777 +
  1.3778 +  // 2014/11/24 Fu
  1.3779    // T8: tmp, used for mdp
  1.3780    // Rmethod: callee
  1.3781    // T9: tmp
  1.3782 @@ -4061,6 +3830,7 @@
  1.3783    __ jump_from_interpreted(Rmethod, T9);
  1.3784  }
  1.3785  
  1.3786 +
  1.3787  void TemplateTable::invokehandle(int byte_no) {
  1.3788    transition(vtos, vtos);
  1.3789    assert(byte_no == f1_byte, "use this argument");
  1.3790 @@ -4073,34 +3843,34 @@
  1.3791       __ should_not_reach_here();
  1.3792       return;
  1.3793     }
  1.3794 -   
  1.3795 +
  1.3796     prepare_invoke(byte_no, T2_method, FSR_mtype, T3_recv);
  1.3797     //??__ verify_method_ptr(T2_method);
  1.3798     __ verify_oop(T3_recv);
  1.3799     __ null_check(T3_recv);
  1.3800 - 
  1.3801 +
  1.3802     // rax: MethodType object (from cpool->resolved_references[f1], if necessary)
  1.3803     // rbx: MH.invokeExact_MT method (from f2)
  1.3804 - 
  1.3805 +
  1.3806     // Note:  rax_mtype is already pushed (if necessary) by prepare_invoke
  1.3807 - 
  1.3808 +
  1.3809     // FIXME: profile the LambdaForm also
  1.3810     __ profile_final_call(T9);
  1.3811  
  1.3812 -   // 2014/11/24 Fu 
  1.3813 +   // 2014/11/24 Fu
  1.3814     // T8: tmp, used for mdp
  1.3815     // T2_method: callee
  1.3816     // T9: tmp
  1.3817     // is_virtual: true
  1.3818     __ profile_arguments_type(T8, T2_method, T9, true);
  1.3819 - 
  1.3820 +
  1.3821    __ jump_from_interpreted(T2_method, T9);
  1.3822  }
  1.3823  
  1.3824   void TemplateTable::invokedynamic(int byte_no) {
  1.3825     transition(vtos, vtos);
  1.3826     assert(byte_no == f1_byte, "use this argument");
  1.3827 - 
  1.3828 +
  1.3829     if (!EnableInvokeDynamic) {
  1.3830       // We should not encounter this bytecode if !EnableInvokeDynamic.
  1.3831       // The verifier will stop it.  However, if we get past the verifier,
  1.3832 @@ -4111,33 +3881,33 @@
  1.3833       __ should_not_reach_here();
  1.3834       return;
  1.3835     }
  1.3836 - 
  1.3837 +
  1.3838     //const Register Rmethod   = T2;
  1.3839     const Register T2_callsite = T2;
  1.3840 - 
  1.3841 +
  1.3842     prepare_invoke(byte_no, Rmethod, T2_callsite);
  1.3843 - 
  1.3844 +
  1.3845     // rax: CallSite object (from cpool->resolved_references[f1])
  1.3846     // rbx: MH.linkToCallSite method (from f2)
  1.3847 - 
  1.3848 +
  1.3849     // Note:  rax_callsite is already pushed by prepare_invoke
  1.3850     // %%% should make a type profile for any invokedynamic that takes a ref argument
  1.3851     // profile this call
  1.3852     __ profile_call(T9);
  1.3853  
  1.3854 -   // 2014/11/24 Fu 
  1.3855 +   // 2014/11/24 Fu
  1.3856     // T8: tmp, used for mdp
  1.3857     // Rmethod: callee
  1.3858     // T9: tmp
  1.3859 -   // is_virtual: false 
  1.3860 +   // is_virtual: false
  1.3861     __ profile_arguments_type(T8, Rmethod, T9, false);
  1.3862  
  1.3863     __ verify_oop(T2_callsite);
  1.3864 - 
  1.3865 +
  1.3866     __ jump_from_interpreted(Rmethod, T9);
  1.3867   }
  1.3868  
  1.3869 -//----------------------------------------------------------------------------------------------------
  1.3870 +//-----------------------------------------------------------------------------
  1.3871  // Allocation
  1.3872  // T1 : tags & buffer end & thread
  1.3873  // T2 : object end
  1.3874 @@ -4154,7 +3924,7 @@
  1.3875    Label slow_case;
  1.3876    Label done;
  1.3877    Label initialize_header;
  1.3878 -  Label initialize_object;  // including clearing the fields
  1.3879 +  Label initialize_object; // including clearing the fields
  1.3880    Label allocate_shared;
  1.3881  
  1.3882    // get InstanceKlass in T3
  1.3883 @@ -4163,19 +3933,17 @@
  1.3884    __ dadd(AT, A1, AT);
  1.3885    __ ld(T3, AT, sizeof(ConstantPool));
  1.3886  
  1.3887 -  // make sure the class we're about to instantiate has been resolved. 
  1.3888 +  // make sure the class we're about to instantiate has been resolved.
  1.3889    // Note: slow_case does a pop of stack, which is why we loaded class/pushed above
  1.3890    const int tags_offset = Array<u1>::base_offset_in_bytes();
  1.3891    __ dadd(T1, T1, A2);
  1.3892    __ lb(AT, T1, tags_offset);
  1.3893 -  //__ addiu(AT, AT, - (int)JVM_CONSTANT_UnresolvedClass);
  1.3894    __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class);
  1.3895 -  //__ beq(AT, R0, slow_case);
  1.3896    __ bne(AT, R0, slow_case);
  1.3897    __ delayed()->nop();
  1.3898  
  1.3899 -  /*make sure klass is initialized & doesn't have finalizer*/
  1.3900 -
  1.3901 +
  1.3902 +  // make sure klass is initialized & doesn't have finalizer
  1.3903    // make sure klass is fully initialized
  1.3904    __ lhu(T1, T3, in_bytes(InstanceKlass::init_state_offset()));
  1.3905    __ daddiu(AT, T1, - (int)InstanceKlass::fully_initialized);
  1.3906 @@ -4191,13 +3959,10 @@
  1.3907    __ bne(AT, R0, slow_case);
  1.3908    __ delayed()->nop();
  1.3909  
  1.3910 -  // get instance_size in InstanceKlass (already aligned) in T0, 
  1.3911 -  // be sure to preserve this value 
  1.3912 -  //__ lw(T0, T3, Klass::size_helper_offset_in_bytes() + sizeof(oopDesc));
  1.3913 -  //Klass::_size_helper is renamed Klass::_layout_helper. aoqi 
  1.3914 +  // get instance_size in InstanceKlass (already aligned) in T0,
  1.3915 +  // be sure to preserve this value
  1.3916    __ lw(T0, T3, in_bytes(Klass::layout_helper_offset()) );
  1.3917  
  1.3918 -  // 
  1.3919    // Allocate the instance
  1.3920    // 1) Try to allocate in the TLAB
  1.3921    // 2) if fail and the object is large allocate in the shared Eden
  1.3922 @@ -4220,7 +3985,6 @@
  1.3923      // get tlab_end
  1.3924      __ ld(AT, thread, in_bytes(JavaThread::tlab_end_offset()));
  1.3925      __ slt(AT, AT, T2);
  1.3926 -    //		__ bne(AT, R0, allocate_shared);
  1.3927      __ bne(AT, R0, allow_shared_alloc ? allocate_shared : slow_case);
  1.3928      __ delayed()->nop();
  1.3929      __ sd(T2, thread, in_bytes(JavaThread::tlab_top_offset()));
  1.3930 @@ -4233,32 +3997,20 @@
  1.3931        __ b_far(initialize_object);
  1.3932      }
  1.3933      __ delayed()->nop();
  1.3934 -    /*
  1.3935 -
  1.3936 -       if (CMSIncrementalMode) {
  1.3937 -    // No allocation in shared eden. 
  1.3938 -    ///__ jmp(slow_case);
  1.3939 -    __ b(slow_case);
  1.3940 -    __ delayed()->nop();
  1.3941 -    }
  1.3942 -     */ 
  1.3943    }
  1.3944  
  1.3945    // Allocation in the shared Eden , if allowed
  1.3946    // T0 : instance size in words
  1.3947 -  if(allow_shared_alloc){ 
  1.3948 +  if (allow_shared_alloc) {
  1.3949      __ bind(allocate_shared);
  1.3950 +
  1.3951      Label retry;
  1.3952 -    //Address heap_top(T1, (int)Universe::heap()->top_addr());
  1.3953      Address heap_top(T1);
  1.3954 -    //__ lui(T1, Assembler::split_high((int)Universe::heap()->top_addr()));
  1.3955      __ li(T1, (long)Universe::heap()->top_addr());
  1.3956  
  1.3957      __ ld(FSR, heap_top);
  1.3958      __ bind(retry);
  1.3959      __ dadd(T2, FSR, T0);
  1.3960 -    //__ lui(AT, Assembler::split_high((int)Universe::heap()->end_addr()));
  1.3961 -    //__ lw(AT, AT, Assembler::split_low((int)Universe::heap()->end_addr()));
  1.3962      __ li(AT, (long)Universe::heap()->end_addr());
  1.3963      __ ld(AT, AT, 0);
  1.3964      __ slt(AT, AT, T2);
  1.3965 @@ -4273,8 +4025,7 @@
  1.3966      // T2: object end
  1.3967      // T0: instance size in words
  1.3968  
  1.3969 -    // if someone beat us on the allocation, try again, otherwise continue 
  1.3970 -    //__ lui(T1, Assembler::split_high((int)Universe::heap()->top_addr()));
  1.3971 +    // if someone beat us to the allocation, try again, otherwise continue
  1.3972      __ cmpxchg(T2, heap_top, FSR);
  1.3973      __ beq(AT, R0, retry);
  1.3974      __ delayed()->nop();
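
// The retry loop above is the classic CAS bump allocation, roughly:
//
//   do {
//     old_top = *Universe::heap()->top_addr();
//     new_top = old_top + size;
//     if (new_top > *Universe::heap()->end_addr()) goto slow_case;
//   } while (!CAS(Universe::heap()->top_addr(), old_top, new_top));
//   obj = old_top;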
  1.3975 @@ -4303,35 +4054,29 @@
  1.3976  #endif
  1.3977  
  1.3978      // initialize remaining object fields: T0 is a multiple of 2
  1.3979 -    { 
  1.3980 +    {
  1.3981        Label loop;
  1.3982        __ dadd(T1, FSR, T0);
  1.3983        __ daddi(T1, T1, -oopSize);
  1.3984  
  1.3985        __ bind(loop);
  1.3986        __ sd(R0, T1, sizeof(oopDesc) + 0 * oopSize);
  1.3987 -//      __ sd(R0, T1, sizeof(oopDesc) + 1 * oopSize);
  1.3988 +      __ bne(T1, FSR, loop); // don't clear header
  1.3989        __ delayed()->daddi(T1, T1, -oopSize);
  1.3990 -      // actually sizeof(oopDesc)==8, so we can move  
  1.3991 +      // actually sizeof(oopDesc)==8, so we can move
  1.3992        // __ addiu(AT, AT, -8) to delay slot, and compare FSR with T1
  1.3993      }
  1.3994 -    //klass in T3, 
  1.3995 +    // klass in T3,
  1.3996      // initialize object header only.
  1.3997      __ bind(initialize_header);
  1.3998      if (UseBiasedLocking) {
  1.3999 -      // __ popl(ecx);   // get saved klass back in the register.
  1.4000 -      // __ movl(ebx, Address(ecx, Klass::prototype_header_offset_in_bytes() 
  1.4001 -      // + klassOopDesc::klass_part_offset_in_bytes()));
  1.4002 -      __ ld(AT, T3, in_bytes(Klass::prototype_header_offset())); 
  1.4003 -      // __ movl(Address(eax, oopDesc::mark_offset_in_bytes ()), ebx);
  1.4004 -      __ sd(AT, FSR, oopDesc::mark_offset_in_bytes ());    
  1.4005 +      __ ld(AT, T3, in_bytes(Klass::prototype_header_offset()));
  1.4006 +      __ sd(AT, FSR, oopDesc::mark_offset_in_bytes ());
  1.4007      } else {
  1.4008        __ li(AT, (long)markOopDesc::prototype());
  1.4009        __ sd(AT, FSR, oopDesc::mark_offset_in_bytes());
  1.4010      }
  1.4011  
  1.4012 -    //__ sd(T3, FSR, oopDesc::klass_offset_in_bytes());
  1.4013      __ store_klass_gap(FSR, R0);
  1.4014      __ store_klass(FSR, T3);
  1.4015  
  1.4016 @@ -4340,12 +4085,14 @@
  1.4017        // Trigger dtrace event for fastpath
  1.4018        __ push(atos);
  1.4019        __ call_VM_leaf(
  1.4020 -	  CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), FSR);
  1.4021 +           CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), FSR);
  1.4022        __ pop(atos);
  1.4023 +
  1.4024      }
  1.4025      __ b(done);
  1.4026      __ delayed()->nop();
  1.4027 -  }	
  1.4028 +  }
  1.4029 +
  1.4030    // slow case
  1.4031    __ bind(slow_case);
  1.4032    call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), A1, A2);
  1.4033 @@ -4356,11 +4103,11 @@
  1.4034  }
  1.4035  
  1.4036  void TemplateTable::newarray() {
  1.4037 -	transition(itos, atos);
  1.4038 -	__ lbu(A1, at_bcp(1));
  1.4039 -	//type, count
  1.4040 -	call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), A1, FSR);
  1.4041 -        __ sync();
  1.4042 +  transition(itos, atos);
  1.4043 +  __ lbu(A1, at_bcp(1));
  1.4044 +  // type, count
  1.4045 +  call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), A1, FSR);
  1.4046 +  __ sync();
  1.4047  }
  1.4048  
  1.4049  void TemplateTable::anewarray() {
  1.4050 @@ -4428,7 +4175,6 @@
  1.4051    __ bind(resolved);
  1.4052  
  1.4053    // get subklass in T2
  1.4054 -  //__ ld(T2, FSR, oopDesc::klass_offset_in_bytes());
  1.4055    //add for compressedoops
  1.4056    __ load_klass(T2, FSR);
  1.4057    // Superklass in T3.  Subklass in T2.
  1.4058 @@ -4444,12 +4190,12 @@
  1.4059  
  1.4060    // Collect counts on whether this check-cast sees NULLs a lot or not.
  1.4061    if (ProfileInterpreter) {
  1.4062 -	__ b(done);
  1.4063 -	__ delayed()->nop();
  1.4064 -	__ bind(is_null);
  1.4065 -	__ profile_null_seen(T3);
  1.4066 +    __ b(done);
  1.4067 +    __ delayed()->nop();
  1.4068 +    __ bind(is_null);
  1.4069 +    __ profile_null_seen(T3);
  1.4070    } else {
  1.4071 -	__ bind(is_null);
  1.4072 +    __ bind(is_null);
  1.4073    }
  1.4074    __ bind(done);
  1.4075  }
  1.4076 @@ -4477,11 +4223,6 @@
  1.4077    __ beq(AT, R0, quicked);
  1.4078    __ delayed()->nop();
  1.4079  
  1.4080 -  // get superklass in T3
  1.4081 -  //__ move(TSR, FSR);
  1.4082 -  // sometimes S2 may be changed during the call, 
  1.4083 -  // be careful if u use TSR as a saving place
  1.4084 -  //__ push(FSR);
  1.4085    __ push(atos);
  1.4086    const Register thread = TREG;
  1.4087  #ifndef OPT_THREAD
  1.4088 @@ -4489,24 +4230,21 @@
  1.4089  #endif
  1.4090    call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
  1.4091    __ get_vm_result_2(T3, thread);
  1.4092 -  //__ lw(FSR, SP, 0);
  1.4093 -  __ pop_ptr(FSR);	
  1.4094 +  __ pop_ptr(FSR);
  1.4095    __ b(resolved);
  1.4096    __ delayed()->nop();
  1.4097 -  //__ move(FSR, TSR);
  1.4098  
  1.4099    // get superklass in T3, subklass in T2
  1.4100    __ bind(quicked);
  1.4101    __ dsll(AT, T2, Address::times_8);
  1.4102    __ daddu(AT, T3, AT);
  1.4103 -  __ ld(T3, AT, sizeof(ConstantPool)); 
  1.4104 +  __ ld(T3, AT, sizeof(ConstantPool));
  1.4105  
  1.4106    __ bind(resolved);
  1.4107    // get subklass in T2
  1.4108 -  //__ ld(T2, FSR, oopDesc::klass_offset_in_bytes());
  1.4109    //add for compressedoops
  1.4110    __ load_klass(T2, FSR);
  1.4111 -  
  1.4112 +
  1.4113    // Superklass in T3.  Subklass in T2.
  1.4114    __ gen_subtype_check(T3, T2, ok_is_subtype);
  1.4115    // Come here on failure
  1.4116 @@ -4519,12 +4257,12 @@
  1.4117  
  1.4118    // Collect counts on whether this test sees NULLs a lot or not.
  1.4119    if (ProfileInterpreter) {
  1.4120 -     __ beq(R0, R0, done);
  1.4121 -     __ nop();
  1.4122 -     __ bind(is_null);
  1.4123 -     __ profile_null_seen(T3);
  1.4124 +    __ beq(R0, R0, done);
  1.4125 +    __ nop();
  1.4126 +    __ bind(is_null);
  1.4127 +    __ profile_null_seen(T3);
  1.4128    } else {
  1.4129 -     __ bind(is_null);   // same as 'done'
  1.4130 +    __ bind(is_null);   // same as 'done'
  1.4131    }
  1.4132    __ bind(done);
  1.4133    // FSR = 0: obj == NULL or  obj is not an instanceof the specified klass
  1.4134 @@ -4535,44 +4273,39 @@
  1.4135  //--------------------------------------------
  1.4136  // Breakpoints
  1.4137  void TemplateTable::_breakpoint() {
  1.4138 -
  1.4139 -	// Note: We get here even if we are single stepping..
  1.4140 -	// jbug inists on setting breakpoints at every bytecode 
  1.4141 -	// even if we are in single step mode.  
  1.4142 -
  1.4143 -	transition(vtos, vtos);
  1.4144 -
  1.4145 -	// get the unpatched byte code
  1.4146 -	///__ get_method(ecx);
  1.4147 -	///__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::get_original_bytecode_at)
  1.4148 -	//, ecx, esi);
  1.4149 -	///__ movl(ebx, eax);
  1.4150 -	__ get_method(A1);
  1.4151 -	__ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::get_original_bytecode_at), 
  1.4152 -			A1, BCP);
  1.4153 -	__ move(Rnext, V0); // Jin: Rnext will be used in dispatch_only_normal
  1.4154 -
  1.4155 -	// post the breakpoint event
  1.4156 -	///__ get_method(ecx);
  1.4157 -	///__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), ecx, esi);
  1.4158 -	__ get_method(A1);
  1.4159 -	__ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), A1, BCP);
  1.4160 -
  1.4161 -	// complete the execution of original bytecode
  1.4162 -	__ dispatch_only_normal(vtos);
  1.4163 -} 
  1.4164 -
  1.4165 -//----------------------------------------------------------------------------------------------------
  1.4166 +  // Note: We get here even if we are single stepping.
  1.4167 +  // jbug insists on setting breakpoints at every bytecode
  1.4168 +  // even if we are in single step mode.
  1.4169 +
  1.4170 +  transition(vtos, vtos);
  1.4171 +
  1.4172 +  // get the unpatched byte code
  1.4173 +  __ get_method(A1);
  1.4174 +  __ call_VM(NOREG,
  1.4175 +             CAST_FROM_FN_PTR(address,
  1.4176 +                              InterpreterRuntime::get_original_bytecode_at),
  1.4177 +             A1, BCP);
  1.4178 +  __ move(Rnext, V0); // Jin: Rnext will be used in dispatch_only_normal
  1.4179 +
  1.4180 +  // post the breakpoint event
  1.4181 +  __ get_method(A1);
  1.4182 +  __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), A1, BCP);
  1.4183 +
  1.4184 +  // complete the execution of original bytecode
  1.4185 +  __ dispatch_only_normal(vtos);
  1.4186 +}
  1.4187 +
  1.4188 +//-----------------------------------------------------------------------------
  1.4189  // Exceptions
  1.4190  
  1.4191  void TemplateTable::athrow() {
  1.4192 -	transition(atos, vtos);
  1.4193 -	__ null_check(FSR);
  1.4194 -	__ jmp(Interpreter::throw_exception_entry());
  1.4195 -	__ delayed()->nop();
  1.4196 +  transition(atos, vtos);
  1.4197 +  __ null_check(FSR);
  1.4198 +  __ jmp(Interpreter::throw_exception_entry());
  1.4199 +  __ delayed()->nop();
  1.4200  }
  1.4201  
  1.4202 -//----------------------------------------------------------------------------------------------------
  1.4203 +//-----------------------------------------------------------------------------
  1.4204  // Synchronization
  1.4205  //
  1.4206  // Note: monitorenter & exit are symmetric routines; which is reflected
  1.4207 @@ -4594,10 +4327,11 @@
  1.4208  // object always in FSR
  1.4209  void TemplateTable::monitorenter() {
  1.4210    transition(atos, vtos);
  1.4211 +
  1.4212    // check for NULL object
  1.4213    __ null_check(FSR);
  1.4214  
  1.4215 -  const Address monitor_block_top(FP, frame::interpreter_frame_monitor_block_top_offset 
  1.4216 +  const Address monitor_block_top(FP, frame::interpreter_frame_monitor_block_top_offset
  1.4217        * wordSize);
  1.4218    const int entry_size = (frame::interpreter_frame_monitor_size()* wordSize);
  1.4219    Label allocated;
  1.4220 @@ -4606,7 +4340,7 @@
  1.4221    __ move(c_rarg0, R0);
  1.4222  
  1.4223    // find a free slot in the monitor block (result in edx)
  1.4224 -  { 
  1.4225 +  {
  1.4226      Label entry, loop, exit, next;
  1.4227      __ ld(T2, monitor_block_top);
  1.4228      __ b(entry);
  1.4229 @@ -4634,7 +4368,7 @@
  1.4230    __ delayed()->nop();
  1.4231  
  1.4232    // allocate one if there's no free slot
  1.4233 -  { 
  1.4234 +  {
  1.4235      Label entry, loop;
  1.4236      // 1. compute new pointers                   // SP: old expression stack top
  1.4237      __ ld(c_rarg0, monitor_block_top);
  1.4238 @@ -4648,18 +4382,18 @@
  1.4239      __ bind(loop);
  1.4240      __ ld(AT, T3, entry_size);
  1.4241      __ sd(AT, T3, 0);
  1.4242 -    __ daddi(T3, T3, wordSize); 
  1.4243 +    __ daddi(T3, T3, wordSize);
  1.4244      __ bind(entry);
  1.4245      __ bne(T3, c_rarg0, loop);
  1.4246      __ delayed()->nop();
  1.4247    }
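
// The copy loop above slides the expression stack down to make room
// for one more monitor, roughly (T3 starts at the new stack top, set
// up in code elided from this hunk):
//
//   while (T3 != c_rarg0) {   // c_rarg0 = new monitor block bottom
//     *(intptr_t*)T3 = *(intptr_t*)(T3 + entry_size);
//     T3 += wordSize;
//   }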
  1.4248  
  1.4249    __ bind(allocated);
  1.4250 -  // Increment bcp to point to the next bytecode, 
  1.4251 -  // so exception handling for async. exceptions work correctly. 
  1.4252 -  // The object has already been poped from the stack, so the 
  1.4253 +  // Increment bcp to point to the next bytecode,
  1.4254 +  // so exception handling for asynchronous exceptions works correctly.
  1.4255 +  // The object has already been popped from the stack, so the
  1.4256    // expression stack looks correct.
  1.4257 -  __ daddi(BCP, BCP, 1); 
  1.4258 +  __ daddi(BCP, BCP, 1);
  1.4259    __ sd(FSR, c_rarg0, BasicObjectLock::obj_offset_in_bytes());
  1.4260    __ lock_object(c_rarg0);
  1.4261    // check to make sure this monitor doesn't cause stack overflow after locking
  1.4262 @@ -4681,7 +4415,7 @@
  1.4263    Label found;
  1.4264  
  1.4265    // find matching slot
  1.4266 -  { 
  1.4267 +  {
  1.4268      Label entry, loop;
  1.4269      __ ld(c_rarg0, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize);
  1.4270      __ b(entry);
  1.4271 @@ -4699,8 +4433,8 @@
  1.4272  
  1.4273    // error handling. Unlocking was not block-structured
  1.4274    Label end;
  1.4275 -  __ call_VM(NOREG, CAST_FROM_FN_PTR(address, 
  1.4276 -	InterpreterRuntime::throw_illegal_monitor_state_exception));
  1.4277 +  __ call_VM(NOREG, CAST_FROM_FN_PTR(address,
  1.4278 +             InterpreterRuntime::throw_illegal_monitor_state_exception));
  1.4279    __ should_not_reach_here();
  1.4280  
  1.4281    // call run-time routine
  1.4282 @@ -4712,8 +4446,8 @@
  1.4283    __ bind(end);
  1.4284  }
  1.4285  
  1.4286 -//--------------------------------------------------------------------------------------------------// Wide instructions
  1.4287 -
  1.4288 +
  1.4289 +// Wide instructions
  1.4290  void TemplateTable::wide() {
  1.4291    transition(vtos, vtos);
  1.4292    // Note: the esi increment step is part of the individual wide bytecode implementations
  1.4293 @@ -4726,21 +4460,19 @@
  1.4294    __ delayed()->nop();
  1.4295  }
  1.4296  
  1.4297 -//--------------------------------------------------------------------------------------------------// Multi arrays
  1.4298  
  1.4299  void TemplateTable::multianewarray() {
  1.4300    transition(vtos, atos);
  1.4301    // last dim is on top of stack; we want address of first one:
  1.4302    // first_addr = last_addr + (ndims - 1) * wordSize
  1.4303 -  __ lbu(A1, at_bcp(3));	// dimension
  1.4304 -  __ daddi(A1, A1, -1);	
  1.4305 +  __ lbu(A1, at_bcp(3));  // dimension
  1.4306 +  __ daddi(A1, A1, -1);
  1.4307    __ dsll(A1, A1, Address::times_8);
  1.4308 -  __ dadd(A1, SP, A1);		// now A1 pointer to the count array on the stack
  1.4309 +  __ dadd(A1, SP, A1);    // now A1 points to the count array on the stack
  1.4310    call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), A1);
  1.4311    __ lbu(AT, at_bcp(3));
  1.4312    __ dsll(AT, AT, Address::times_8);
  1.4313    __ dadd(SP, SP, AT);
  1.4314    __ sync();
  1.4315  }
  1.4316 -
  1.4317  #endif // !CC_INTERP
