src/cpu/mips/vm/templateTable_mips_64.cpp

author:      jiangshaofeng
date:        Thu, 09 Mar 2017 15:11:22 +0800
changeset:   362:e46417a01c32
parent:      257:bba1c817d040
child:       406:bcbfdb66a6fb
permissions: -rw-r--r--

#4784 [interpreter] Use array bounds check instructions to optimize array load and store bytecodes.
Reviewed-by: aoqi
Contributed and tested by: jiangshaofeng, aoqi

Added Loongson EXT instructions such as gslwle/gslwgt. These instructions check the array bound automatically and throw an exception when the index is out of bounds.
They are used to accelerate array bytecodes in the interpreter, such as iaload and iastore.
Added the UseBoundCheckInstruction flag to enable/disable the use of these instructions.
UseBoundCheckInstruction is still under development:
1. C1/C2 are not supported.
2. Loads and stores with an in-bounds index pass, but loads and stores with an out-of-bounds index currently crash.
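
For orientation, here is a minimal C++ sketch (not part of the changeset, using the hypothetical helper name bounded_int_load) of what the bounded-load pattern in the templates below is meant to do; the real gslwle/gsldle/... instructions perform the address comparison and the load in hardware and raise a trap that the VM is expected to turn into an ArrayIndexOutOfBoundsException:

    #include <stdexcept>

    // Semantic sketch only; bounded_int_load is a made-up name, not a HotSpot API.
    static int bounded_int_load(const int* base, int index, int length) {
      const int* addr  = base + index;         // element address computed by the template
      const int* bound = base + (length - 1);  // address of the last valid element (the bound)
      if (addr > bound)                        // the comparison the EXT instruction does in hardware
        throw std::out_of_range("array index out of bounds");
      return *addr;                            // the load performed when the check passes
    }

As noted in point 2 above, the trap-to-exception path is not finished in this changeset, so an out-of-bounds access currently crashes instead of throwing.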

     1 /*
     2  * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
     3  * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved.
     4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     5  *
     6  * This code is free software; you can redistribute it and/or modify it
     7  * under the terms of the GNU General Public License version 2 only, as
     8  * published by the Free Software Foundation.
     9  *
    10  * This code is distributed in the hope that it will be useful, but WITHOUT
    11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    13  * version 2 for more details (a copy is included in the LICENSE file that
    14  * accompanied this code).
    15  *
    16  * You should have received a copy of the GNU General Public License version
    17  * 2 along with this work; if not, write to the Free Software Foundation,
    18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    19  *
    20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    21  * or visit www.oracle.com if you need additional information or have any
    22  * questions.
    23  *
    24  */
    26 #include "precompiled.hpp"
    27 #include "asm/macroAssembler.hpp"
    28 #include "interpreter/interpreter.hpp"
    29 #include "interpreter/interpreterRuntime.hpp"
    30 #include "interpreter/templateTable.hpp"
    31 #include "memory/universe.inline.hpp"
    32 #include "oops/methodData.hpp"
    33 #include "oops/objArrayKlass.hpp"
    34 #include "oops/oop.inline.hpp"
    35 #include "prims/methodHandles.hpp"
    36 #include "runtime/sharedRuntime.hpp"
    37 #include "runtime/stubRoutines.hpp"
    38 #include "runtime/synchronizer.hpp"
    41 #ifndef CC_INTERP
    43 #define __ _masm->
    45 // Platform-dependent initialization
    47 void TemplateTable::pd_initialize() {
    48   // No mips specific initialization
    49 }
    51 // Address computation: local variables
    52 // we use t8 as the local variables pointer register, by yjl 6/27/2005
    53 static inline Address iaddress(int n) {
    54   return Address(LVP, Interpreter::local_offset_in_bytes(n));
    55 }
    57 static inline Address laddress(int n) {
    58   return iaddress(n + 1);
    59 }
    61 static inline Address faddress(int n) {
    62   return iaddress(n);
    63 }
    65 static inline Address daddress(int n) {
    66   return laddress(n);
    67 }
    69 static inline Address aaddress(int n) {
    70   return iaddress(n);
    71 }
    72 static inline Address haddress(int n)            { return iaddress(n + 0); }
    74 //FIXME: cannot use dadd and dsll
    75 /*
    76 static inline Address iaddress(Register r) {
    77   return Address(r14, r, Address::times_8, Interpreter::value_offset_in_bytes());
    78 }
    80 static inline Address laddress(Register r) {
    81   return Address(r14, r, Address::times_8, Interpreter::local_offset_in_bytes(1));
    82 }
    84 static inline Address faddress(Register r) {
    85   return iaddress(r);
    86 }
    88 static inline Address daddress(Register r) {
    89   return laddress(r);
    90 }
    92 static inline Address aaddress(Register r) {
    93   return iaddress(r);
    94 }
    95 */
    97 static inline Address at_sp()             { return Address(SP,  0); }
    98 static inline Address at_sp_p1()          { return Address(SP,  1 * wordSize); }
    99 static inline Address at_sp_p2()          { return Address(SP,  2 * wordSize); }
   101 // At top of Java expression stack which may be different than esp().  It
   102 // isn't for category 1 objects.
   103 static inline Address at_tos   () {
   104   Address tos = Address(SP,  Interpreter::expr_offset_in_bytes(0));
   105   return tos;
   106 }
   108 static inline Address at_tos_p1() {
   109   return Address(SP,  Interpreter::expr_offset_in_bytes(1));
   110 }
   112 static inline Address at_tos_p2() {
   113   return Address(SP,  Interpreter::expr_offset_in_bytes(2));
   114 }
   116 static inline Address at_tos_p3() {
   117   return Address(SP,  Interpreter::expr_offset_in_bytes(3));
   118 }
   120 // we use S0 as bcp; make sure bcp is in S0 before calling any of the template generators
   121 Address TemplateTable::at_bcp(int offset) {
   122   assert(_desc->uses_bcp(), "inconsistent uses_bcp information");
   123   return Address(BCP, offset);
   124 }
   126 #define callee_saved_register(R) assert((R>=S0 && R<=S7), "should use callee saved registers!")
   128 // bytecode folding
   129 void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg,
   130                                    Register tmp_reg, 
   131                                    bool load_bc_into_bc_reg,/*=true*/
   132                                    int byte_no) {
   133   if (!RewriteBytecodes) {
   134     return;
   135   }
   137   Label L_patch_done;
   138   switch (bc) {
   139   case Bytecodes::_fast_aputfield:
   140   case Bytecodes::_fast_bputfield:
   141   case Bytecodes::_fast_cputfield:
   142   case Bytecodes::_fast_dputfield:
   143   case Bytecodes::_fast_fputfield:
   144   case Bytecodes::_fast_iputfield:
   145   case Bytecodes::_fast_lputfield:
   146   case Bytecodes::_fast_sputfield:
   147     {
   148     // We skip bytecode quickening for putfield instructions when the put_code written to the constant pool cache
   149     // is zero. This is required so that every execution of this instruction calls out to 
   150     // InterpreterRuntime::resolve_get_put to do additional, required work.
   151     assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
   152     assert(load_bc_into_bc_reg, "we use bc_reg as temp");
   153     __ get_cache_and_index_and_bytecode_at_bcp(tmp_reg, bc_reg, tmp_reg, byte_no, 1);
   154     __ daddi(bc_reg, R0, bc);
   155     __ beq(tmp_reg, R0, L_patch_done);
   156     __ delayed()->nop();
   157     }
   158     break;
   159   default:
   160     assert(byte_no == -1, "sanity");
   161  // the pair bytecodes have already done the load.
   162   if (load_bc_into_bc_reg) {
   163     __ move(bc_reg, bc);
   164   }
   166   }
   167   if (JvmtiExport::can_post_breakpoint()) {
   168     Label L_fast_patch;
   169     // if a breakpoint is present we can't rewrite the stream directly
   170     __ lbu(tmp_reg, at_bcp(0));
   171     __ move(AT, Bytecodes::_breakpoint);
   172     __ bne(tmp_reg, AT, L_fast_patch);
   173     __ delayed()->nop();
   175     __ get_method(tmp_reg);
   176     // Let breakpoint table handling rewrite to quicker bytecode 
   177     __ call_VM(NOREG, CAST_FROM_FN_PTR(address, 
   178 	  InterpreterRuntime::set_original_bytecode_at), tmp_reg, BCP, bc_reg);
   180     __ b(L_patch_done);
   181     __ delayed()->nop();
   182     __ bind(L_fast_patch);
   183   }
   185 #ifdef ASSERT
   186   Label L_okay;
   187   __ lbu(tmp_reg, at_bcp(0));
   188   __ move(AT, (int)Bytecodes::java_code(bc));
   189   __ beq(tmp_reg, AT, L_okay);
   190   __ delayed()->nop();
   191   __ beq(tmp_reg, bc_reg, L_patch_done);
   192   __ delayed()->nop();
   193   __ stop("patching the wrong bytecode");
   194   __ bind(L_okay);
   195 #endif
   197   // patch bytecode
   198   __ sb(bc_reg, at_bcp(0));
   199   __ bind(L_patch_done);
   200 }
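       // Once patched, the byte at BCP holds the _fast_* bytecode, so subsequent executions of
       // this bytecode dispatch directly to the fast template instead of repeating the work above.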
   203 // Individual instructions
   205 void TemplateTable::nop() {
   206   transition(vtos, vtos);
   207   // nothing to do
   208 }
   210 void TemplateTable::shouldnotreachhere() {
   211   transition(vtos, vtos);
   212   __ stop("shouldnotreachhere bytecode");
   213 }
   215 void TemplateTable::aconst_null() {
   216   transition(vtos, atos);
   217   __ move(FSR, R0);
   218 }
   220 void TemplateTable::iconst(int value) {
   221   transition(vtos, itos);
   222   if (value == 0) {
   223     __ move(FSR, R0);
   224   } else {
   225     __ move(FSR, value);
   226   }
   227 }
   229 void TemplateTable::lconst(int value) {
   230   transition(vtos, ltos);
   231   if (value == 0) {
   232     __ move(FSR, R0);
   233   } else {
   234     __ move(FSR, value);
   235   }
   236   assert(value >= 0, "check this code");
   237   //__ move(SSR, R0);
   238 }
   240 void TemplateTable::fconst(int value) {
   241   static float  _f1 = 1.0, _f2 = 2.0;
   242   transition(vtos, ftos);
   243   float* p;
   244   switch( value ) {
   245     default: ShouldNotReachHere();
   246     case 0:  __ dmtc1(R0, FSF);  return;
   247     case 1:  p = &_f1;   break;
   248     case 2:  p = &_f2;   break;
   249   }
   250   __ li(AT, (address)p);
   251   __ lwc1(FSF, AT, 0);
   252 }
   254 void TemplateTable::dconst(int value) {
   255   static double _d1 = 1.0;
   256   transition(vtos, dtos);
   257   double* p;
   258   switch( value ) {
   259     default: ShouldNotReachHere();
   260     case 0:  __ dmtc1(R0, FSF);  return;
   261     case 1:  p = &_d1;   break;
   262   }
   263   __ li(AT, (address)p);
   264   __ ldc1(FSF, AT, 0);
   265 }
   267 void TemplateTable::bipush() {
   268   transition(vtos, itos);
   269   __ lb(FSR, at_bcp(1));
   270 }
   272 void TemplateTable::sipush() {
   273 	transition(vtos, itos);
   274 	__ get_2_byte_integer_at_bcp(FSR, AT, 1);
   275 	__ hswap(FSR);
   276 }
   278 // T1 : tags
   279 // T2 : index
   280 // T3 : cpool
   281 // T8 : tag
   282 void TemplateTable::ldc(bool wide) {
   283   transition(vtos, vtos);
   284   Label call_ldc, notFloat, notClass, Done;
   285   // get index in cpool
   286   if (wide) {
   287     __ get_2_byte_integer_at_bcp(T2, AT, 1);
   288     __ huswap(T2);
   289   } else {
   290     __ lbu(T2, at_bcp(1));
   291   }
   293   __ get_cpool_and_tags(T3, T1);
   295   const int base_offset = ConstantPool::header_size() * wordSize;
   296   const int tags_offset = Array<u1>::base_offset_in_bytes();
   298   // get type
   299   __ dadd(AT, T1, T2);
   300   __ lb(T1, AT, tags_offset);
   301   //now T1 is the tag
   303   // unresolved string - get the resolved string
   304   /*__ daddiu(AT, T1, - JVM_CONSTANT_UnresolvedString);
   305   __ beq(AT, R0, call_ldc);
   306   __ delayed()->nop();*/
   308   // unresolved class - get the resolved class
   309   __ daddiu(AT, T1, - JVM_CONSTANT_UnresolvedClass);
   310   __ beq(AT, R0, call_ldc);
   311   __ delayed()->nop();
   313   // unresolved class in error (resolution failed) - call into runtime
   314   // so that the same error from first resolution attempt is thrown.
   315   __ daddiu(AT, T1, -JVM_CONSTANT_UnresolvedClassInError); 
   316   __ beq(AT, R0, call_ldc);
   317   __ delayed()->nop();
   319   // resolved class - need to call vm to get java mirror of the class
   320   __ daddiu(AT, T1, - JVM_CONSTANT_Class);
   321   __ bne(AT, R0, notClass);
   322   __ delayed()->dsll(T2, T2, Address::times_8);
   324   __ bind(call_ldc);
   326   __ move(A1, wide);
   327   call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), A1);
   328   //	__ sw(FSR, SP, - 1 * wordSize);
   329   __ push(atos);	
   330   __ b(Done);
   331   //	__ delayed()->daddi(SP, SP, - 1 * wordSize);
   332   __ delayed()->nop();
   333   __ bind(notClass);
   335   __ daddiu(AT, T1, -JVM_CONSTANT_Float);
   336   __ bne(AT, R0, notFloat);
   337   __ delayed()->nop();
   338   // ftos
   339   __ dadd(AT, T3, T2);
   340   __ lwc1(FSF, AT, base_offset);
   341   __ push_f();
   342   __ b(Done);
   343   __ delayed()->nop();
   345   __ bind(notFloat);
   346 #ifdef ASSERT
   347   { 
   348     Label L;
   349     __ daddiu(AT, T1, -JVM_CONSTANT_Integer);
   350     __ beq(AT, R0, L);
   351     __ delayed()->nop();
   352     __ stop("unexpected tag type in ldc");
   353     __ bind(L);
   354   }
   355 #endif
   356   // atos and itos
   357   __ dadd(T0, T3, T2);
   358   __ lw(FSR, T0, base_offset);
   359   __ push(itos);
   360   __ b(Done);
   361   __ delayed()->nop(); 
   364   if (VerifyOops) {
   365     __ verify_oop(FSR);
   366   }
   368   __ bind(Done);
   369 }
   371 // Fast path for caching oop constants.
   372 void TemplateTable::fast_aldc(bool wide) {
   373   transition(vtos, atos);
   375   Register result = FSR;
   376   Register tmp = SSR;
   377   int index_size = wide ? sizeof(u2) : sizeof(u1);
   379   Label resolved;
   380  // We are resolved if the resolved reference cache entry contains a
   381  // non-null object (String, MethodType, etc.)
   382   assert_different_registers(result, tmp);
   383   __ get_cache_index_at_bcp(tmp, 1, index_size);
   384   __ load_resolved_reference_at_index(result, tmp);
   385   __ bne(result, R0, resolved);
   386   __ delayed()->nop();
   388   address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);
   389   // first time invocation - must resolve first
   390   int i = (int)bytecode();
   391   __ move(tmp, i);
   392   __ call_VM(result, entry, tmp);
   394   __ bind(resolved);
   396   if (VerifyOops) {
   397     __ verify_oop(result);
   398   }
   399 }
   402 // used register: T2, T3, T1
   403 // T2 : index
   404 // T3 : cpool
   405 // T1 : tag
   406 void TemplateTable::ldc2_w() {
   407   transition(vtos, vtos);
   408   Label Long, Done;
   410   // get index in cpool
   411   __ get_2_byte_integer_at_bcp(T2, AT, 1);
   412   __ huswap(T2);
   414   __ get_cpool_and_tags(T3, T1);
   416   const int base_offset = ConstantPool::header_size() * wordSize;
   417   const int tags_offset = Array<u1>::base_offset_in_bytes();
   419   // get type in T1
   420   __ dadd(AT, T1, T2);
   421   __ lb(T1, AT, tags_offset);
   423   __ daddiu(AT, T1, - JVM_CONSTANT_Double);
   424   __ bne(AT, R0, Long);
   425   __ delayed()->dsll(T2, T2, Address::times_8);
   426   // dtos	
   427   __ daddu(AT, T3, T2);
   428   __ ldc1(FSF, AT, base_offset + 0 * wordSize);
   429   __ sdc1(FSF, SP, - 2 * wordSize);
   430   __ b(Done);
   431   __ delayed()->daddi(SP, SP, - 2 * wordSize);
   433   // ltos
   434   __ bind(Long);
   435   __ dadd(AT, T3, T2);	
   436   __ ld(FSR, AT, base_offset + 0 * wordSize);
   437   __ push(ltos);
   439   __ bind(Done);
   440 }
   442 // we compute the actual local variable address here
   443 // x86 does not need to, since it has a scaled-index addressing mode; we do not, so we compute it here
   444 void TemplateTable::locals_index(Register reg, int offset) {
   445   __ lbu(reg, at_bcp(offset));
   446   __ dsll(reg, reg, Address::times_8);
   447   __ dsub(reg, LVP, reg);
   448 }
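       // Note: local variable slots live below LVP at descending addresses, so the slot address
       // computed above is LVP - index * 8 (one 8-byte interpreter stack slot per index on MIPS64).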
   450 // this method does bytecode folding of the two frequent pairs:
   451 //   iload iload    and    iload caload
   452 // used register : T2, T3
   453 // T2 : bytecode
   454 // T3 : folded code
   455 void TemplateTable::iload() {
   456   transition(vtos, itos);
   457   if (RewriteFrequentPairs) { 
   458     Label rewrite, done;
   459     // get the next bytecode in T2
   460     __ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_iload)));
   461     // if _iload, wait to rewrite to iload2.  We only want to rewrite the
   462     // last two iloads in a pair.  Comparing against fast_iload means that
   463 // the next bytecode is neither an iload nor a caload, and therefore
   464     // an iload pair.
   465     __ move(AT, Bytecodes::_iload);
   466     __ beq(AT, T2, done);
   467     __ delayed()->nop();
   469     __ move(T3, Bytecodes::_fast_iload2);
   470     __ move(AT, Bytecodes::_fast_iload);
   471     __ beq(AT, T2, rewrite);
   472     __ delayed()->nop();
   474     // if _caload, rewrite to fast_icaload
   475     __ move(T3, Bytecodes::_fast_icaload);
   476     __ move(AT, Bytecodes::_caload);
   477     __ beq(AT, T2, rewrite);
   478     __ delayed()->nop();
   480     // rewrite so iload doesn't check again.
   481     __ move(T3, Bytecodes::_fast_iload);
   483     // rewrite
   484     // T3 : fast bytecode
   485     __ bind(rewrite);
   486     patch_bytecode(Bytecodes::_iload, T3, T2, false);
   487     __ bind(done);
   488   }
   490   // Get the local value into tos
   491   locals_index(T2);
   492   __ lw(FSR, T2, 0);
   493 }
   495 // used register T2
   496 // T2 : index
   497 void TemplateTable::fast_iload2() {
   498 	transition(vtos, itos);
   499 	locals_index(T2);
   500 	__ lw(FSR, T2, 0);
   501 	__ push(itos);
   502 	locals_index(T2, 3);
   503 	__ lw(FSR, T2, 0);
   504 }
   506 // used register T2
   507 // T2 : index
   508 void TemplateTable::fast_iload() {
   509   transition(vtos, itos);
   510   locals_index(T2);
   511   __ lw(FSR, T2, 0);
   512 }
   514 // used register T2
   515 // T2 : index
   516 void TemplateTable::lload() {
   518   transition(vtos, ltos);
   519   locals_index(T2);
   520   __ ld(FSR, T2, -wordSize);
   521   __ ld(SSR, T2, 0);
   522 }
   524 // used register T2
   525 // T2 : index
   526 void TemplateTable::fload() {
   527   transition(vtos, ftos);
   528   locals_index(T2);
   529 //FIXME, aoqi. What should the high 32 bits be when storing a single float into a 64-bit register?
   530   //__ mtc1(R0, FSF);
   531   __ lwc1(FSF, T2, 0);
   532 }
   534 // used register T2
   535 // T2 : index
   536 void TemplateTable::dload() {
   538   transition(vtos, dtos);
   539   locals_index(T2);
   540 /*  if (TaggedStackInterpreter) {
   541     // Get double out of locals array, onto temp stack and load with
   542     // float instruction into ST0
   543     __ dsll(AT,T2,Interpreter::stackElementScale());
   544     __ dadd(AT, LVP, AT);
   545     __ ldc1(FSF, AT, Interpreter::local_offset_in_bytes(1)); 
   546   } else {*/
   547     __ ldc1(FSF, T2, -wordSize);
   548     __ ldc1(SSF, T2, 0);
   549  // }
   550 }
   552 // used register T2
   553 // T2 : index
   554 void TemplateTable::aload() 
   555 {
   556   transition(vtos, atos);
   557   locals_index(T2);
   558   __ ld(FSR, T2, 0);
   559 }
   561 void TemplateTable::locals_index_wide(Register reg) {
   562   __ get_2_byte_integer_at_bcp(reg, AT, 2);
   563   __ huswap(reg);
   564   __ dsll(reg, reg, Address::times_8);
   565   __ dsub(reg, LVP, reg);
   566 }
   568 // used register T2
   569 // T2 : index
   570 void TemplateTable::wide_iload() {
   571 	transition(vtos, itos);
   572 	locals_index_wide(T2);
   573 	__ ld(FSR, T2, 0);
   574 }
   576 // used register T2
   577 // T2 : index
   578 void TemplateTable::wide_lload() {
   579 	transition(vtos, ltos);
   580 	locals_index_wide(T2);
   581 	__ ld(FSR, T2, -4);
   582 }
   584 // used register T2
   585 // T2 : index
   586 void TemplateTable::wide_fload() {
   587 	transition(vtos, ftos);
   588 	locals_index_wide(T2);
   589 	__ lwc1(FSF, T2, 0);
   590 }
   592 // used register T2
   593 // T2 : index
   594 void TemplateTable::wide_dload() {
   595 	transition(vtos, dtos);
   596 	locals_index_wide(T2);
   597 /*	if (TaggedStackInterpreter) {
   598 		// Get double out of locals array, onto temp stack and load with
   599 		// float instruction into ST0
   600 		//   __ movl(eax, laddress(ebx));
   601 		//  __ movl(edx, haddress(ebx));
   602 		__ dsll(AT,T2,Interpreter::stackElementScale());
   603 		__ dadd(AT, LVP, AT);
   604 		__ ldc1(FSF, AT, Interpreter::local_offset_in_bytes(1)); 
   606 		//  __ pushl(edx);  // push hi first
   607 		//  __ pushl(eax);
   608 		//  __ fld_d(Address(esp));
   609 		//  __ addl(esp, 2*wordSize);
   610 	} else {*/
   611 		__ ldc1(FSF, T2, -4);
   612 	//}
   613 }
   615 // used register T2
   616 // T2 : index
   617 void TemplateTable::wide_aload() {
   618 	transition(vtos, atos);
   619 	locals_index_wide(T2);
   620 	__ ld(FSR, T2, 0);
   621 }
   623 // we use A2 as the register for the index, BE CAREFUL!
   624 // we don't use the tgeu trap (code 29) for now; it is reserved for later optimization
   625 void TemplateTable::index_check(Register array, Register index) {
   626   // Pop ptr into array
   627   __ pop_ptr(array);
   628   index_check_without_pop(array, index);
   629 }
   631 void TemplateTable::index_check_without_pop(Register array, Register index) {
   632   // destroys ebx
   633   // check array
   634   __ null_check(array, arrayOopDesc::length_offset_in_bytes());
   636 #ifdef _LP64
   637   // sign extend since tos (index) might contain garbage in upper bits
   638   __ sll(index, index, 0);
   639 #endif // _LP64
   641   // check index
   642   Label ok;
   643   __ lw(AT, array, arrayOopDesc::length_offset_in_bytes());
   644 #ifndef OPT_RANGECHECK
   645   __ sltu(AT, index, AT);
   646   __ bne(AT, R0, ok);
   647   __ delayed()->nop(); 
   649   // throw_ArrayIndexOutOfBoundsException assumes the aberrant index is in A2
   650   if (A2 != index) __ move(A2, index);		
   651   __ jmp(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry);
   652   __ delayed()->nop();
   653   __ bind(ok);
   654 #else
   655   __ lw(AT, array, arrayOopDesc::length_offset_in_bytes());
   656   __ move(A2, index);
   657   __ tgeu(A2, AT, 29);
   658 #endif
   659 }
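       // The array bytecodes below come in two flavours: the default path calls index_check() /
       // index_check_without_pop() and then performs an ordinary load or store, while the
       // UseBoundCheckInstruction path skips the explicit check, computes both the element address
       // and a bound address derived from the array length, and uses a Loongson EXT bounded
       // access (gslwle/gsldle/gsswle/...) that traps when the element address exceeds the bound.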
   661 void TemplateTable::iaload() {
   662   transition(itos, itos);
   663   //  __ pop(SSR);
   664   if(UseBoundCheckInstruction) {  
   665     __ pop(SSR); //SSR:array    FSR: index
   666     __ dsll(FSR, FSR, 2);
   667     __ dadd(FSR, SSR, FSR);
   668     __ addi(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_INT));
   670     __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes());  //bound
   671     __ dsll(AT, AT, 2);
   672     __ dadd(AT, SSR, AT);
   673     __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_INT));
   675     __ gslwle(FSR, FSR, AT);
   676   } else {
   677     index_check(SSR, FSR);
   678     __ dsll(FSR, FSR, 2);
   679     __ dadd(FSR, SSR, FSR);
   680     //FSR: index
   681     __ lw(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_INT));
   682   }
   683 }
   685 void TemplateTable::laload() {
   686   transition(itos, ltos);
   687   //  __ pop(SSR);
   688   if(UseBoundCheckInstruction) {
   689     __ pop(SSR); //SSR:array    FSR: index
   690     __ dsll(FSR, FSR, Address::times_8);
   691     __ dadd(FSR, SSR, FSR);
   692     __ addi(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize);
   694     __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes());  //bound
   695     __ dsll(AT, AT, Address::times_8);
   696     __ dadd(AT, SSR, AT);
   697     __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize);
   699     __ gsldle(FSR, FSR, AT); 
   700   } else {
   701     index_check(SSR, FSR);
   702     __ dsll(AT, FSR, Address::times_8);
   703     __ dadd(AT, SSR, AT);
   704     __ ld(FSR, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize);
   705   }
   706 }
   708 void TemplateTable::faload() {
   709   transition(itos, ftos);
   710   // __ pop(SSR);
   711   if(UseBoundCheckInstruction) {
   712     __ pop(SSR); //SSR:array    FSR: index
   713     __ shl(FSR, 2);
   714     __ dadd(FSR, SSR, FSR);
   715     __ addi(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT));
   717     __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes());  //bound
   718     __ shl(AT, 2);
   719     __ dadd(AT, SSR, AT);
   720     __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_FLOAT));
   722     __ gslwlec1(FSF, FSR, AT);
   723   } else {
   724     index_check(SSR, FSR);  
   725     __ shl(FSR, 2);
   726     __ dadd(FSR, SSR, FSR);
   727     __ lwc1(FSF, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT));
   728   }
   729 }
   731 void TemplateTable::daload() {
   732   transition(itos, dtos);
   733   //__ pop(SSR);
   734   if(UseBoundCheckInstruction) {
   735     __ pop(SSR); //SSR:array    FSR: index
   736     __ dsll(FSR, FSR, 3);
   737     __ dadd(FSR, SSR, FSR);
   738     __ addi(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize);
   740     __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes());  //bound
   741     __ dsll(AT, AT, 3);
   742     __ dadd(AT, SSR, AT);
   743     __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize);
   745     __ gsldlec1(FSF, FSR, AT);
   746   } else {
   747     index_check(SSR, FSR);  
   748     __ dsll(AT, FSR, 3);
   749     __ dadd(AT, SSR, AT);
   750     __ ldc1(FSF, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize);
   751   }
   752 }
   754 void TemplateTable::aaload() {
   755   transition(itos, atos);
   756   //__ pop(SSR);
   757   index_check(SSR, FSR);
   758   __ dsll(FSR, FSR, UseCompressedOops ? Address::times_4 : Address::times_8);
   759   __ dadd(FSR, SSR, FSR);
   760   //add for compressedoops
   761   __ load_heap_oop(FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
   762 }
   764 void TemplateTable::baload() {
   765   transition(itos, itos);
   766   //__ pop(SSR);
   767   if(UseBoundCheckInstruction) {
   768     __ pop(SSR); //SSR:array   FSR:index
   769     __ dadd(FSR, SSR, FSR);
   770     __ addi(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //base
   772     __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes());  
   773     __ dadd(AT, SSR, AT);
   774     __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //bound
   776     __ gslble(FSR, FSR, AT);
   777   } else {
   778     index_check(SSR, FSR); 
   779     __ dadd(FSR, SSR, FSR);
   780     __ lb(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE));
   781   }
   782 }
   784 void TemplateTable::caload() {
   785   transition(itos, itos);
   786   // __ pop(SSR);
   787   index_check(SSR, FSR);
   788   __ dsll(FSR, FSR, Address::times_2);
   789   __ dadd(FSR, SSR, FSR);
   790   __ lhu(FSR, FSR,  arrayOopDesc::base_offset_in_bytes(T_CHAR));
   791 }
   793 // iload followed by caload frequent pair
   794 // used register : T2
   795 // T2 : index
   796 void TemplateTable::fast_icaload() {
   797   transition(vtos, itos);
   798   // load index out of locals
   799   locals_index(T2);
   800   __ lw(FSR, T2, 0);
   801   //	__ pop(SSR);
   802   index_check(SSR, FSR);
   803   __ dsll(FSR, FSR, 1);
   804   __ dadd(FSR, SSR, FSR);
   805   __ lhu(FSR, FSR,  arrayOopDesc::base_offset_in_bytes(T_CHAR));
   806 }
   808 void TemplateTable::saload() {
   809   transition(itos, itos);
   810   // __ pop(SSR);
   811   if(UseBoundCheckInstruction) {
   812     __ pop(SSR); //SSR:array    FSR: index
   813     __ dsll(FSR, FSR, Address::times_2);
   814     __ dadd(FSR, SSR, FSR);
   815     __ addi(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT));
   817     __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes());  //bound
   818     __ dsll(AT, AT, Address::times_2);
   819     __ dadd(AT, SSR, AT);
   820     __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_SHORT));
   822     __ gslhle(FSR, FSR, AT); 
   823   } else {
   824     index_check(SSR, FSR);  
   825     __ dsll(FSR, FSR, Address::times_2);
   826     __ dadd(FSR, SSR, FSR);
   827     __ lh(FSR, FSR,  arrayOopDesc::base_offset_in_bytes(T_SHORT));
   828   }
   829 }
   831 void TemplateTable::iload(int n) {
   832 	transition(vtos, itos);
   833 	__ lw(FSR, iaddress(n));
   834 }
   836 void TemplateTable::lload(int n) {
   837 	transition(vtos, ltos);
   838 	__ ld(FSR, laddress(n));
   839 }
   841 void TemplateTable::fload(int n) {
   842   transition(vtos, ftos);
   843   //__ mtc1(R0, FSF);
   844   __ lwc1(FSF, faddress(n));
   845 }
   846 //FIXME here
   847 void TemplateTable::dload(int n) {
   848 	transition(vtos, dtos);
   849 	__ ldc1(FSF, laddress(n));
   850 }
   852 void TemplateTable::aload(int n) {
   853   transition(vtos, atos);
   854   __ ld(FSR, aaddress(n));
   855 }
   857 // used register : T2, T3
   858 // T2 : bytecode
   859 // T3 : folded code
   860 void TemplateTable::aload_0() {
   861 	transition(vtos, atos);
   862 	// According to bytecode histograms, the pairs:
   863 	//
   864 	// _aload_0, _fast_igetfield
   865 	// _aload_0, _fast_agetfield
   866 	// _aload_0, _fast_fgetfield
   867 	//
   868 	// occur frequently. If RewriteFrequentPairs is set, the (slow) _aload_0
   869 	// bytecode checks if the next bytecode is either _fast_igetfield, 
   870 	// _fast_agetfield or _fast_fgetfield and then rewrites the
   871 	// current bytecode into a pair bytecode; otherwise it rewrites the current
   872 	// bytecode into _fast_aload_0 that doesn't do the pair check anymore.
   873 	//
   874 	// Note: If the next bytecode is _getfield, the rewrite must be delayed,
   875 	//       otherwise we may miss an opportunity for a pair.
   876 	//
   877 	// Also rewrite frequent pairs
   878 	//   aload_0, aload_1
   879 	//   aload_0, iload_1
   880 	// These bytecodes with a small amount of code are most profitable to rewrite
   881 	if (RewriteFrequentPairs) {
   882 		Label rewrite, done;
   883 		// get the next bytecode in T2
   884 		__ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0)));
   886 		// do actual aload_0
   887 		aload(0);
   889 		// if _getfield then wait with rewrite
   890 		__ move(AT, Bytecodes::_getfield);
   891 		__ beq(AT, T2, done);
   892 		__ delayed()->nop();
   894 		// if _igetfield then rewrite to _fast_iaccess_0
   895 		assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == 
   896 				Bytecodes::_aload_0, "fix bytecode definition");
   897 		__ move(T3, Bytecodes::_fast_iaccess_0);
   898 		__ move(AT, Bytecodes::_fast_igetfield);
   899 		__ beq(AT, T2, rewrite);
   900 		__ delayed()->nop();
   902 		// if _agetfield then rewrite to _fast_aaccess_0
   903 		assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == 
   904 				Bytecodes::_aload_0, "fix bytecode definition");
   905 		__ move(T3, Bytecodes::_fast_aaccess_0);
   906 		__ move(AT, Bytecodes::_fast_agetfield);
   907 		__ beq(AT, T2, rewrite);
   908 		__ delayed()->nop();
   910 		// if _fgetfield then rewrite to _fast_faccess_0
   911 		assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == 
   912 				Bytecodes::_aload_0, "fix bytecode definition");
   913 		__ move(T3, Bytecodes::_fast_faccess_0);
   914 		__ move(AT, Bytecodes::_fast_fgetfield);
   915 		__ beq(AT, T2, rewrite);
   916 		__ delayed()->nop();
   918 		// else rewrite to _fast_aload0
   919 		assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == 
   920 				Bytecodes::_aload_0, "fix bytecode definition");
   921 		__ move(T3, Bytecodes::_fast_aload_0);
   923 		// rewrite
   924 		__ bind(rewrite);
   925 		patch_bytecode(Bytecodes::_aload_0, T3, T2, false);
   927 		__ bind(done);
   928 	} else {
   929 		aload(0);
   930 	}
   931 }
   933 void TemplateTable::istore() {
   934 	transition(itos, vtos);
   935 	locals_index(T2);
   936 	__ sw(FSR, T2, 0);
   937 }
   939 void TemplateTable::lstore() {
   940   transition(ltos, vtos);
   941   locals_index(T2);
   942   __ sd(FSR, T2, -wordSize);
   943 }
   945 void TemplateTable::fstore() {
   946 	transition(ftos, vtos);
   947 	locals_index(T2);
   948 	__ swc1(FSF, T2, 0);
   949 }
   951 void TemplateTable::dstore() {
   952   transition(dtos, vtos);
   953   locals_index(T2);
   954   __ sdc1(FSF, T2, -wordSize);
   955 }
   957 void TemplateTable::astore() {
   958   transition(vtos, vtos);
   959   //  __ pop(FSR);
   960   __ pop_ptr(FSR);
   961   locals_index(T2);
   962   __ sd(FSR, T2, 0);
   963 }
   965 void TemplateTable::wide_istore() {
   966 	transition(vtos, vtos);
   967 	//  __ pop(FSR);
   968 	__ pop_i(FSR);
   969 	locals_index_wide(T2);
   970 	__ sd(FSR, T2, 0);
   971 }
   973 void TemplateTable::wide_lstore() {
   974 	transition(vtos, vtos);
   975 	//__ pop2(FSR, SSR);
   976 	//__ pop_l(FSR, SSR); 
   977 	__ pop_l(FSR); //aoqi:FIXME Is this right?
   978 	locals_index_wide(T2);
   979 	__ sd(FSR, T2, -4);
   980 }
   982 void TemplateTable::wide_fstore() {
   983 	wide_istore();
   984 }
   986 void TemplateTable::wide_dstore() {
   987 	wide_lstore();
   988 }
   990 void TemplateTable::wide_astore() {
   991 	transition(vtos, vtos);
   992 	__ pop_ptr(FSR);
   993 	locals_index_wide(T2);
   994 	__ sd(FSR, T2, 0);
   995 }
   997 // used register : T2
   998 void TemplateTable::iastore() {
   999   transition(itos, vtos);
  1000   __ pop_i(SSR);   // T2: array  SSR: index
  1001   if(UseBoundCheckInstruction) {
  1002     __ pop_ptr(T2); 
  1003     __ dsll(SSR, SSR, Address::times_4);
  1004     __ dadd(SSR, T2, SSR);
  1005     __ addi(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_INT));  // base
  1007     __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes());  
  1008     __ dsll(AT, AT, Address::times_4);
  1009     __ dadd(AT, T2, AT);
  1010     __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_INT));  //bound
  1012     __ gsswle(FSR, SSR, AT);
  1013   } else {
  1014     index_check(T2, SSR);  // prefer index in ebx
  1015     __ dsll(SSR, SSR, Address::times_4);
  1016     __ dadd(T2, T2, SSR);
  1017     __ sw(FSR, T2, arrayOopDesc::base_offset_in_bytes(T_INT));
  1018   }
  1019 }
  1023 // used register T2, T3
  1024 void TemplateTable::lastore() {
  1025   transition(ltos, vtos);
  1026   __ pop_i (T2);
  1027   if(UseBoundCheckInstruction) {
  1028     __ pop_ptr(T3); 
  1029     __ dsll(T2, T2, Address::times_8);
  1030     __ dadd(T2, T3, T2);
  1031     __ addi(T2, T2, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize);  // base
  1033     __ lw(AT, T3, arrayOopDesc::length_offset_in_bytes());  
  1034     __ dsll(AT, AT, Address::times_8);
  1035     __ dadd(AT, T3, AT);
  1036     __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize);  //bound
  1038     __ gssdle(FSR, T2, AT);
  1039   } else {
  1040     index_check(T3, T2);
  1041     __ dsll(T2, T2, Address::times_8);
  1042     __ dadd(T3, T3, T2);
  1043     __ sd(FSR, T3, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize);
  1044   }
  1045 }
  1047 // used register T2
  1048 void TemplateTable::fastore() {
  1049   transition(ftos, vtos);
  1050   __ pop_i(SSR);	
  1051   if(UseBoundCheckInstruction) {
  1052     __ pop_ptr(T2); 
  1053     __ dsll(SSR, SSR, Address::times_4);
  1054     __ dadd(SSR, T2, SSR);
  1055     __ addi(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT));  // base
  1057     __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes());  
  1058     __ dsll(AT, AT, Address::times_4);
  1059     __ dadd(AT, T2, AT);
  1060     __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_FLOAT));  //bound
  1062     __ gsswlec1(FSF, SSR, AT);
  1063   } else {
  1064     index_check(T2, SSR); 
  1065     __ dsll(SSR, SSR, Address::times_4);
  1066     __ dadd(T2, T2, SSR);
  1067     __ swc1(FSF, T2, arrayOopDesc::base_offset_in_bytes(T_FLOAT));
  1068   }
  1069 }
  1071 // used register T2, T3
  1072 void TemplateTable::dastore() {
  1073   transition(dtos, vtos);
  1074   __ pop_i (T2); 
  1075   if(UseBoundCheckInstruction) {
  1076     __ pop_ptr(T3); 
  1077     __ dsll(T2, T2, Address::times_8);
  1078     __ dadd(T2, T3, T2);
  1079     __ addi(T2, T2, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize);  // base
  1081     __ lw(AT, T3, arrayOopDesc::length_offset_in_bytes());  
  1082     __ dsll(AT, AT, Address::times_8);
  1083     __ dadd(AT, T3, AT);
  1084     __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize);  //bound
  1086     __ gssdlec1(FSF, T2, AT);
  1087   } else {
  1088     index_check(T3, T2);  
  1089     __ dsll(T2, T2, Address::times_8);
  1090     __ daddu(T3, T3, T2);
  1091     __ sdc1(FSF, T3, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize);
  1092   }
  1093 }
  1095 // used register : T2, T3, T8
  1096 // T2 : array
  1097 // T3 : subklass
  1098 // T8 : supklass
  1099 void TemplateTable::aastore() {
  1100   Label is_null, ok_is_subtype, done;
  1101   transition(vtos, vtos);
  1102   // stack: ..., array, index, value
  1103   __ ld(FSR, at_tos());     // Value
  1104   __ lw(SSR, at_tos_p1());  // Index
  1105   __ ld(T2, at_tos_p2());  // Array
  1107   // index_check(T2, SSR);
  1108   index_check_without_pop(T2, SSR);
  1109   // do array store check - check for NULL value first
  1110   __ beq(FSR, R0, is_null);
  1111   __ delayed()->nop();
  1113   // Move subklass into T3
  1114   //__ ld(T3,  Address(FSR, oopDesc::klass_offset_in_bytes()));
  1115   //add for compressedoops
  1116   __ load_klass(T3, FSR);
  1117   // Move superklass into T8
  1118   //__ ld(T8, Address(T2, oopDesc::klass_offset_in_bytes()));
  1119   //add for compressedoops
  1120   __ load_klass(T8, T2);
  1121   __ ld(T8, Address(T8,  ObjArrayKlass::element_klass_offset()));
  1122   // Compress array+index*4+12 into a single register. T2
  1123   __ dsll(AT, SSR, UseCompressedOops? Address::times_4 : Address::times_8);
  1124   __ dadd(T2, T2, AT);
  1125   __ daddi(T2, T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
  1127   // Generate subtype check.
  1128   // Superklass in T8.  Subklass in T3.
  1129   __ gen_subtype_check(T8, T3, ok_is_subtype);				// <-- Jin
  1130   // Come here on failure
  1131   // object is at FSR
  1132   __ jmp(Interpreter::_throw_ArrayStoreException_entry);    // <-- Jin
  1133   __ delayed()->nop();
  1134   // Come here on success
  1135   __ bind(ok_is_subtype);
  1136   //replace with do_oop_store->store_heap_oop
  1137   //__ sd(FSR, T2, 0);
  1138   __ store_heap_oop(Address(T2, 0), FSR);					// <-- Jin
  1139   __ store_check(T2);
  1140   __ b(done);
  1141   __ delayed()->nop();
  1143   // Have a NULL in FSR, EDX=T2, SSR=index.  Store NULL at ary[idx]
  1144   __ bind(is_null);
  1145   __ profile_null_seen(T9);
  1146   __ dsll(AT, SSR, UseCompressedOops? Address::times_4 : Address::times_8);
  1147   __ dadd(T2, T2, AT);
  1148   //__ sd(FSR, T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
  1149   __ store_heap_oop(Address(T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), FSR);	/* FSR is null here */
  1151   __ bind(done);
  1152   __ daddi(SP, SP, 3 * Interpreter::stackElementSize);
  1153 }
  1155 void TemplateTable::bastore() {
  1156   transition(itos, vtos);
  1157   __ pop_i (SSR); 
  1158   if(UseBoundCheckInstruction) {
  1159     __ pop_ptr(T2); 
  1160     __ dadd(SSR, T2, SSR);
  1161     __ addi(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE));  // base
  1163     __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes());  
  1164     __ dadd(AT, T2, AT);
  1165     __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_BYTE));  //bound
  1167     __ gssble(FSR, SSR, AT);
  1168   } else {
  1169     index_check(T2, SSR);
  1170     __ dadd(SSR, T2, SSR);
  1171     __ sb(FSR, SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE));
  1172   }
  1173 }
  1175 void TemplateTable::castore() {
  1176   transition(itos, vtos);
  1177   __ pop_i(SSR); 
  1178   if(UseBoundCheckInstruction) {
  1179     __ pop_ptr(T2); 
  1180     __ dsll(SSR, SSR, Address::times_2);
  1181     __ dadd(SSR, T2, SSR);
  1182     __ addi(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR));  // base
  1184     __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes());  
  1185     __ dsll(AT, AT, Address::times_2);
  1186     __ dadd(AT, T2, AT);
  1187     __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_CHAR));  //bound
  1189     __ gsshle(FSR, SSR, AT);
  1190   } else {
  1191     index_check(T2, SSR); 
  1192     __ dsll(SSR, SSR, Address::times_2);
  1193     __ dadd(SSR, T2, SSR);
  1194     __ sh(FSR, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR));
  1195   }
  1196 }
  1198 void TemplateTable::sastore() {
  1199   castore();
  1200 }
  1202 void TemplateTable::istore(int n) {
  1203   transition(itos, vtos);
  1204   __ sw(FSR, iaddress(n));
  1205 }
  1207 void TemplateTable::lstore(int n) {
  1208   transition(ltos, vtos);
  1209   __ sd(FSR, laddress(n));
  1210 }
  1212 void TemplateTable::fstore(int n) {
  1213   transition(ftos, vtos);
  1214   __ swc1(FSF, faddress(n));
  1215 }
  1217 void TemplateTable::dstore(int n) {
  1218   transition(dtos, vtos);
  1219   __ sdc1(FSF, laddress(n));
  1220 }
  1222 void TemplateTable::astore(int n) {
  1223   transition(vtos, vtos);
  1224   __ pop_ptr(FSR);
  1225   __ sd(FSR, aaddress(n));
  1226 }
  1228 void TemplateTable::pop() {
  1229   transition(vtos, vtos);
  1230   __ daddi(SP, SP, Interpreter::stackElementSize);
  1231 }
  1233 void TemplateTable::pop2() {
  1234   transition(vtos, vtos);
  1235   __ daddi(SP, SP, 2 * Interpreter::stackElementSize);
  1236 }
  1238 void TemplateTable::dup() {
  1239   transition(vtos, vtos);
  1240   // stack: ..., a
  1241   __ load_ptr(0, FSR);
  1242   __ push_ptr(FSR);
  1243   // stack: ..., a, a
  1244 }
  1246 // blows FSR
  1247 void TemplateTable::dup_x1() {
  1248 	transition(vtos, vtos);
  1249 	// stack: ..., a, b
  1250 	__ load_ptr(0, FSR);  // load b
  1251 	__ load_ptr(1, A5);  // load a
  1252 	__ store_ptr(1, FSR); // store b
  1253 	__ store_ptr(0, A5); // store a
  1254 	__ push_ptr(FSR);             // push b
  1255 	// stack: ..., b, a, b
  1256 }
  1258 // blows FSR
  1259 void TemplateTable::dup_x2() {
  1260 	transition(vtos, vtos);
  1261 	// stack: ..., a, b, c
  1262 	__ load_ptr(0, FSR);  // load c
  1263 	__ load_ptr(2, A5);  // load a
  1264 	__ store_ptr(2, FSR); // store c in a
  1265 	__ push_ptr(FSR);             // push c
  1266 	// stack: ..., c, b, c, c
  1267 	__ load_ptr(2, FSR);  // load b
  1268 	__ store_ptr(2, A5); // store a in b
  1269 	// stack: ..., c, a, c, c
  1270 	__ store_ptr(1, FSR); // store b in c
  1271 	// stack: ..., c, a, b, c
  1272 }
  1274 // blows FSR
  1275 void TemplateTable::dup2() {
  1276 	transition(vtos, vtos);
  1277 	// stack: ..., a, b
  1278 	__ load_ptr(1, FSR);  // load a
  1279 	__ push_ptr(FSR);             // push a
  1280 	__ load_ptr(1, FSR);  // load b
  1281 	__ push_ptr(FSR);             // push b
  1282 	// stack: ..., a, b, a, b
  1283 }
  1285 // blows FSR
  1286 void TemplateTable::dup2_x1() {
  1287 	transition(vtos, vtos);
  1288 	// stack: ..., a, b, c
  1289 	__ load_ptr(0, T2);  // load c
  1290 	__ load_ptr(1, FSR);  // load b
  1291 	__ push_ptr(FSR);             // push b
  1292 	__ push_ptr(T2);             // push c
  1293 	// stack: ..., a, b, c, b, c
  1294 	__ store_ptr(3, T2); // store c in b
  1295 	// stack: ..., a, c, c, b, c
  1296 	__ load_ptr(4, T2);  // load a
  1297 	__ store_ptr(2, T2); // store a in 2nd c
  1298 	// stack: ..., a, c, a, b, c
  1299 	__ store_ptr(4, FSR); // store b in a
  1300 	// stack: ..., b, c, a, b, c
  1302 	// stack: ..., b, c, a, b, c
  1303 }
  1305 // blows FSR, SSR
  1306 void TemplateTable::dup2_x2() {
  1307 	transition(vtos, vtos);
  1308 	// stack: ..., a, b, c, d
  1309 	// stack: ..., a, b, c, d
  1310 	__ load_ptr(0, T2);  // load d
  1311 	__ load_ptr(1, FSR);  // load c
  1312 	__ push_ptr(FSR);             // push c
  1313 	__ push_ptr(T2);             // push d
  1314 	// stack: ..., a, b, c, d, c, d
  1315 	__ load_ptr(4, FSR);  // load b
  1316 	__ store_ptr(2, FSR); // store b in d
  1317 	__ store_ptr(4, T2); // store d in b
  1318 	// stack: ..., a, d, c, b, c, d
  1319 	__ load_ptr(5, T2);  // load a
  1320 	__ load_ptr(3, FSR);  // load c
  1321 	__ store_ptr(3, T2); // store a in c
  1322 	__ store_ptr(5, FSR); // store c in a
  1323 	// stack: ..., c, d, a, b, c, d
  1325 	// stack: ..., c, d, a, b, c, d
  1326 }
  1328 // blows FSR
  1329 void TemplateTable::swap() {
  1330 	transition(vtos, vtos);
  1331 	// stack: ..., a, b
  1333 	__ load_ptr(1, A5);  // load a
  1334 	__ load_ptr(0, FSR);  // load b
  1335 	__ store_ptr(0, A5); // store a in b
  1336 	__ store_ptr(1, FSR); // store b in a
  1338 	// stack: ..., b, a
  1339 }
  1341 void TemplateTable::iop2(Operation op) {
  1342 	transition(itos, itos);
  1343 	switch (op) {
  1344 		case add  :                    
  1345 			__ pop_i(SSR); 
  1346 			__ addu32(FSR, SSR, FSR); 
  1347 			break;
  1348 		case sub  :  
  1349 			__ pop_i(SSR); 
  1350 			__ subu32(FSR, SSR, FSR); 
  1351 			break;
  1352 		case mul  :                    
  1353 			__ lw(SSR, SP, 0);
  1354 			__ daddi(SP, SP, wordSize);
  1355                         __ mul(FSR, SSR, FSR);
  1356 			break;
  1357 		case _and :                    
  1358 			__ pop_i(SSR); 
  1359 			__ andr(FSR, SSR, FSR); 
  1360 			break;
  1361 		case _or  :                    
  1362 			__ pop_i(SSR); 
  1363 			__ orr(FSR, SSR, FSR); 
  1364 			break;
  1365 		case _xor :                    
  1366 			__ pop_i(SSR); 
  1367 			__ xorr(FSR, SSR, FSR); 
  1368 			break;
  1369 		case shl  : 
  1370 			__ pop_i(SSR); 
  1371 			__ sllv(FSR, SSR, FSR);      
  1372 			break; // implicit masking of lower 5 bits by Intel shift instr. mips also
  1373 		case shr  : 
  1374 			__ pop_i(SSR); 
  1375 			__ srav(FSR, SSR, FSR);      
  1376 			break; // implicit masking of lower 5 bits by Intel shift instr. mips also
  1377 		case ushr : 
  1378 			__ pop_i(SSR); 
  1379 			__ srlv(FSR, SSR, FSR);     
  1380 			break; // implicit masking of lower 5 bits by Intel shift instr. mips also
  1381 		default   : ShouldNotReachHere();
  1382 	}
  1383 }
  1385 // the result stored in FSR, SSR,
  1386 // used registers : T2, T3
  1387 //FIXME, aoqi
  1388 void TemplateTable::lop2(Operation op) {
  1389   transition(ltos, ltos);
  1390   //__ pop2(T2, T3);
  1391   __ pop_l(T2, T3);
  1392 #ifdef ASSERT
  1393   {
  1394     Label  L;
  1395     __ beq(T3, R0, L);
  1396     __ delayed()->nop();
  1397     // FIXME: stack verification required
  1398 //    __ stop("lop2, wrong stack");  // <--- Fu 20130930
  1399     __ bind(L);
  1400   }
  1401 #endif
  1402   switch (op) {
  1403     case add : 
  1404       __ daddu(FSR, T2, FSR);
  1405       //__ sltu(AT, FSR, T2);
  1406       //__ daddu(SSR, T3, SSR);
  1407       //__ daddu(SSR, SSR, AT); 
  1408       break;
  1409     case sub :
  1410       __ dsubu(FSR, T2, FSR);
  1411       //__ sltu(AT, T2, FSR);
  1412       //__ dsubu(SSR, T3, SSR);
  1413       //__ dsubu(SSR, SSR, AT);
  1414       break;
  1415     case _and: 
  1416       __ andr(FSR, T2, FSR); 
  1417       //__ andr(SSR, T3, SSR); 
  1418       break;
  1419     case _or : 
  1420       __ orr(FSR, T2, FSR); 
  1421       //__ orr(SSR, T3, SSR); 
  1422       break;
  1423     case _xor: 
  1424       __ xorr(FSR, T2, FSR); 
  1425       //__ xorr(SSR, T3, SSR); 
  1426       break;
  1427     default : ShouldNotReachHere();
  1428   }
  1429 }
  1431 // Java requires that this bytecode handle 0x80000000/-1 without raising an overflow exception;
  1432 // the result must be 0x80000000.
  1433 // The Godson-2 CPU already behaves this way, so we do not need to handle it specially as x86 does.
  1434 void TemplateTable::idiv() {
  1435 	transition(itos, itos);
  1436 	Label not_zero;
  1438 	__ bne(FSR, R0, not_zero);
  1439 	__ delayed()->nop();
  1440 	__ jmp(Interpreter::_throw_ArithmeticException_entry); 
  1441 	__ delayed()->nop();
  1442 	__ bind(not_zero);
  1444 	__ pop_i(SSR);
  1445         if (UseLoongsonISA) {
  1446           __ gsdiv(FSR, SSR, FSR);
  1447         } else {
  1448 	  __ div(SSR, FSR);
  1449 	  __ mflo(FSR);
  1450         }
  1451 }
  1453 void TemplateTable::irem() {
  1454 	transition(itos, itos);
  1455 	Label not_zero;
  1456 	//__ pop(SSR);
  1457 	__ pop_i(SSR);
  1458 	__ div(SSR, FSR);
  1460 	__ bne(FSR, R0, not_zero);
  1461 	__ delayed()->nop();
  1462 	//__ brk(7);
  1463 	__ jmp(Interpreter::_throw_ArithmeticException_entry);
  1464 	__ delayed()->nop();
  1466 	__ bind(not_zero);
  1467 	__ mfhi(FSR);
  1468 }
  1470 void TemplateTable::lmul() {
  1471   transition(ltos, ltos);
  1472   __ pop_l(T2);
  1473   if(UseLoongsonISA){
  1474     __ gsdmult(FSR, T2, FSR);
  1475   } else {
  1476       __ dmult(T2, FSR);
  1477       __ mflo(FSR);
  1478   }
  1479 }
  1481 // NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry
  1482 void TemplateTable::ldiv() {
  1483   transition(ltos, ltos);
  1484   Label normal;
  1486   __ bne(FSR, R0, normal);
  1487   __ delayed()->nop();
  1489   //__ brk(7);		//generate FPE
  1490   __ jmp(Interpreter::_throw_ArithmeticException_entry);
  1491   __ delayed()->nop();
  1493   __ bind(normal);
  1494   __ pop_l(A2, A3);
  1495   if (UseLoongsonISA) {
  1496     __ gsddiv(FSR, A2, FSR);	
  1497   } else {
  1498     __ ddiv(A2, FSR);
  1499     __ mflo(FSR);
  1500   }
  1501 }
  1503 // NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry
  1504 void TemplateTable::lrem() {
  1505   transition(ltos, ltos);
  1506   Label normal;
  1508   __ bne(FSR, R0, normal);
  1509   __ delayed()->nop();
  1511   __ jmp(Interpreter::_throw_ArithmeticException_entry);
  1512   __ delayed()->nop();
  1514   __ bind(normal);
  1515   __ pop_l (A2, A3); 
  1517   if(UseLoongsonISA){
  1518     __ gsdmod(FSR, A2, FSR);
  1519   } else { 
  1520     __ ddiv(A2, FSR);
  1521     __ mfhi(FSR);
  1522   }
  1523 }
  1525 // result in FSR
  1526 // used registers : T0
  1527 void TemplateTable::lshl() {
  1528   transition(itos, ltos);
  1529   __ pop_l(T0, T1);	
  1530 #ifdef ASSERT
  1531   {
  1532     Label  L;
  1533     __ beq(T1, R0, L);
  1534     __ delayed()->nop();
  1535     //__ stop("lshl, wrong stack");  // <-- Fu 20130930 
  1536     __ bind(L);
  1537   }
  1538 #endif
  1539   __ andi(FSR, FSR, 0x3f);	      // the bit to be shifted
  1540   __ dsllv(FSR, T0, FSR);
  1541 }
  1543 // used registers : T0
  1544 void TemplateTable::lshr() {
  1545   transition(itos, ltos);
  1546   __ pop_l(T0, T1);	
  1547 #ifdef ASSERT
  1548   {
  1549     Label  L;
  1550     __ beq(T1, R0, L);
  1551     __ delayed()->nop();
  1552     __ stop("lshr, wrong stack");
  1553     __ bind(L);
  1554   }
  1555 #endif
  1556   __ andi(FSR, FSR, 0x3f);				// the bit to be shifted
  1557   __ dsrav(FSR, T0, FSR);
  1558 }
  1560 // used registers : T0
  1561 void TemplateTable::lushr() {
  1562   transition(itos, ltos);
  1563   __ pop_l(T0, T1);	
  1564 #ifdef ASSERT
  1565   {
  1566     Label  L;
  1567     __ beq(T1, R0, L);
  1568     __ delayed()->nop();
  1569     __ stop("lushr, wrong stack");
  1570     __ bind(L);
  1571   }
  1572 #endif
  1573   __ andi(FSR, FSR, 0x3f);				// the bit to be shifted
  1574   __ dsrlv(FSR, T0, FSR);
  1575 }
  1577 // result in FSF
  1578 void TemplateTable::fop2(Operation op) {
  1579 	transition(ftos, ftos);
  1580 	__ pop_ftos_to_esp();  // pop ftos into esp
  1581 	switch (op) {
  1582 		case add:
  1583 			__ lwc1(FTF, at_sp());
  1584 			__ add_s(FSF, FTF, FSF);
  1585 			break;
  1586 		case sub: 
  1587 			__ lwc1(FTF, at_sp());
  1588 			__ sub_s(FSF, FTF, FSF);
  1589 			break;
  1590 		case mul: 
  1591 			__ lwc1(FTF, at_sp());
  1592 			__ mul_s(FSF, FTF, FSF);
  1593 			break;
  1594 		case div: 
  1595 			__ lwc1(FTF, at_sp());
  1596 			__ div_s(FSF, FTF, FSF);
  1597 			break;
  1598 		case rem: 
  1599 			__ mfc1(FSR, FSF);
  1600 			__ mtc1(FSR, F12);
  1601 			__ lwc1(FTF, at_sp());
  1602 			__ rem_s(FSF, FTF, F12, FSF);
  1603 			break;
  1604 		default : ShouldNotReachHere();
  1605 	}
  1607 	__ daddi(SP, SP, 1 * wordSize);
  1608 }
  1610 // result in SSF||FSF
  1611 // I don't handle the strict flags
  1612 void TemplateTable::dop2(Operation op) {
  1613 	transition(dtos, dtos);
  1614 	__ pop_dtos_to_esp();  // pop dtos into esp
  1615 	switch (op) {
  1616 		case add: 
  1617 			__ ldc1(FTF, at_sp());
  1618 			__ add_d(FSF, FTF, FSF);
  1619 			break;
  1620 		case sub: 
  1621 			__ ldc1(FTF, at_sp());
  1622 			__ sub_d(FSF, FTF, FSF);
  1623 			break;
  1624 		case mul: 
  1625 			__ ldc1(FTF, at_sp());
  1626 			__ mul_d(FSF, FTF, FSF);
  1627 			break;
  1628 		case div:
  1629 			__ ldc1(FTF, at_sp());
  1630 			__ div_d(FSF, FTF, FSF);
  1631 			break;
  1632 		case rem:
  1633 			__ dmfc1(FSR, FSF);
  1634 			__ dmtc1(FSR, F12);
  1635 			__ ldc1(FTF, at_sp());
  1636 			__ rem_d(FSF, FTF, F12, FSF);
  1637 			break;
  1638 		default : ShouldNotReachHere();
  1639 	}
  1641 	__ daddi(SP, SP, 2 * wordSize);
  1642 }
  1644 void TemplateTable::ineg() {
  1645 	transition(itos, itos);
  1646 	__ neg(FSR);
  1647 }
  1649 void TemplateTable::lneg() {
  1650 	transition(ltos, ltos);
  1651 	__ dsubu(FSR, R0, FSR);
  1652 }
  1653 /*
  1654 // Note: 'double' and 'long long' have 32-bits alignment on x86.
  1655 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  1656   // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  1657   // of 128-bits operands for SSE instructions.
  1658   jlong *operand = (jlong*)(((intptr_t)adr)&((intptr_t)(~0xF)));
  1659   // Store the value to a 128-bits operand.
  1660   operand[0] = lo;
  1661   operand[1] = hi;
  1662   return operand;
  1665 // Buffer for 128-bits masks used by SSE instructions.
  1666 static jlong float_signflip_pool[2*2];
  1667 static jlong double_signflip_pool[2*2];
  1668 */
  1669 void TemplateTable::fneg() {
  1670 	transition(ftos, ftos);
  1671 	__ neg_s(FSF, FSF);
  1672 }
  1674 void TemplateTable::dneg() {
  1675 	transition(dtos, dtos);
  1676 	__ neg_d(FSF, FSF);
  1677 }
  1679 // used registers : T2
  1680 void TemplateTable::iinc() {
  1681 	transition(vtos, vtos);
  1682 	locals_index(T2);
  1683 	__ lw(FSR, T2, 0);
  1684 	__ lb(AT, at_bcp(2));           // get constant
  1685 	__ daddu(FSR, FSR, AT);
  1686 	__ sw(FSR, T2, 0);
  1687 }
  1689 // used register : T2
  1690 void TemplateTable::wide_iinc() {
  1691 	transition(vtos, vtos);
  1692 	locals_index_wide(T2);
  1693 	__ get_2_byte_integer_at_bcp(FSR, AT, 4);
  1694 	__ hswap(FSR);
  1695 	__ lw(AT, T2, 0);
  1696 	__ daddu(FSR, AT, FSR);
  1697 	__ sw(FSR, T2, 0);
  1698 }
  1700 void TemplateTable::convert() {
  1701   // Checking
  1702 #ifdef ASSERT
  1703   { TosState tos_in  = ilgl;
  1704     TosState tos_out = ilgl;
  1705     switch (bytecode()) {
  1706       case Bytecodes::_i2l: // fall through
  1707       case Bytecodes::_i2f: // fall through
  1708       case Bytecodes::_i2d: // fall through
  1709       case Bytecodes::_i2b: // fall through
  1710       case Bytecodes::_i2c: // fall through
  1711       case Bytecodes::_i2s: tos_in = itos; break;
  1712       case Bytecodes::_l2i: // fall through
  1713       case Bytecodes::_l2f: // fall through
  1714       case Bytecodes::_l2d: tos_in = ltos; break;
  1715       case Bytecodes::_f2i: // fall through
  1716       case Bytecodes::_f2l: // fall through
  1717       case Bytecodes::_f2d: tos_in = ftos; break;
  1718       case Bytecodes::_d2i: // fall through
  1719       case Bytecodes::_d2l: // fall through
  1720       case Bytecodes::_d2f: tos_in = dtos; break;
  1721       default             : ShouldNotReachHere();
  1722     }
  1723     switch (bytecode()) {
  1724       case Bytecodes::_l2i: // fall through
  1725       case Bytecodes::_f2i: // fall through
  1726       case Bytecodes::_d2i: // fall through
  1727       case Bytecodes::_i2b: // fall through
  1728       case Bytecodes::_i2c: // fall through
  1729       case Bytecodes::_i2s: tos_out = itos; break;
  1730       case Bytecodes::_i2l: // fall through
  1731       case Bytecodes::_f2l: // fall through
  1732       case Bytecodes::_d2l: tos_out = ltos; break;
  1733       case Bytecodes::_i2f: // fall through
  1734       case Bytecodes::_l2f: // fall through
  1735       case Bytecodes::_d2f: tos_out = ftos; break;
  1736       case Bytecodes::_i2d: // fall through
  1737       case Bytecodes::_l2d: // fall through
  1738       case Bytecodes::_f2d: tos_out = dtos; break;
  1739       default             : ShouldNotReachHere();
  1740     }
  1741     transition(tos_in, tos_out);
  1742   }
  1743 #endif // ASSERT
  1745   // Conversion
  1746   // (Note: use pushl(ecx)/popl(ecx) for 1/2-word stack-ptr manipulation)
  1747   switch (bytecode()) {
  1748     case Bytecodes::_i2l:
  1749       //__ extend_sign(SSR, FSR);
  1750       __ sll(FSR, FSR, 0);
  1751       break;
  1752     case Bytecodes::_i2f:
  1753       __ mtc1(FSR, FSF);
  1754       __ cvt_s_w(FSF, FSF);
  1755       break;
  1756     case Bytecodes::_i2d:
  1757       __ mtc1(FSR, FSF);
  1758       __ cvt_d_w(FSF, FSF);
  1759       break;
  1760     case Bytecodes::_i2b:
  1761       __ seb(FSR, FSR);
  1762       break;
  1763     case Bytecodes::_i2c:
  1764       __ andi(FSR, FSR, 0xFFFF);  // truncate to the low 16 bits
  1765       break;
  1766     case Bytecodes::_i2s:
  1767       __ seh(FSR, FSR);
  1768       break;
  1769     case Bytecodes::_l2i:
  1770       __ sll(FSR, FSR, 0);
  1771       //__ dsll32(FSR, FSR, 0);
  1772       //__ dsra32(FSR, FSR, 0);
  1773       break;
  1774     case Bytecodes::_l2f:
  1775       __ dmtc1(FSR, FSF);
  1776       //__ mtc1(SSR, SSF);
  1777       __ cvt_s_l(FSF, FSF);
  1778       break;
  1779     case Bytecodes::_l2d:
  1780       __ dmtc1(FSR, FSF);
  1781       //__ mtc1(SSR, SSF);
  1782       __ cvt_d_l(FSF, FSF);
  1783       break;
  1784     case Bytecodes::_f2i:
  1786 	Label L;
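        	// trunc.w.s sets the Invalid Operation cause bit (bit 16, 0x10000) of the FCSR
        	// (read via cfc1 $31) when the source is NaN or out of int range. In that case
        	// we fall back to SharedRuntime::f2i, which implements the Java semantics
        	// (NaN -> 0, saturation on overflow); otherwise the truncated result is taken
        	// from F12 in the branch delay slot.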
  1787 	/*
  1788 	__ c_un_s(FSF, FSF);		//NaN?
  1789 	__ bc1t(L);
  1790 	__ delayed(); __ move(FSR, R0);
  1791 	*/
  1792 	__ trunc_w_s(F12, FSF);
  1793 	__ cfc1(AT, 31);
  1794 	__ li(T0, 0x10000);
  1795 	__ andr(AT, AT, T0);
  1796 	__ beq(AT, R0, L);
  1797 	__ delayed()->mfc1(FSR, F12);
  1799 	__ mov_s(F12, FSF);
  1800 	__ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1);
  1801 	__ bind(L);
  1803       break;
  1804     case Bytecodes::_f2l:
  1806 	Label L;
  1807 	/*
  1808 	__ move(SSR, R0);
  1809 	__ c_un_s(FSF, FSF);		//NaN?
  1810 	__ bc1t(L);
  1811 	__ delayed();
  1812 	__ move(FSR, R0);
  1813 	*/
  1814 	__ trunc_l_s(F12, FSF);
  1815 	__ cfc1(AT, 31);
  1816 	__ li(T0, 0x10000);
  1817 	__ andr(AT, AT, T0);
  1818 	__ beq(AT, R0, L);
  1819 	__ delayed()->dmfc1(FSR, F12);
  1821 	__ mov_s(F12, FSF);
  1822 	__ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1);
  1823 	__ bind(L);
  1825       break;
  1826     case Bytecodes::_f2d:
  1827       __ cvt_d_s(FSF, FSF);
  1828       break;
  1829     case Bytecodes::_d2i:
  1831 	Label L;
  1832 	/*
  1833 	__ c_un_d(FSF, FSF);		//NaN?
  1834 	__ bc1t(L);
  1835 	__ delayed(); __ move(FSR, R0);
  1836 	*/
  1837 	__ trunc_w_d(F12, FSF);
  1838 	__ cfc1(AT, 31);
  1839 	__ li(T0, 0x10000);
  1840 	__ andr(AT, AT, T0);
  1841 	__ beq(AT, R0, L);
  1842 	__ delayed()->mfc1(FSR, F12);
  1844 	__ mov_d(F12, FSF);
  1845 	__ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 1);
  1846 	__ bind(L);
  1848       break;
  1849     case Bytecodes::_d2l:
  1851 	Label L;
  1852 	/*
  1853 	__ move(SSR, R0);
  1854 	__ c_un_d(FSF, FSF);		//NaN?
  1855 	__ bc1t(L);
  1856 	__ delayed(); __ move(FSR, R0);
  1857 	*/
  1858 	__ trunc_l_d(F12, FSF);
  1859 	__ cfc1(AT, 31);
  1860 	__ li(T0, 0x10000);
  1861 	__ andr(AT, AT, T0);
  1862 	__ beq(AT, R0, L);
  1863 	__ delayed()->dmfc1(FSR, F12);
  1865 	__ mov_d(F12, FSF);
  1866 	__ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 1);
  1867 	__ bind(L);
  1869       break;
  1870     case Bytecodes::_d2f:
  1871       __ cvt_s_d(FSF, FSF);
  1872       break;
  1873     default             :
  1874       ShouldNotReachHere();
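        // lcmp: value2 is cached on the tos in FSR; value1 is popped into T0, and the second
        // pop into R0 merely drops the long's unused dummy slot. The result left in FSR follows
        // the bytecode's convention: -1 if value1 < value2, 0 if equal, +1 if value1 > value2.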
  1878 void TemplateTable::lcmp() {
  1879   transition(ltos, itos);
  1881   Label low, high, done;
  1882   __ pop(T0);
  1883   __ pop(R0);
  1884   __ slt(AT, T0, FSR);
  1885   __ bne(AT, R0, low);
  1886   __ delayed()->nop();
  1888   __ bne(T0, FSR, high);
  1889   __ delayed()->nop();
  1891   __ li(FSR, (long)0);
  1892   __ b(done);
  1893   __ delayed()->nop();
  1895   __ bind(low);
  1896   __ li(FSR, (long)-1);
  1897   __ b(done);
  1898   __ delayed()->nop();
  1900   __ bind(high);
  1901   __ li(FSR, (long)1);
  1902   __ b(done);
  1903   __ delayed()->nop();
  1905   __ bind(done);
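        // float_cmp implements fcmpl/fcmpg (and dcmpl/dcmpg): unordered_result is -1 for the
        // "l" flavors and +1 for the "g" flavors. A NaN operand makes c.eq false; choosing
        // c.ult (true on unordered) vs. c.olt (false on unordered) then routes the unordered
        // case to -1, or lets it fall through to +1, as the bytecodes require.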
  1908 void TemplateTable::float_cmp(bool is_float, int unordered_result) {
  1909 	Label less, done;
  1911 	__ move(FSR, R0);
  1913 	if (is_float) {
  1914 		__ pop_ftos_to_esp();
  1915 		__ lwc1(FTF, at_sp());
  1916 		__ c_eq_s(FTF, FSF);
  1917 		__ bc1t(done);
  1918 		__ delayed()->daddi(SP, SP, 1 * wordSize);
  1920 		if (unordered_result<0)
  1921 			__ c_ult_s(FTF, FSF);
  1922 		else
  1923 			__ c_olt_s(FTF, FSF);
  1924 	} else {
  1925 		__ pop_dtos_to_esp();
  1926 		__ ldc1(FTF, at_sp());
  1927 		__ c_eq_d(FTF, FSF);
  1928 		__ bc1t(done);
  1929 		__ delayed()->daddi(SP, SP, 2 * wordSize);
  1931 		if (unordered_result<0)
  1932 			__ c_ult_d(FTF, FSF);
  1933 		else
  1934 			__ c_olt_d(FTF, FSF);
  1936 	__ bc1t(less);
  1937 	__ delayed()->nop();
  1938 	__ move(FSR, 1);
  1939 	__ b(done);
  1940 	__ delayed()->nop();
  1941 	__ bind(less);
  1942 	__ move(FSR, -1);
  1943 	__ bind(done);
  1947 // used registers : T3, A7, Rnext
  1948 // FSR : return bci, this is defined by the vm specification
  1949 // T2 : MDO taken count
  1950 // T3 : method
  1951 // A7 : offset
  1952 // Rnext : next bytecode, this is required by dispatch_base
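        // Branch bytecodes carry a signed big-endian displacement relative to the bcp of the
        // opcode: 2 bytes at bcp+1 for the normal forms, 4 bytes at bcp+1 for goto_w/jsr_w
        // (is_wide). The target is simply bcp + displacement, computed below after byte-swapping.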
  1953 void TemplateTable::branch(bool is_jsr, bool is_wide) {
  1954   __ get_method(T3);
  1955   __ profile_taken_branch(A7, T2);		// only C2 meaningful 
  1957 #ifndef CORE
  1958   const ByteSize be_offset = MethodCounters::backedge_counter_offset() 
  1959     + InvocationCounter::counter_offset();
  1960   const ByteSize inv_offset = MethodCounters::invocation_counter_offset() 
  1961     + InvocationCounter::counter_offset();
  1962   const int method_offset = frame::interpreter_frame_method_offset * wordSize;
  1963 #endif // CORE
  1965   // Load up T4 with the branch displacement
  1966   if (!is_wide) {
  1967     __ get_2_byte_integer_at_bcp(A7, AT, 1);
  1968     __ hswap(A7);
  1969   } else {
  1970     __ get_4_byte_integer_at_bcp(A7, AT, 1);
  1971     __ swap(A7);
  1974   // Handle all the JSR stuff here, then exit.
  1975   // It's much shorter and cleaner than intermingling with the
   1976   // non-JSR normal-branch stuff occurring below.
  1977   if (is_jsr) {
  1978     // Pre-load the next target bytecode into Rnext
  1979     __ dadd(AT, BCP, A7);
  1980     __ lbu(Rnext, AT, 0);
  1982     // compute return address as bci in FSR
  1983     __ daddi(FSR, BCP, (is_wide?5:3) - in_bytes(ConstMethod::codes_offset()));
  1984     __ ld(AT, T3, in_bytes(Method::const_offset()));
  1985     __ dsub(FSR, FSR, AT);
  1986     // Adjust the bcp in BCP by the displacement in A7
  1987     __ dadd(BCP, BCP, A7);
  1988     // jsr returns atos that is not an oop
  1989     // __ dispatch_only_noverify(atos);
  1990     // Push return address
  1991     __ push_i(FSR);
  1992     // jsr returns vtos
  1993     __ dispatch_only_noverify(vtos);
  1995     return;
  1998   // Normal (non-jsr) branch handling
  2000   // Adjust the bcp in S0 by the displacement in T4
  2001   __ dadd(BCP, BCP, A7);
  2003 #ifdef CORE
  2004   // Pre-load the next target bytecode into EBX
  2005   __ lbu(Rnext, BCP, 0);
  2006   // continue with the bytecode @ target
  2007   __ dispatch_only(vtos);
  2008 #else
  2009   assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters");
  2010   Label backedge_counter_overflow;
  2011   Label profile_method;
  2012   Label dispatch;
  2013   if (UseLoopCounter) {
  2014     // increment backedge counter for backward branches
  2015     // eax: MDO
  2016     // ebx: MDO bumped taken-count
  2017     // T3: method
  2018     // T4: target offset
  2019     // BCP: target bcp
  2020     // LVP: locals pointer
  2021     __ bgtz(A7, dispatch);	// check if forward or backward branch
  2022     __ delayed()->nop();
  2024     // check if MethodCounters exists
  2025     Label has_counters;
  2026     __ ld(AT, T3, in_bytes(Method::method_counters_offset()));  // use AT as MDO, TEMP 
  2027     __ bne(AT, R0, has_counters);
  2028     __ nop();
  2029     //__ push(T3);
  2030     //__ push(A7);
  2031     __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters),
  2032                T3);
  2033     //__ pop(A7);
  2034     //__ pop(T3);
  2035     __ ld(AT, T3, in_bytes(Method::method_counters_offset()));  // use AT as MDO, TEMP
  2036     __ beq(AT, R0, dispatch);
  2037     __ nop();
  2038     __ bind(has_counters);
  2040     // increment back edge counter 
  2041     __ ld(T1, T3, in_bytes(Method::method_counters_offset()));
  2042     __ lw(T0, T1, in_bytes(be_offset));
  2043     __ increment(T0, InvocationCounter::count_increment);
  2044     __ sw(T0, T1, in_bytes(be_offset));
  2046     // load invocation counter
  2047     __ lw(T1, T1, in_bytes(inv_offset));
   2048     // buffer bit added, mask not needed
   2049     // by yjl 10/24/2005
  2050     //__ move(AT, InvocationCounter::count_mask_value);
  2051     //__ andr(T1, T1, AT);
  2053     // dadd backedge counter & invocation counter
  2054     __ dadd(T1, T1, T0);
  2056     if (ProfileInterpreter) {
  2057       // Test to see if we should create a method data oop
  2058       //__ lui(AT, Assembler::split_high(int(&InvocationCounter::InterpreterProfileLimit)));
  2059       //__ lw(AT, AT, Assembler::split_low(int(&InvocationCounter::InterpreterProfileLimit)));
  2060       // T1 : backedge counter & invocation counter
  2061       __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit);
  2062       __ lw(AT, AT, 0);
  2063       __ slt(AT, T1, AT);
  2064       __ bne(AT, R0, dispatch);
  2065       __ delayed()->nop();
  2067       // if no method data exists, go to profile method
  2068       __ test_method_data_pointer(T1, profile_method);
  2070       if (UseOnStackReplacement) {
  2071 	// check for overflow against ebx which is the MDO taken count
  2072 	//__ lui(AT, Assembler::split_high(int(&InvocationCounter::InterpreterBackwardBranchLimit)));
  2073 	//__ lw(AT, AT, Assembler::split_low(int(&InvocationCounter::InterpreterBackwardBranchLimit)));
  2074 	__ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit);
  2075 	__ lw(AT, AT, 0);
   2076 	// the value in T2 is the MDO taken count bumped by profile_taken_branch at the beginning
  2077 	__ slt(AT, T2, AT);
  2078 	__ bne(AT, R0, dispatch);
  2079 	__ delayed()->nop();
   2081 	// When ProfileInterpreter is on, the backedge_count comes
   2082 	// from the methodDataOop, whose value does not get reset on
   2083 	// the call to frequency_counter_overflow().
   2084 	// To avoid excessive calls to the overflow routine while
   2085 	// the method is being compiled, add a second test to make
   2086 	// sure the overflow function is called only once every
   2087 	// overflow_frequency taken backward branches.
  2088 	const int overflow_frequency = 1024;
  2089 	__ andi(AT, T2, overflow_frequency-1);
  2090 	__ beq(AT, R0, backedge_counter_overflow);
  2091 	__ delayed()->nop();
  2093     } else {
  2094       if (UseOnStackReplacement) {
  2095 	// check for overflow against eax, which is the sum of the counters
  2096 	//__ lui(AT, Assembler::split_high(int(&InvocationCounter::InterpreterBackwardBranchLimit)));
  2097 	//__ lw(AT, AT, Assembler::split_low(int(&InvocationCounter::InterpreterBackwardBranchLimit)));
  2098 	__ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit);
  2099 	__ lw(AT, AT, 0);
  2100 	__ slt(AT, T1, AT);
  2101 	__ beq(AT, R0, backedge_counter_overflow);
  2102 	__ delayed()->nop();
  2105     __ bind(dispatch);
  2108   // Pre-load the next target bytecode into Rnext
  2109   __ lbu(Rnext, BCP, 0);
  2111   // continue with the bytecode @ target
  2112   // FSR: return bci for jsr's, unused otherwise
  2113   // Rnext: target bytecode
  2114   // BCP: target bcp
  2115   __ dispatch_only(vtos);
  2117   if (UseLoopCounter) {
  2118     if (ProfileInterpreter) {
  2119       // Out-of-line code to allocate method data oop.
  2120       __ bind(profile_method);
  2121       __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method));
  2122       __ lbu(Rnext, BCP, 0);
  2124       __ set_method_data_pointer_for_bcp();
  2125 /*
  2126       __ ld(T3, FP, method_offset);
  2127       __ lw(T3, T3, in_bytes(Method::method_data_offset()));
  2128       __ sw(T3, FP, frame::interpreter_frame_mdx_offset * wordSize);
  2129       __ test_method_data_pointer(T3, dispatch);
  2130       // offset non-null mdp by MDO::data_offset() + IR::profile_method()
  2131       __ daddi(T3, T3, in_bytes(MethodData::data_offset()));
  2132       __ dadd(T3, T3, T1);
  2133       __ sw(T3, FP, frame::interpreter_frame_mdx_offset * wordSize);
  2134 */
  2135       __ b(dispatch);
  2136       __ delayed()->nop();
  2139     if (UseOnStackReplacement) {
  2140       // invocation counter overflow
  2141       __ bind(backedge_counter_overflow);
  2142       __ sub(A7, BCP, A7);	// branch bcp
  2143       call_VM(NOREG, CAST_FROM_FN_PTR(address, 
  2144 	    InterpreterRuntime::frequency_counter_overflow), A7);
  2145       __ lbu(Rnext, BCP, 0);
  2147       // V0: osr nmethod (osr ok) or NULL (osr not possible)
  2148       // V1: osr adapter frame return address
  2149       // Rnext: target bytecode
  2150       // LVP: locals pointer
  2151       // BCP: bcp
  2152       __ beq(V0, R0, dispatch);
  2153       __ delayed()->nop();
  2154       // nmethod may have been invalidated (VM may block upon call_VM return)
  2155       __ lw(T3, V0, nmethod::entry_bci_offset());
  2156       __ move(AT, InvalidOSREntryBci);
  2157       __ beq(AT, T3, dispatch);
  2158       __ delayed()->nop();
  2159       // We need to prepare to execute the OSR method. First we must
  2160       // migrate the locals and monitors off of the stack.
  2161       //eax V0: osr nmethod (osr ok) or NULL (osr not possible)
  2162       //ebx V1: osr adapter frame return address
  2163       //edx  Rnext: target bytecode
  2164       //edi  LVP: locals pointer
  2165       //esi  BCP: bcp
  2166       __ move(BCP, V0); 
  2167       // const Register thread = ecx;
  2168       const Register thread = TREG;
  2169 #ifndef OPT_THREAD
  2170       __ get_thread(thread);
  2171 #endif
  2172       call_VM(noreg, CAST_FROM_FN_PTR(address, 
  2173 	    SharedRuntime::OSR_migration_begin));
  2174       // eax is OSR buffer, move it to expected parameter location
  2175       //refer to osrBufferPointer in c1_LIRAssembler_mips.cpp	
  2176       __ move(T0, V0);
  2178       // pop the interpreter frame
  2179       //  __ movl(edx, Address(ebp, frame::interpreter_frame_sender_sp_offset 
  2180       //  * wordSize)); // get sender sp
  2181       __ ld(A7, Address(FP, 
  2182 	    frame::interpreter_frame_sender_sp_offset * wordSize)); 
  2183       //FIXME, shall we keep the return address on the stack?	
  2184       __ leave();                                // remove frame anchor
  2185       // __ popl(edi);                         // get return address
  2186       //__ daddi(SP, SP, wordSize);               // get return address
  2187       //   __ pop(LVP);	
  2188       __ move(LVP, RA);	
  2189       // __ movl(esp, edx);                         // set sp to sender sp
  2190       __ move(SP, A7);
  2192       Label skip;
  2193       Label chkint;
  2195       // The interpreter frame we have removed may be returning to
  2196       // either the callstub or the interpreter. Since we will
  2197       // now be returning from a compiled (OSR) nmethod we must
   2198       // adjust the return address to one where it can handle compiled
   2199       // results and clean the fpu stack. This is very similar to
   2200       // what an i2c adapter must do.
  2202       // Are we returning to the call stub?
  2203 #if 0	
  2204       // __ cmpl(edi, (int)StubRoutines::_call_stub_return_address);
  2205       __ daddi(AT, LVP, -(int)StubRoutines::_call_stub_return_address); 
  2206       //  __ jcc(Assembler::notEqual, chkint);
  2207       __ bne(AT, R0, chkint);
  2208       __ delayed()->nop();      
  2209       // yes adjust to the specialized call stub  return.
  2210       // assert(StubRoutines::i486::get_call_stub_compiled_return() != NULL,
  2211       // "must be set");
  2212       assert(StubRoutines::gs2::get_call_stub_compiled_return() != NULL, 
  2213 	  "must be set");
  2214       // __ movl(edi, (intptr_t) StubRoutines::i486::get_call_stub_compiled_return());
  2215       __ move(LVP, (intptr_t) StubRoutines::gs2::get_call_stub_compiled_return()); 
  2216       //  __ jmp(skip);
  2217       __ b(skip);
  2218       __ delayed()->nop();
  2219       __ bind(chkint);
  2221       // Are we returning to the interpreter? Look for sentinel
  2223       //__ cmpl(Address(edi, -8), Interpreter::return_sentinel);
  2224       __ lw(AT, LVP , -8); 
  2225       __ daddi(AT, AT, -Interpreter::return_sentinel); 
  2226       //__ jcc(Assembler::notEqual, skip);
  2227       __ bne(AT, R0, skip);
  2228       __ delayed()->nop(); 
  2229       // Adjust to compiled return back to interpreter
  2231       // __ movl(edi, Address(edi, -4));
  2232       __ lw(LVP, LVP, -4); 
  2234       __ bind(skip);
  2235 #endif
  2236       // Align stack pointer for compiled code (note that caller is
  2237       // responsible for undoing this fixup by remembering the old SP
  2238       // in an ebp-relative location)
  2239       //  __ andl(esp, -(StackAlignmentInBytes));
  2240       __ move(AT, -(StackAlignmentInBytes));	
  2241       __ andr(SP , SP , AT);
  2242       // push the (possibly adjusted) return address
  2243       //  __ pushl(edi);
  2244       //__ push(LVP);
  2245       //			__ move(RA, LVP);	
  2246       // and begin the OSR nmethod
  2247       //  __ jmp(Address(esi, nmethod::osr_entry_point_offset()));
  2248       //refer to osr_entry in c1_LIRAssembler_mips.cpp	
  2249       __ ld(AT, BCP, nmethod::osr_entry_point_offset()); 
  2250       __ jr(AT); 
  2251       __ delayed()->nop(); 
  2254 #endif // not CORE
  2257 void TemplateTable::if_0cmp(Condition cc) {
  2258   transition(itos, vtos);
  2259   // assume branch is more often taken than not (loops use backward branches)
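          // We branch to not_taken on the negation of cc (e.g. for ifeq we branch away when
          // FSR != 0), so the fall-through path is the taken branch handled by branch(false, false).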
  2260   Label not_taken;
  2261   switch(cc) {
  2262     case not_equal:
  2263       __ beq(FSR, R0, not_taken);
  2264       break;
  2265     case equal:
  2266       __ bne(FSR, R0, not_taken);
  2267       break;
  2268     case less:
  2269       __ bgez(FSR, not_taken);
  2270       break;
  2271     case less_equal:
  2272       __ bgtz(FSR, not_taken);
  2273       break;
  2274     case greater:
  2275       __ blez(FSR, not_taken);
  2276       break;
  2277     case greater_equal:
  2278       __ bltz(FSR, not_taken);
  2279       break;
  2281   __ delayed()->nop();
  2283   branch(false, false);
  2285   __ bind(not_taken);
  2286   __ profile_not_taken_branch(FSR);
  2290 void TemplateTable::if_icmp(Condition cc) {
  2291   transition(itos, vtos);
  2292   // assume branch is more often taken than not (loops use backward branches)
  2293   Label not_taken;
  2295   __ pop_i(SSR);	
  2296   switch(cc) {
  2297     case not_equal:
  2298       __ beq(SSR, FSR, not_taken);
  2299       break;
  2300     case equal:
  2301       __ bne(SSR, FSR, not_taken);
  2302       break;
  2303     case less:
  2304       __ slt(AT, SSR, FSR);
  2305       __ beq(AT, R0, not_taken);
  2306       break;
  2307     case less_equal:
  2308       __ slt(AT, FSR, SSR);
  2309       __ bne(AT, R0, not_taken);
  2310       break;
  2311     case greater:
  2312       __ slt(AT, FSR, SSR);
  2313       __ beq(AT, R0, not_taken);
  2314       break;
  2315     case greater_equal:
  2316       __ slt(AT, SSR, FSR);
  2317       __ bne(AT, R0, not_taken);
  2318       break;
  2320   __ delayed()->nop();
  2322   branch(false, false);
  2324   __ bind(not_taken);
  2325   __ profile_not_taken_branch(FSR);
  2329 void TemplateTable::if_nullcmp(Condition cc) {
  2330   transition(atos, vtos);
  2331   // assume branch is more often taken than not (loops use backward branches)
  2332   Label not_taken;
  2333   switch(cc) {
  2334     case not_equal:
  2335       __ beq(FSR, R0, not_taken);
  2336       break;
  2337     case equal:
  2338       __ bne(FSR, R0, not_taken);
  2339       break;
  2340     default:
  2341       ShouldNotReachHere();
  2343   __ delayed()->nop();
  2345   branch(false, false);
  2347   __ bind(not_taken);
  2348   __ profile_not_taken_branch(FSR);
  2352 void TemplateTable::if_acmp(Condition cc) {
  2353 	transition(atos, vtos);
  2354 	// assume branch is more often taken than not (loops use backward branches)
  2355 	Label not_taken;
  2356 	//	__ lw(SSR, SP, 0);
  2357 	__ pop_ptr(SSR);
  2358 	switch(cc) {
  2359 		case not_equal:
  2360 			__ beq(SSR, FSR, not_taken);
  2361 			break;
  2362 		case equal:
  2363 			__ bne(SSR, FSR, not_taken);
  2364 			break;
  2365 		default:
  2366 			ShouldNotReachHere();
  2368 	//	__ delayed()->daddi(SP, SP, 4);
  2369 	__ delayed()->nop();
  2371 	branch(false, false);
  2373 	__ bind(not_taken);
  2374 	__ profile_not_taken_branch(FSR);
  2377 // used registers : T1, T2, T3
  2378 // T1 : method
   2379 // T2 : return bci
  2380 void TemplateTable::ret() {
  2381 	transition(vtos, vtos);
  2383 	locals_index(T2);
  2384 	__ ld(T2, T2, 0);
  2385 	__ profile_ret(T2, T3);
  2387 	__ get_method(T1);
  2388 	__ ld(BCP, T1, in_bytes(Method::const_offset()));
  2389 	__ dadd(BCP, BCP, T2);
  2390 	__ daddi(BCP, BCP, in_bytes(ConstMethod::codes_offset()));
  2392 	__ dispatch_next(vtos);
  2395 // used registers : T1, T2, T3
  2396 // T1 : method
   2397 // T2 : return bci
  2398 void TemplateTable::wide_ret() {
  2399 	transition(vtos, vtos);
  2401 	locals_index_wide(T2);
  2402 	__ ld(T2, T2, 0);                   // get return bci, compute return bcp
  2403 	__ profile_ret(T2, T3);
  2405 	__ get_method(T1);
  2406 	__ ld(BCP, T1, in_bytes(Method::const_offset()));
  2407 	__ dadd(BCP, BCP, T2);
  2408 	__ daddi(BCP, BCP, in_bytes(ConstMethod::codes_offset()));
  2410 	__ dispatch_next(vtos);
  2413 // used register T2, T3, A7, Rnext
  2414 // T2 : bytecode pointer
  2415 // T3 : low
  2416 // A7 : high
  2417 // Rnext : dest bytecode, required by dispatch_base
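        // tableswitch operand layout (per the JVM spec): the operands start at the next 4-byte
        // boundary after the opcode, then come default(4), low(4), high(4) and
        // (high - low + 1) signed 4-byte jump offsets, all big-endian (hence the swaps);
        // the selected offset is added to the bcp of the tableswitch opcode itself.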
  2418 void TemplateTable::tableswitch() {
  2419 	Label default_case, continue_execution;
  2420 	transition(itos, vtos);
  2422 	// align BCP
  2423 	__ daddi(T2, BCP, BytesPerInt);
  2424 	__ li(AT, -BytesPerInt);
  2425 	__ andr(T2, T2, AT);
  2427 	// load lo & hi
  2428 	__ lw(T3, T2, 1 * BytesPerInt);
  2429 	__ swap(T3);
  2430 	__ lw(A7, T2, 2 * BytesPerInt);
  2431 	__ swap(A7);
  2433 	// check against lo & hi
  2434 	__ slt(AT, FSR, T3);
  2435 	__ bne(AT, R0, default_case);
  2436 	__ delayed()->nop();
  2438 	__ slt(AT, A7, FSR);
  2439 	__ bne(AT, R0, default_case);
  2440 	__ delayed()->nop();
  2442 	// lookup dispatch offset, in A7 big endian
  2443 	__ dsub(FSR, FSR, T3);
  2444 	__ dsll(AT, FSR, Address::times_4);
  2445 	__ dadd(AT, T2, AT);
  2446 	__ lw(A7, AT, 3 * BytesPerInt);
  2447 	__ profile_switch_case(FSR, T9, T3);
  2449 	__ bind(continue_execution);
  2450 	__ swap(A7);
  2451 	__ dadd(BCP, BCP, A7);
  2452 	__ lbu(Rnext, BCP, 0);
  2453 	__ dispatch_only(vtos);
  2455 	// handle default
  2456 	__ bind(default_case);
  2457 	__ profile_switch_default(FSR);
  2458 	__ lw(A7, T2, 0);
  2459 	__ b(continue_execution);
  2460 	__ delayed()->nop();
  2463 void TemplateTable::lookupswitch() {
  2464 	transition(itos, itos);
  2465 	__ stop("lookupswitch bytecode should have been rewritten");
  2468 // used registers : T2, T3, A7, Rnext
  2469 // T2 : bytecode pointer
  2470 // T3 : pair index
  2471 // A7 : offset
  2472 // Rnext : dest bytecode
  2473 // the data after the opcode is the same as lookupswitch
  2474 // see Rewriter::rewrite_method for more information
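        // lookupswitch operand layout (per the JVM spec): after 4-byte alignment come default(4),
        // npairs(4) and npairs (match, offset) pairs of 4 bytes each, big-endian. Here the key in
        // FSR is byte-swapped once so each stored match can be compared without swapping, and the
        // pairs are scanned linearly from the last one down to the first.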
  2475 void TemplateTable::fast_linearswitch() {
  2476   transition(itos, vtos);
  2477   Label loop_entry, loop, found, continue_execution;  
  2479   // swap eax so we can avoid swapping the table entries
  2480   __ swap(FSR);
  2482   // align BCP
  2483   __ daddi(T2, BCP, BytesPerInt);
  2484   __ li(AT, -BytesPerInt);
  2485   __ andr(T2, T2, AT);
  2487   // set counter
  2488   __ lw(T3, T2, BytesPerInt);
  2489   __ swap(T3);
  2490   __ b(loop_entry);
  2491   __ delayed()->nop();
  2493   // table search
  2494   __ bind(loop);
  2495   // get the entry value
  2496   __ dsll(AT, T3, Address::times_8);
  2497   __ dadd(AT, T2, AT);
  2498   __ lw(AT, AT, 2 * BytesPerInt);
  2500   // found?
  2501   __ beq(FSR, AT, found);
  2502   __ delayed()->nop();
  2504   __ bind(loop_entry);
  2505   __ bgtz(T3, loop);
  2506   __ delayed()->daddiu(T3, T3, -1);
  2508   // default case
  2509   __ profile_switch_default(FSR);
  2510   __ lw(A7, T2, 0);
  2511   __ b(continue_execution);
  2512   __ delayed()->nop();
  2514   // entry found -> get offset
  2515   __ bind(found);
  2516   __ dsll(AT, T3, Address::times_8);
  2517   __ dadd(AT, T2, AT);
  2518   __ lw(A7, AT, 3 * BytesPerInt);
  2519   __ profile_switch_case(T3, FSR, T2);
  2521   // continue execution
  2522   __ bind(continue_execution);  
  2523   __ swap(A7);
  2524   __ dadd(BCP, BCP, A7);
  2525   __ lbu(Rnext, BCP, 0);
  2526   __ dispatch_only(vtos);
  2529 // used registers : T0, T1, T2, T3, A7, Rnext
  2530 // T2 : pairs address(array)
  2531 // Rnext : dest bytecode
  2532 // the data after the opcode is the same as lookupswitch
  2533 // see Rewriter::rewrite_method for more information
  2534 void TemplateTable::fast_binaryswitch() {
  2535   transition(itos, vtos);
  2536   // Implementation using the following core algorithm:
  2537   //
  2538   // int binary_search(int key, LookupswitchPair* array, int n) {
  2539   //   // Binary search according to "Methodik des Programmierens" by
  2540   //   // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985.
  2541   //   int i = 0;
  2542   //   int j = n;
  2543   //   while (i+1 < j) {
  2544   //     // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q)
  2545   //     // with      Q: for all i: 0 <= i < n: key < a[i]
  2546   //     // where a stands for the array and assuming that the (inexisting)
  2547   //     // element a[n] is infinitely big.
  2548   //     int h = (i + j) >> 1;
  2549   //     // i < h < j
  2550   //     if (key < array[h].fast_match()) {
  2551   //       j = h;
  2552   //     } else {
  2553   //       i = h;
  2554   //     }
  2555   //   }
  2556   //   // R: a[i] <= key < a[i+1] or Q
  2557   //   // (i.e., if key is within array, i is the correct index)
  2558   //   return i;
  2559   // }
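          // Each LookupswitchPair is two big-endian 4-byte words (match, offset). After the
          // alignment below, `array` points at the first pair; the pair count n sits at
          // array - 1*BytesPerInt and the default offset at array - 2*BytesPerInt.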
  2561   // register allocation
  2562   const Register array = T2;
  2563   const Register i = T3, j = A7;
  2564   const Register h = T1;
  2565   const Register temp = T0;
  2566   const Register key = FSR;
  2568   // setup array
  2569   __ daddi(array, BCP, 3*BytesPerInt);
  2570   __ li(AT, -BytesPerInt);
  2571   __ andr(array, array, AT);
  2573   // initialize i & j
  2574   __ move(i, R0);
  2575   __ lw(j, array, - 1 * BytesPerInt);
  2576   // Convert j into native byteordering  
  2577   __ swap(j);
  2579   // and start
  2580   Label entry;
  2581   __ b(entry);
  2582   __ delayed()->nop();
  2584   // binary search loop
  2586     Label loop;
  2587     __ bind(loop);
  2588     // int h = (i + j) >> 1;
  2589     __ dadd(h, i, j);
  2590     __ dsrl(h, h, 1);
  2591     // if (key < array[h].fast_match()) {
  2592     //   j = h;
  2593     // } else {
  2594     //   i = h;
  2595     // }
  2596     // Convert array[h].match to native byte-ordering before compare
  2597     __ dsll(AT, h, Address::times_8);
  2598     __ dadd(AT, array, AT);
  2599     __ lw(temp, AT, 0 * BytesPerInt);
  2600     __ swap(temp);
  2603       Label set_i, end_of_if;
  2604       __ slt(AT, key, temp);
  2605       __ beq(AT, R0, set_i);
  2606       __ delayed()->nop(); 
  2608       __ b(end_of_if);
  2609       __ delayed(); __ move(j, h);
  2611       __ bind(set_i);
  2612       __ move(i, h);
  2614       __ bind(end_of_if);
  2616     // while (i+1 < j)
  2617     __ bind(entry);
  2618     __ daddi(h, i, 1);
  2619     __ slt(AT, h, j);
  2620     __ bne(AT, R0, loop);
  2621     __ delayed()->nop();
  2624   // end of binary search, result index is i (must check again!)
  2625   Label default_case;
  2626   // Convert array[i].match to native byte-ordering before compare
  2627   __ dsll(AT, i, Address::times_8);
  2628   __ dadd(AT, array, AT);
  2629   __ lw(temp, AT, 0 * BytesPerInt);
  2630   __ swap(temp);
  2631   __ bne(key, temp, default_case);
  2632   __ delayed()->nop();
  2634   // entry found -> j = offset
  2635   __ dsll(AT, i, Address::times_8);
  2636   __ dadd(AT, array, AT);
  2637   __ lw(j, AT, 1 * BytesPerInt);
  2638   __ profile_switch_case(i, key, array);
  2639   __ swap(j);
  2641   __ dadd(BCP, BCP, j);
  2642   __ lbu(Rnext, BCP, 0);
  2643   __ dispatch_only(vtos);
  2645   // default case -> j = default offset
  2646   __ bind(default_case);
  2647   __ profile_switch_default(i);
  2648   __ lw(j, array, - 2 * BytesPerInt);
  2649   __ swap(j);
  2650   __ dadd(BCP, BCP, j);
  2651   __ lbu(Rnext, BCP, 0);
  2652   __ dispatch_only(vtos);
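        // _return also handles _return_register_finalizer, the rewritten return of Object.<init>:
        // if the receiver's klass has JVM_ACC_HAS_FINALIZER set, the object is registered with
        // the VM for finalization before the activation is removed.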
  2655 void TemplateTable::_return(TosState state) {
  2656   transition(state, state);
  2657   assert(_desc->calls_vm(), "inconsistent calls_vm information"); // call in remove_activation
  2658   if (_desc->bytecode() == Bytecodes::_return_register_finalizer) {
  2659     assert(state == vtos, "only valid state");
  2660     __ ld(T1, aaddress(0));
  2661     //__ ld(LVP, T1, oopDesc::klass_offset_in_bytes());
  2662     __ load_klass(LVP, T1);
  2663     __ lw(LVP, LVP, in_bytes(Klass::access_flags_offset()));
  2664     __ move(AT, JVM_ACC_HAS_FINALIZER); 
  2665     __ andr(AT, AT, LVP);//by_css
  2666     Label skip_register_finalizer;
  2667     __ beq(AT, R0, skip_register_finalizer);
  2668     __ delayed()->nop(); 
  2669     __ call_VM(noreg, CAST_FROM_FN_PTR(address, 
  2670 	  InterpreterRuntime::register_finalizer), T1);
  2671     __ bind(skip_register_finalizer);
  2673   __ remove_activation(state, T9);
  2674   __ sync();
  2676   __ jr(T9);
  2677   __ delayed()->nop();
  2680 // ----------------------------------------------------------------------------
  2681 // Volatile variables demand their effects be made known to all CPU's
  2682 // in order.  Store buffers on most chips allow reads & writes to
  2683 // reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode
  2684 // without some kind of memory barrier (i.e., it's not sufficient that
  2685 // the interpreter does not reorder volatile references, the hardware
  2686 // also must not reorder them).
  2687 //
  2688 // According to the new Java Memory Model (JMM):
  2689 // (1) All volatiles are serialized wrt to each other.  ALSO reads &
   2690 //     writes act as acquire & release, so:
  2691 // (2) A read cannot let unrelated NON-volatile memory refs that
  2692 //     happen after the read float up to before the read.  It's OK for
  2693 //     non-volatile memory refs that happen before the volatile read to
  2694 //     float down below it.
   2695 // (3) Similarly, a volatile write cannot let unrelated NON-volatile
  2696 //     memory refs that happen BEFORE the write float down to after the
  2697 //     write.  It's OK for non-volatile memory refs that happen after the
  2698 //     volatile write to float up before it.
  2699 //
  2700 // We only put in barriers around volatile refs (they are expensive),
  2701 // not _between_ memory refs (that would require us to track the
  2702 // flavor of the previous memory refs).  Requirements (2) and (3)
  2703 // require some barriers before volatile stores and after volatile
  2704 // loads.  These nearly cover requirement (1) but miss the
  2705 // volatile-store-volatile-load case.  This final case is placed after
  2706 // volatile-stores although it could just as well go before
  2707 // volatile-loads.
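        // On MIPS the barrier below is a full `sync`, which conservatively orders all earlier
        // loads and stores before all later ones, so it covers both the acquire and the
        // release cases described above.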
  2708 //void TemplateTable::volatile_barrier(Assembler::Membar_mask_bits
  2709 //                                     order_constraint) {
  2710 void TemplateTable::volatile_barrier( ) {
  2711   // Helper function to insert a is-volatile test and memory barrier
  2712   //if (os::is_MP()) { // Not needed on single CPU
  2713   //  __ membar(order_constraint);
  2714   //}
  2715 	if( !os::is_MP() ) return;	// Not needed on single CPU
  2716 	__ sync();
   2719 // we do not shift the index left by 2 bits in get_cache_and_index_at_bcp,
   2720 // because we always need to shift the index before using it. A ConstantPoolCacheEntry
   2721 // is 16 bytes long and index is the index into the
   2722 // ConstantPoolCache, so cache + base_offset() + index * 16 is
   2723 // the corresponding ConstantPoolCacheEntry
   2724 // used registers : T2
   2725 // NOTE : the returned index must also be shifted left by 4 to form the address!
  2726 void TemplateTable::resolve_cache_and_index(int byte_no,
  2727                                             Register Rcache,
  2728 					    Register index,
  2729                                             size_t index_size) {
  2730   assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
  2731   const Register temp = A1;
  2732   assert_different_registers(Rcache, index);
  2733   const int shift_count = (1 + byte_no)*BitsPerByte;
  2734   Label resolved;
  2735   __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size);
  2736   // is resolved?
  2737   int i = (int)bytecode();
  2738   __ addi(temp, temp, -i);
  2739   __ beq(temp, R0, resolved);
  2740   __ delayed()->nop();
  2741   // resolve first time through
  2742   address entry;
  2743   switch (bytecode()) {
  2744     case Bytecodes::_getstatic      : // fall through
  2745     case Bytecodes::_putstatic      : // fall through
  2746     case Bytecodes::_getfield       : // fall through
  2747     case Bytecodes::_putfield       : 
  2748       entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put); 
  2749       break;
  2750     case Bytecodes::_invokevirtual  : // fall through
  2751     case Bytecodes::_invokespecial  : // fall through
  2752     case Bytecodes::_invokestatic   : // fall through
  2753     case Bytecodes::_invokeinterface: 
  2754       entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke);  
  2755       break;
  2756     case Bytecodes::_invokehandle:
  2757       entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokehandle);
  2758       break;
  2759     case Bytecodes::_invokedynamic:
  2760       entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic);
  2761       break;
  2762     default                      		: 
  2763       fatal(err_msg("unexpected bytecode: %s", Bytecodes::name(bytecode())));
  2766   __ move(temp, i);
  2767   __ call_VM(NOREG, entry, temp);
  2769   // Update registers with resolved info
  2770   __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size);
  2771   __ bind(resolved);
  2774 // The Rcache and index registers must be set before call
  2775 void TemplateTable::load_field_cp_cache_entry(Register obj,
  2776                                               Register cache,
  2777                                               Register index,
  2778                                               Register off,
  2779                                               Register flags,
  2780                                               bool is_static = false) {
  2781   assert_different_registers(cache, index, flags, off);
  2782   ByteSize cp_base_offset = ConstantPoolCache::base_offset();
  2783   // Field offset
  2784   __ dsll(AT, index, Address::times_ptr);
  2785   __ dadd(AT, cache, AT);
  2786   __ ld(off, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f2_offset()));
  2787   // Flags    
  2788   __ ld(flags, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset()));
  2790   // klass     overwrite register
  2791   if (is_static) {
  2792     __ ld(obj, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f1_offset())); 
  2793     const int mirror_offset = in_bytes(Klass::java_mirror_offset());
  2794     __ ld(obj, Address(obj, mirror_offset));
  2796     __ verify_oop(obj);	
  2800 // get the method, itable_index and flags of the current invoke
  2801 void TemplateTable::load_invoke_cp_cache_entry(int byte_no,
  2802                                                Register method,
  2803                                                Register itable_index,
  2804                                                Register flags,
  2805                                                bool is_invokevirtual,
  2806                                                bool is_invokevfinal, /*unused*/
  2807                                                bool is_invokedynamic) {
  2808   // setup registers
  2809   const Register cache = T3;
  2810   const Register index = T1;
  2811   assert_different_registers(method, flags);
  2812   assert_different_registers(method, cache, index);
  2813   assert_different_registers(itable_index, flags);
  2814   assert_different_registers(itable_index, cache, index);
  2815   assert(is_invokevirtual == (byte_no == f2_byte), "is invokevirtual flag redundant");
  2816   // determine constant pool cache field offsets
  2817   const int method_offset = in_bytes(
  2818       ConstantPoolCache::base_offset() +
  2819       ((byte_no == f2_byte)
  2820        ? ConstantPoolCacheEntry::f2_offset()
  2821        : ConstantPoolCacheEntry::f1_offset()
  2823       );
  2824   const int flags_offset = in_bytes(ConstantPoolCache::base_offset() +
  2825       ConstantPoolCacheEntry::flags_offset());
  2826   // access constant pool cache fields
  2827   const int index_offset = in_bytes(ConstantPoolCache::base_offset() +
  2828       ConstantPoolCacheEntry::f2_offset());
  2829   size_t index_size = (is_invokedynamic ? sizeof(u4): sizeof(u2));
  2830   resolve_cache_and_index(byte_no, cache, index, index_size);
  2832   //assert(wordSize == 8, "adjust code below");
   2833   // note we shift by 4, not 2, because what we get is the true index
   2834   // of the ConstantPoolCacheEntry, not the 2-bit-shifted index as in the x86 version
  2835   __ dsll(AT, index, Address::times_ptr);
  2836   __ dadd(AT, cache, AT);
  2837   __ ld(method, AT, method_offset);
  2840   if (itable_index != NOREG) {
  2841     __ ld(itable_index, AT, index_offset);
  2843   __ ld(flags, AT, flags_offset);
   2847 // The registers cache and index are expected to be set before the call.
  2848 // Correct values of the cache and index registers are preserved.
  2849 void TemplateTable::jvmti_post_field_access(Register cache, Register index,
  2850                                             bool is_static, bool has_tos) {
  2851   // do the JVMTI work here to avoid disturbing the register state below
  2852   // We use c_rarg registers here because we want to use the register used in
  2853   // the call to the VM
  2854 	if (JvmtiExport::can_post_field_access()) {
  2855 		// Check to see if a field access watch has been set before we take
  2856 		// the time to call into the VM.
  2857 		Label L1;
  2858 		assert_different_registers(cache, index, FSR);
  2859 		__ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr());
  2860 		__ lw(FSR, AT, 0);
  2861 		__ beq(FSR, R0, L1);
  2862 		__ delayed()->nop();
  2864 		// We rely on the bytecode being resolved and the cpCache entry filled in.
  2865 		// cache entry pointer
  2866 		//__ get_cache_and_index_at_bcp(c_rarg2, c_rarg3, 1);
  2867 		__ daddi(cache, cache, in_bytes(ConstantPoolCache::base_offset()));
  2868 		__ shl(index, 4);
  2869 		__ dadd(cache, cache, index);
  2870 		if (is_static) {
  2871 			__ move(FSR, R0);
  2872 		} else {
  2873 			__ lw(FSR, SP, 0);
  2874 			__ verify_oop(FSR);
  2876 		// FSR: object pointer or NULL
  2877 		// cache: cache entry pointer
  2878 		__ call_VM(NOREG, CAST_FROM_FN_PTR(address, 
  2879 					InterpreterRuntime::post_field_access), FSR, cache);
  2880 		__ get_cache_and_index_at_bcp(cache, index, 1);
  2881 		__ bind(L1);
  2885 void TemplateTable::pop_and_check_object(Register r) {
  2886   __ pop_ptr(r);
  2887   __ null_check(r);  // for field access must check obj.
  2888   __ verify_oop(r);
  2891 // used registers : T1, T2, T3, T1
  2892 // T1 : flags
  2893 // T2 : off
  2894 // T3 : obj
  2895 // T1 : field address
   2896 // The flag bits 31, 30, 29, 28 together form a 4-bit number 0 to 8 with the
  2897 // following mapping to the TosState states:
  2898 // btos: 0
  2899 // ctos: 1
  2900 // stos: 2
  2901 // itos: 3
  2902 // ltos: 4
  2903 // ftos: 5
  2904 // dtos: 6
  2905 // atos: 7
  2906 // vtos: 8
  2907 // see ConstantPoolCacheEntry::set_field for more info
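        // The tos state is recovered below as
        //   (flags >> ConstantPoolCacheEntry::tos_state_shift) & tos_state_mask  (i.e. & 0xf)
        // and is then compared against btos/itos/... to select the correctly sized load.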
  2908 void TemplateTable::getfield_or_static(int byte_no, bool is_static) {
  2909   transition(vtos, vtos);
  2911   const Register cache = T3;
  2912   const Register index = T0;
  2914   const Register obj   = T3;
  2915   const Register off   = T2;
  2916   const Register flags = T1;
  2917   resolve_cache_and_index(byte_no, cache, index, sizeof(u2));
  2918   //jvmti_post_field_access(cache, index, is_static, false);
  2920   load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
  2922   if (!is_static) pop_and_check_object(obj);
  2923   __ dadd(index, obj, off);
  2926   Label Done, notByte, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble;
  2928   assert(btos == 0, "change code, btos != 0");
  2929   __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift);
  2930   __ andi(flags, flags, 0xf);
  2931   __ bne(flags, R0, notByte);
  2932   __ delayed()->nop();
  2934   // btos
  2935   __ lb(FSR, index, 0);	
  2936   __ sd(FSR, SP, - wordSize);
  2938   // Rewrite bytecode to be faster
  2939   if (!is_static) {
  2940     patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2);
  2942   __ b(Done);
  2943   __ delayed()->daddi(SP, SP, - wordSize);
  2945   __ bind(notByte);
  2946   __ move(AT, itos);
  2947   __ bne(flags, AT, notInt);
  2948   __ delayed()->nop();
  2950   // itos
  2951   __ lw(FSR, index, 0);
  2952   __ sd(FSR, SP, - wordSize);
  2954   // Rewrite bytecode to be faster
  2955   if (!is_static) {
  2956     // patch_bytecode(Bytecodes::_fast_igetfield, T3, T2);
  2957     patch_bytecode(Bytecodes::_fast_igetfield, T3, T2);
  2959   __ b(Done);
  2960   __ delayed()->daddi(SP, SP, - wordSize);
  2962   __ bind(notInt);
  2963   __ move(AT, atos);
  2964   __ bne(flags, AT, notObj);
  2965   __ delayed()->nop();
  2967   // atos
  2968   //add for compressedoops
  2969   __ load_heap_oop(FSR, Address(index, 0));
  2970   __ sd(FSR, SP, - wordSize);
  2972   if (!is_static) {
  2973     //patch_bytecode(Bytecodes::_fast_agetfield, T3, T2);
  2974     patch_bytecode(Bytecodes::_fast_agetfield, T3, T2);
  2976   __ b(Done);
  2977   __ delayed()->daddi(SP, SP, - wordSize);
  2979   __ bind(notObj);
  2980   __ move(AT, ctos);
  2981   __ bne(flags, AT, notChar);
  2982   __ delayed()->nop();
  2984   // ctos
  2985   __ lhu(FSR, index, 0);
  2986   __ sd(FSR, SP, - wordSize);
  2988   if (!is_static) {
  2989     patch_bytecode(Bytecodes::_fast_cgetfield, T3, T2);
  2991   __ b(Done);
  2992   __ delayed()->daddi(SP, SP, - wordSize);
  2994   __ bind(notChar);
  2995   __ move(AT, stos);
  2996   __ bne(flags, AT, notShort);
  2997   __ delayed()->nop();
  2999   // stos
  3000   __ lh(FSR, index, 0);
  3001   __ sd(FSR, SP, - wordSize);
  3003   if (!is_static) {
  3004     // patch_bytecode(Bytecodes::_fast_sgetfield, T3, T2);
  3005     patch_bytecode(Bytecodes::_fast_sgetfield, T3, T2);
  3007   __ b(Done);
  3008   __ delayed()->daddi(SP, SP, - wordSize);
  3010   __ bind(notShort);
  3011   __ move(AT, ltos);
  3012   __ bne(flags, AT, notLong);
  3013   __ delayed()->nop();
  3015   // FIXME : the load/store should be atomic, we have no simple method to do this in mips32
  3016   // ltos
  3017   __ ld(FSR, index, 0 * wordSize);
  3018   __ sd(FSR, SP, -2 * wordSize);
  3019   __ sd(R0, SP, -1 * wordSize);
  3021   // Don't rewrite to _fast_lgetfield for potential volatile case.
  3022   __ b(Done);
  3023   __ delayed()->daddi(SP, SP, - 2 * wordSize);
  3025   __ bind(notLong);
  3026   __ move(AT, ftos);
  3027   __ bne(flags, AT, notFloat);
  3028   __ delayed()->nop();
  3030   // ftos
  3031   __ lwc1(FSF, index, 0);
  3032   __ sdc1(FSF, SP, - wordSize);
  3034   if (!is_static) {
  3035     patch_bytecode(Bytecodes::_fast_fgetfield, T3, T2);
  3037   __ b(Done);
  3038   __ delayed()->daddi(SP, SP, - wordSize);
  3040   __ bind(notFloat);
  3041   __ move(AT, dtos);
  3042   __ bne(flags, AT, notDouble);
  3043   __ delayed()->nop();
  3045   // dtos
  3046   __ ldc1(FSF, index, 0 * wordSize);
  3047   __ sdc1(FSF, SP, - 2 * wordSize);
  3048   __ sd(R0, SP, - 1 * wordSize);
  3050   if (!is_static) {
  3051     patch_bytecode(Bytecodes::_fast_dgetfield, T3, T2);
  3053   __ b(Done);
  3054   __ delayed()->daddi(SP, SP, - 2 * wordSize);
  3056   __ bind(notDouble);
  3058   __ stop("Bad state");
  3060   __ bind(Done);
  3063 void TemplateTable::getfield(int byte_no) {
  3064   getfield_or_static(byte_no, false);
  3067 void TemplateTable::getstatic(int byte_no) {
  3068   getfield_or_static(byte_no, true);
  3070 /*
  3071 // used registers : T1, T2, T3, T1
  3072 // T1 : cache & cp entry
  3073 // T2 : obj
  3074 // T3 : flags & value pointer
  3075 // T1 : index
  3076 // see ConstantPoolCacheEntry::set_field for more info
  3077 void TemplateTable::jvmti_post_field_mod(int byte_no, bool is_static) {
  3078  */
   3080 // The registers cache and index are expected to be set before the call.
  3081 // The function may destroy various registers, just not the cache and index registers.
  3082 void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) {
  3083 	ByteSize cp_base_offset = ConstantPoolCache::base_offset();
  3085 	if (JvmtiExport::can_post_field_modification()) {
  3086 		// Check to see if a field modification watch has been set before we take
  3087 		// the time to call into the VM.
  3088 		Label L1;
  3089 		assert_different_registers(cache, index, AT);
  3091 		//__ lui(AT, Assembler::split_high((int)JvmtiExport::get_field_modification_count_addr()));
  3092 		//__ lw(FSR, AT, Assembler::split_low((int)JvmtiExport::get_field_modification_count_addr()));
  3093 		__ li(AT, JvmtiExport::get_field_modification_count_addr());
  3094 		__ lw(FSR, AT, 0);
  3095 		__ beq(FSR, R0, L1);
  3096 		__ delayed()->nop();
  3098 		/* // We rely on the bytecode being resolved and the cpCache entry filled in.
  3099 		   resolve_cache_and_index(byte_no, T1, T1);
  3100 		   */
  3101 		// The cache and index registers have been already set.
  3102 		// This allows to eliminate this call but the cache and index
  3103 		// registers have to be correspondingly used after this line.
  3104 		// __ get_cache_and_index_at_bcp(eax, edx, 1);
  3105 		__ get_cache_and_index_at_bcp(T1, T9, 1);
  3107 		if (is_static) {
  3108 			__ move(T2, R0);
  3109 		} else {
  3110 			// Life is harder. The stack holds the value on top, 
  3111 			// followed by the object.
  3112 			// We don't know the size of the value, though; 
  3113 			// it could be one or two words
  3114 			// depending on its type. As a result, we must find 
  3115 			// the type to determine where the object is.
  3116 			Label two_word, valsize_known;
  3117 			__ dsll(AT, T1, 4); 
  3118 			__ dadd(AT, T1, AT);
  3119 			__ lw(T3, AT, in_bytes(cp_base_offset 
  3120 						+ ConstantPoolCacheEntry::flags_offset()));
  3121 			__ move(T2, SP);
  3122 			__ shr(T3, ConstantPoolCacheEntry::tos_state_shift);
  3124 			// Make sure we don't need to mask ecx for tos_state_shift 
  3125 			// after the above shift
  3126 			ConstantPoolCacheEntry::verify_tos_state_shift();
  3127 			__ move(AT, ltos);
  3128 			__ beq(T3, AT, two_word);
  3129 			__ delayed()->nop();
  3130 			__ move(AT, dtos);
  3131 			__ beq(T3, AT, two_word);
  3132 			__ delayed()->nop();
  3133 			__ b(valsize_known);
  3134 			//__ delayed()->daddi(T2, T2, wordSize*1);
  3135 			__ delayed()->daddi(T2, T2,Interpreter::expr_offset_in_bytes(1) );
  3137 			__ bind(two_word);
  3138 			//	__ daddi(T2, T2, wordSize*2);
  3139 			__ daddi(T2, T2,Interpreter::expr_offset_in_bytes(2));
  3141 			__ bind(valsize_known);
  3142 			// setup object pointer
  3143 			__ lw(T2, T2, 0*wordSize);
  3145 		// cache entry pointer
  3146 		__ daddi(T1, T1, in_bytes(cp_base_offset));
  3147 		__ shl(T1, 4); 
  3148 		__ daddu(T1, T1, T1);
  3149 		// object (tos)
  3150 		__ move(T3, SP);
  3151 		// T2: object pointer set up above (NULL if static)
  3152 		// T1: cache entry pointer
  3153 		// T3: jvalue object on the stack
  3154 		__ call_VM(NOREG, CAST_FROM_FN_PTR(address, 
  3155 				InterpreterRuntime::post_field_modification), T2, T1, T3);
  3156 		__ get_cache_and_index_at_bcp(cache, index, 1);
  3157 		__ bind(L1);
  3161 // used registers : T0, T1, T2, T3, T8
  3162 // T1 : flags
  3163 // T2 : off
  3164 // T3 : obj
  3165 // T8 : volatile bit
  3166 // see ConstantPoolCacheEntry::set_field for more info
  3167 void TemplateTable::putfield_or_static(int byte_no, bool is_static) {
  3168   transition(vtos, vtos);
  3170   const Register cache = T3;
  3171   const Register index = T0;
  3172   const Register obj   = T3;
  3173   const Register off   = T2;
  3174   const Register flags = T1;
  3175   const Register bc    = T3;
  3177   resolve_cache_and_index(byte_no, cache, index, sizeof(u2));
  3178   //TODO: LEE
  3179   //jvmti_post_field_mod(cache, index, is_static);
  3180   load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
  3181   // Doug Lea believes this is not needed with current Sparcs (TSO) and Intel (PSO).
  3182   // volatile_barrier( );
  3184   Label notVolatile, Done;
  3185   __ move(AT, 1<<ConstantPoolCacheEntry::is_volatile_shift);
  3186   __ andr(T8, flags, AT);
  3188   Label notByte, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble;
  3190   assert(btos == 0, "change code, btos != 0");
  3191   // btos
  3192   __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift);
  3193   __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask);
  3194   __ bne(flags, R0, notByte);
  3195   __ delayed()->nop();
  3197   __ pop(btos);
  3198   if (!is_static) {
  3199     pop_and_check_object(obj); 
  3201   __ dadd(AT, obj, off);
  3202   __ sb(FSR, AT, 0);
  3204   if (!is_static) {
  3205     patch_bytecode(Bytecodes::_fast_bputfield, bc, off, true, byte_no);
  3207   __ b(Done);
  3208   __ delayed()->nop();
  3210   __ bind(notByte);
  3211   // itos
  3212   __ move(AT, itos);
  3213   __ bne(flags, AT, notInt);
  3214   __ delayed()->nop();
  3216   __ pop(itos);
  3217   if (!is_static) {
  3218     pop_and_check_object(obj); 
  3220   __ dadd(AT, obj, off);
  3221   __ sw(FSR, AT, 0);
  3223   if (!is_static) {
  3224     patch_bytecode(Bytecodes::_fast_iputfield, bc, off, true, byte_no);
  3226   __ b(Done);
  3227   __ delayed()->nop();  
  3228   __ bind(notInt);
  3229   // atos
  3230   __ move(AT, atos);
  3231   __ bne(flags, AT, notObj);
  3232   __ delayed()->nop();
  3234   __ pop(atos);
  3235   if (!is_static) {
  3236     pop_and_check_object(obj); 
  3239   __ dadd(AT, obj, off);
  3240   //__ sd(FSR, AT, 0);
  3241   __ store_heap_oop(Address(AT, 0), FSR);
  3242   __ store_check(obj);
  3244   if (!is_static) {
  3245     patch_bytecode(Bytecodes::_fast_aputfield, bc, off, true, byte_no);
  3247   __ b(Done);
  3248   __ delayed()->nop();
  3249   __ bind(notObj);
  3250   // ctos
  3251   __ move(AT, ctos);
  3252   __ bne(flags, AT, notChar);
  3253   __ delayed()->nop();
  3255   __ pop(ctos);
  3256   if (!is_static) {
  3257     pop_and_check_object(obj); 
  3259   __ dadd(AT, obj, off);
  3260   __ sh(FSR, AT, 0);
  3261   if (!is_static) {
  3262     patch_bytecode(Bytecodes::_fast_cputfield, bc, off, true, byte_no);
  3264   __ b(Done);
  3265   __ delayed()->nop();
  3266   __ bind(notChar);
  3267   // stos
  3268   __ move(AT, stos);
  3269   __ bne(flags, AT, notShort);
  3270   __ delayed()->nop();
  3272   __ pop(stos);
  3273   if (!is_static) {
  3274     pop_and_check_object(obj); 
  3276   __ dadd(AT, obj, off);
  3277   __ sh(FSR, AT, 0);
  3278   if (!is_static) {
  3279     patch_bytecode(Bytecodes::_fast_sputfield, bc, off, true, byte_no);
  3281   __ b(Done);
  3282   __ delayed()->nop();
  3283   __ bind(notShort);
  3284   // ltos
  3285   __ move(AT, ltos);
  3286   __ bne(flags, AT, notLong);
  3287   __ delayed()->nop();
   3289   // FIXME: there is no simple method to load/store 64-bit data in an atomic operation
  3290   // we just ignore the volatile flag.
  3291   //Label notVolatileLong;
  3292   //__ beq(T1, R0, notVolatileLong);
  3293   //__ delayed()->nop();
  3295   //addent = 2 * wordSize;
  3296   // no need
  3297   //__ lw(FSR, SP, 0);
  3298   //__ lw(SSR, SP, 1 * wordSize);
  3299   //if (!is_static) {
  3300   //	__ lw(T3, SP, addent);
  3301   //	addent += 1 * wordSize;
  3302   //	__ verify_oop(T3);
  3303   //}
  3305   //__ daddu(AT, T3, T2);
  3307   // Replace with real volatile test
  3308   // NOTE : we assume that sdc1&ldc1 operate in 32-bit, this is true for Godson2 even in 64-bit kernel
  3309   // last modified by yjl 7/12/2005
  3310   //__ ldc1(FSF, SP, 0); 
  3311   //__ sdc1(FSF, AT, 0);
  3312   //volatile_barrier();
  3314   // Don't rewrite volatile version
  3315   //__ b(notVolatile);
  3316   //__ delayed()->addiu(SP, SP, addent);
  3318   //__ bind(notVolatileLong);
  3320   //__ pop(ltos);  // overwrites edx
  3321   //	__ lw(FSR, SP, 0 * wordSize);
  3322   //	__ lw(SSR, SP, 1 * wordSize);
  3323   //	__ daddi(SP, SP, 2*wordSize);
  3324   __ pop(ltos);
  3325   if (!is_static) {
  3326     pop_and_check_object(obj); 
  3328   __ dadd(AT, obj, off);
  3329   __ sd(FSR, AT, 0);
  3330   if (!is_static) {
  3331     patch_bytecode(Bytecodes::_fast_lputfield, bc, off, true, byte_no);
  3333   __ b(notVolatile);
  3334   __ delayed()->nop();
  3336   __ bind(notLong);
  3337   // ftos
  3338   __ move(AT, ftos);
  3339   __ bne(flags, AT, notFloat);
  3340   __ delayed()->nop();
  3342   __ pop(ftos);
  3343   if (!is_static) {
  3344     pop_and_check_object(obj); 
  3346   __ dadd(AT, obj, off);
  3347   __ swc1(FSF, AT, 0);
  3348   if (!is_static) {
  3349     patch_bytecode(Bytecodes::_fast_fputfield, bc, off, true, byte_no);
  3351   __ b(Done);
  3352   __ delayed()->nop();
  3353   __ bind(notFloat);
  3354   // dtos
  3355   __ move(AT, dtos);
  3356   __ bne(flags, AT, notDouble);
  3357   __ delayed()->nop();
  3359   __ pop(dtos);
  3360   if (!is_static) {
  3361     pop_and_check_object(obj); 
  3363   __ dadd(AT, obj, off);
  3364   __ sdc1(FSF, AT, 0);
  3365   if (!is_static) {
  3366     patch_bytecode(Bytecodes::_fast_dputfield, bc, off, true, byte_no);
  3368   __ b(Done);
  3369   __ delayed()->nop();
  3370   __ bind(notDouble);
  3372   __ stop("Bad state");
  3374   __ bind(Done);
  3376   // Check for volatile store
  3377   __ beq(T8, R0, notVolatile);
  3378   __ delayed()->nop();
  3379   volatile_barrier( );
  3380   __ bind(notVolatile);
  3383 void TemplateTable::putfield(int byte_no) {
  3384   putfield_or_static(byte_no, false);
  3387 void TemplateTable::putstatic(int byte_no) {
  3388   putfield_or_static(byte_no, true);
  3391 // used registers : T1, T2, T3
  3392 // T1 : cp_entry
  3393 // T2 : obj
  3394 // T3 : value pointer
  3395 void TemplateTable::jvmti_post_fast_field_mod() {
  3396   if (JvmtiExport::can_post_field_modification()) {
  3397     // Check to see if a field modification watch has been set before we take
  3398     // the time to call into the VM.
  3399     Label L2;
  3400     //__ lui(AT, Assembler::split_high((intptr_t)JvmtiExport::get_field_modification_count_addr()));
  3401     //__ lw(T3, AT, Assembler::split_low((intptr_t)JvmtiExport::get_field_modification_count_addr()));
  3402     __ li(AT, JvmtiExport::get_field_modification_count_addr());
  3403     __ lw(T3, AT, 0);
  3404     __ beq(T3, R0, L2);
  3405     __ delayed()->nop();
  3406     //__ pop(T2);
  3407     __ pop_ptr(T2);
  3408     //__ lw(T2, SP, 0);
  3409     __ verify_oop(T2);
  3410     __ push_ptr(T2);
  3411     __ li(AT, -sizeof(jvalue));
  3412     __ daddu(SP, SP, AT);
  3413     __ move(T3, SP);
  3414     //__ push(T2);
  3415     //__ move(T2, R0);

  3417     switch (bytecode()) {          // load values into the jvalue object
  3418       case Bytecodes::_fast_bputfield:
  3419         __ sb(FSR, SP, 0);
  3420         break;
  3421       case Bytecodes::_fast_sputfield:
  3422         __ sh(FSR, SP, 0);
  3423         break;
  3424       case Bytecodes::_fast_cputfield:
  3425         __ sh(FSR, SP, 0);
  3426         break;
  3427       case Bytecodes::_fast_iputfield:
  3428         __ sw(FSR, SP, 0);
  3429         break;
  3430       case Bytecodes::_fast_lputfield:
  3431         __ sd(FSR, SP, 0);
  3432         break;
  3433       case Bytecodes::_fast_fputfield:
  3434         __ swc1(FSF, SP, 0);
  3435         break;
  3436       case Bytecodes::_fast_dputfield:
  3437         __ sdc1(FSF, SP, 0);
  3438         break;
  3439       case Bytecodes::_fast_aputfield:
  3440         __ sd(FSR, SP, 0);
  3441         break;
  3442       default:  ShouldNotReachHere();
  3443     }

  3445     //__ pop(T2);  // restore copy of object pointer

  3447     // Save FSR and sometimes SSR because call_VM() will clobber them,
  3448     // then use them for JVMTI purposes
  3449     __ push(FSR);
  3450     if (bytecode() == Bytecodes::_fast_lputfield) __ push(SSR);
  3451     // access constant pool cache entry
  3452     __ get_cache_entry_pointer_at_bcp(T1, T2, 1);
  3453     // not strictly needed, already verified above
  3454     __ verify_oop(T2);

  3456     // T2: object pointer copied above
  3457     // T1: cache entry pointer
  3458     // T3: jvalue object on the stack
  3459     __ call_VM(NOREG, CAST_FROM_FN_PTR(address,
  3460                InterpreterRuntime::post_field_modification), T2, T1, T3);
  3461     if (bytecode() == Bytecodes::_fast_lputfield) __ pop(SSR);  // restore high value
  3462     //__ pop(FSR);     // restore lower value
  3463     //__ daddi(SP, SP, sizeof(jvalue));  // release jvalue object space
  3464     __ lw(FSR, SP, 0);
  3465     __ daddiu(SP, SP, sizeof(jvalue) + 1 * wordSize);
  3466     __ bind(L2);
  3467   }
  3468 }
  3470 // used registers : T2, T3, T1
  3471 // T2 : index & off & field address
  3472 // T3 : cache & obj
  3473 // T1 : flags
  3474 void TemplateTable::fast_storefield(TosState state) {
  3475   transition(state, vtos);
  3477   ByteSize base = ConstantPoolCache::base_offset();
  3479   jvmti_post_fast_field_mod();
  3481   // access constant pool cache
  3482   __ get_cache_and_index_at_bcp(T3, T2, 1);
  3484   // test for volatile using T1 (the flags word is loaded into T1 below)
  3485   __ dsll(AT, T2, Address::times_8); 
  3486   __ dadd(AT, T3, AT);
  3487   __ ld(T1, AT, in_bytes(base + ConstantPoolCacheEntry::flags_offset()));
  3489   // replace index with field offset from cache entry
  3490   __ ld(T2, AT, in_bytes(base + ConstantPoolCacheEntry::f2_offset()));
  3492   // Doug Lea believes this is not needed with current Sparcs (TSO) and Intel (PSO).
  3493   // volatile_barrier( );
  3495   Label notVolatile, Done;
  3496   // Check for volatile store
  3497   __ move(AT, 1<<ConstantPoolCacheEntry::is_volatile_shift);
  3498   __ andr(AT, T1, AT);
  3499   __ beq(AT, R0, notVolatile);
  3500   __ delayed()->nop();
  3503   // Get object from stack
  3504   // NOTE: the value is in FSR/FSF now
  3505   //	__ pop(T3);
  3506   //	__ verify_oop(T3);
  3507   pop_and_check_object(T3);
  3508   // field addresses
  3509   __ dadd(T2, T3, T2);
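         // T2 = object + field offset (the f2 word of the cache entry), i.e. the absolute
         // address of the field accessed by the stores below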
  3511   // access field
  3512   switch (bytecode()) {
  3513     case Bytecodes::_fast_bputfield: 
  3514       __ sb(FSR, T2, 0);
  3515       break;
  3516     case Bytecodes::_fast_sputfield: // fall through
  3517     case Bytecodes::_fast_cputfield: 
  3518       __ sh(FSR, T2, 0);
  3519       break;
  3520     case Bytecodes::_fast_iputfield: 
  3521       __ sw(FSR, T2, 0);
  3522       break;
  3523     case Bytecodes::_fast_lputfield: 
  3524       __ sd(FSR, T2, 0 * wordSize);
  3525       break;
  3526     case Bytecodes::_fast_fputfield: 
  3527       __ swc1(FSF, T2, 0);
  3528       break;
  3529     case Bytecodes::_fast_dputfield: 
  3530       __ sdc1(FSF, T2, 0 * wordSize);
  3531       break;
  3532     case Bytecodes::_fast_aputfield: 
  3533       __ store_heap_oop(Address(T2, 0), FSR);
  3534       __ store_check(T3);
  3535       break;
  3536     default:
  3537       ShouldNotReachHere();
  3538   }
  3540   Label done;
  3541   volatile_barrier( );
  3542   __ b(done);
  3543   __ delayed()->nop();
  3545   // Same code as above, but we don't need T1 to test for volatile.
  3546   __ bind(notVolatile);
  3548   // Get object from stack
  3549   //	__ pop(T3);
  3550   //	__ verify_oop(T3);
  3551   pop_and_check_object(T3);
  3552   //get the field address
  3553   __ dadd(T2, T3, T2);
  3555   // access field
  3556   switch (bytecode()) {
  3557     case Bytecodes::_fast_bputfield: 
  3558       __ sb(FSR, T2, 0); 
  3559       break;
  3560     case Bytecodes::_fast_sputfield: // fall through
  3561     case Bytecodes::_fast_cputfield: 
  3562       __ sh(FSR, T2, 0);
  3563       break;
  3564     case Bytecodes::_fast_iputfield: 
  3565       __ sw(FSR, T2, 0);
  3566       break;
  3567     case Bytecodes::_fast_lputfield: 
  3568       __ sd(FSR, T2, 0 * wordSize);
  3569       break;
  3570     case Bytecodes::_fast_fputfield: 
  3571       __ swc1(FSF, T2, 0);
  3572       break;
  3573     case Bytecodes::_fast_dputfield: 
  3574       __ sdc1(FSF, T2, 0 * wordSize);
  3575       break;
  3576     case Bytecodes::_fast_aputfield: 
  3577       //add for compressedoops
  3578       __ store_heap_oop(Address(T2, 0), FSR);
  3579       __ store_check(T3);
  3580       break;
  3581     default:
  3582       ShouldNotReachHere();
  3583   }
  3584   __ bind(done);
  3585 }
  3587 // used registers : T2, T3, T1
  3588 // T3 : cp_entry & cache
  3589 // T2 : index & offset
  3590 void TemplateTable::fast_accessfield(TosState state) {
  3591   transition(atos, state);
  3593   // do the JVMTI work here to avoid disturbing the register state below
  3594   if (JvmtiExport::can_post_field_access()) {
  3595     // Check to see if a field access watch has been set before we take
  3596     // the time to call into the VM.
  3597     Label L1;
  3598     __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr());
  3599     __ lw(T3, AT, 0);
  3600     __ beq(T3, R0, L1);
  3601     __ delayed()->nop();
  3602     // access constant pool cache entry
  3603     __ get_cache_entry_pointer_at_bcp(T3, T1, 1);
  3604     __ move(TSR, FSR);
  3605     __ verify_oop(FSR);
  3606     // FSR: object pointer copied above
  3607     // T3: cache entry pointer
  3608     __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access),
  3609 	FSR, T3);
  3610     __ move(FSR, TSR);
  3611     __ bind(L1);
  3612   }
  3614   // access constant pool cache
  3615   __ get_cache_and_index_at_bcp(T3, T2, 1);
  3616   // replace index with field offset from cache entry
  3617   __ dsll(AT, T2, Address::times_8);
  3618   //__ dsll(AT, T2, 4);
  3619   __ dadd(AT, T3, AT);
  3620   __ ld(T2, AT, in_bytes(ConstantPoolCache::base_offset() 
  3621 	+ ConstantPoolCacheEntry::f2_offset()));
  3623   // FSR: object
  3624   __ verify_oop(FSR);
  3625   // __ null_check(FSR, 0);
  3626   __ null_check(FSR);
  3627   // field addresses
  3628   __ dadd(FSR, FSR, T2);
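         // FSR now holds the absolute field address; the rewritten fast bytecode selects
         // which of the loads below is emitted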
  3630   // access field
  3631   switch (bytecode()) {
  3632     case Bytecodes::_fast_bgetfield: 
  3633       __ lb(FSR, FSR, 0);
  3634       break;
  3635     case Bytecodes::_fast_sgetfield: 
  3636       __ lh(FSR, FSR, 0);
  3637       break;
  3638     case Bytecodes::_fast_cgetfield: 
  3639       __ lhu(FSR, FSR, 0);
  3640       break;
  3641     case Bytecodes::_fast_igetfield:
  3642       __ lw(FSR, FSR, 0);
  3643       break;
  3644     case Bytecodes::_fast_lgetfield: 
  3645       __ stop("should not be rewritten");  
  3646       break;
  3647     case Bytecodes::_fast_fgetfield: 
  3648       __ lwc1(FSF, FSR, 0);
  3649       break;
  3650     case Bytecodes::_fast_dgetfield: 
  3651       __ ldc1(FSF, FSR, 0);
  3652       break;
  3653     case Bytecodes::_fast_agetfield:
  3654       //add for compressedoops
  3655       __ load_heap_oop(FSR, Address(FSR, 0));
  3656       __ verify_oop(FSR);
  3657       break;
  3658     default:
  3659       ShouldNotReachHere();
  3660   }

  3662   // Doug Lea believes this is not needed with current Sparcs(TSO) and Intel(PSO)
  3663   // volatile_barrier( );
  3664 }
  3666 // generator for _fast_iaccess_0, _fast_aaccess_0, _fast_faccess_0
  3667 // used registers : T1, T2, T3, T1
  3668 // T1 : obj & field address
  3669 // T2 : off
  3670 // T3 : cache
  3671 // T1 : index
  3672 void TemplateTable::fast_xaccess(TosState state) {
  3673   transition(vtos, state);
  3674   // get receiver
  3675   __ ld(T1, aaddress(0));
  3676   // access constant pool cache
  3677   __ get_cache_and_index_at_bcp(T3, T2, 2);
  3678   __ dsll(AT, T2, Address::times_8);
  3679   __ dadd(AT, T3, AT);
  3680   __ ld(T2, AT, in_bytes(ConstantPoolCache::base_offset() 
  3681 	+ ConstantPoolCacheEntry::f2_offset()));
  3683   // make sure exception is reported in correct bcp range (getfield is next instruction)
  3684   __ daddi(BCP, BCP, 1);
  3685   //	__ null_check(T1, 0);
  3686   __ null_check(T1);
  3687   __ dadd(T1, T1, T2);
  3689   if (state == itos) {
  3690     __ lw(FSR, T1, 0);
  3691   } else if (state == atos) {
  3692     //__ ld(FSR, T1, 0);
  3693     __ load_heap_oop(FSR, Address(T1, 0));
  3694     __ verify_oop(FSR);
  3695   } else if (state == ftos) {
  3696     __ lwc1(FSF, T1, 0);
  3697   } else {
  3698     ShouldNotReachHere();
  3699   }
  3700   __ daddi(BCP, BCP, -1);
  3701 }
  3703 //---------------------------------------------------
  3704 //-------------------------------------------------
  3705 // Calls
  3707 void TemplateTable::count_calls(Register method, Register temp) {
  3708   // implemented elsewhere
  3709   ShouldNotReachHere();
  3710 }
  3712 // method, index, recv, flags: T1, T2, T3, T1
  3713 // byte_no = 2 for _invokevirtual, 1 else
  3714 // T0 : return address
  3715 // get the method & index of the invoke, and push the return address of
  3716 // the invoke (the first word in the frame);
  3717 // this address is where the return code jumps to.
  3718 // NOTE: this method sets T3 and T1 to recv and flags, respectively
  3719 void TemplateTable::prepare_invoke(int byte_no,
  3720                                    Register method, //linked method (or i-klass)
  3721                                    Register index, //itable index, MethodType ,etc.
  3722                                    Register recv, // if caller wants to see it
  3723                                    Register flags // if caller wants to test it
  3724 		                   ) {
  3725   // determine flags
  3726   const Bytecodes::Code code = bytecode();
  3727   const bool is_invokeinterface  = code == Bytecodes::_invokeinterface;
  3728   const bool is_invokedynamic    = code == Bytecodes::_invokedynamic;
  3729   const bool is_invokehandle     = code == Bytecodes::_invokehandle;
  3730   const bool is_invokevirtual    = code == Bytecodes::_invokevirtual;
  3731   const bool is_invokespecial    = code == Bytecodes::_invokespecial;
  3732   const bool load_receiver       = (recv  != noreg);
  3733   const bool save_flags          = (flags != noreg);
  3734   assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic),"");
  3735   assert(save_flags    == (is_invokeinterface || is_invokevirtual), "need flags for vfinal");
  3736   assert(flags == noreg || flags == T1, "error flags reg.");
  3737   assert(recv  == noreg || recv  == T3, "error recv reg.");
  3738   // setup registers & access constant pool cache
  3739   if(recv == noreg) recv  = T3;
  3740   if(flags == noreg) flags  = T1;
  3742   assert_different_registers(method, index, recv, flags);
  3744   // save 'interpreter return address'
  3745   __ save_bcp();
  3747   load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic);
  3748   if (is_invokedynamic || is_invokehandle) {
  3749    Label L_no_push;
  3750      __ move(AT, (1 << ConstantPoolCacheEntry::has_appendix_shift));
  3751      __ andr(AT, AT, flags);
  3752      __ beq(AT, R0, L_no_push);
  3753      __ delayed()->nop();
  3754      // Push the appendix as a trailing parameter.
  3755      // This must be done before we get the receiver,
  3756      // since the parameter_size includes it.
  3757      Register tmp = SSR;
  3758      __ push(tmp);
  3759      __ move(tmp, index);
  3760      assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0");
  3761      __ load_resolved_reference_at_index(index, tmp);
  3762      __ pop(tmp);
  3763      __ push(index);  // push appendix (MethodType, CallSite, etc.)
  3764      __ bind(L_no_push);
  3765   }
  3768   // load receiver if needed (after appendix is pushed so parameter size is correct)
  3769   // Note: no return address pushed yet
  3770   if (load_receiver) {
  3771     __ move(AT, ConstantPoolCacheEntry::parameter_size_mask);
  3772     __ andr(recv, flags, AT);
  3773     // 2014/07/31 Fu: Since we won't push RA on stack, no_return_pc_pushed_yet should be 0.
  3774     const int no_return_pc_pushed_yet = 0;  // argument slot correction before we push return address
  3775     const int receiver_is_at_end      = -1;  // back off one slot to get receiver
  3776     Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end);
  3778     __ ld(recv, recv_addr);
  3779     __ verify_oop(recv);
  3780   }
  3781   if (save_flags) {
  3782     //__ movl(r13, flags);
  3783     __ move(BCP, flags);
  3784   }
  3785   // compute return type
  3786   __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift);
  3787   __ andi(flags, flags, 0xf);
  3789   // Make sure we don't need to mask flags for tos_state_shift after the above shift
  3790   ConstantPoolCacheEntry::verify_tos_state_shift();
  3791   // load return address
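         // The return address is taken from a per-TosState table, one word per entry:
         // Interpreter::invoke_return_entry_table_for(code), indexed by the result tos state computed above.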
  3792   {
  3793     const address table = (address) Interpreter::invoke_return_entry_table_for(code);
  3794     __ li(AT, (long)table);
  3795     __ dsll(flags, flags, LogBytesPerWord);
  3796     __ dadd(AT, AT, flags);
  3797     __ ld(RA, AT, 0);
  3798   }

  3800   if (save_flags) {
  3801     __ move(flags, BCP);
  3802     __ restore_bcp();
  3803   }
  3804 }
  3806 // used registers : T0, T3, T1, T2
  3807 // T3 : recv; this register convention is established by prepare_invoke
  3808 // T1 : flags, klass
  3809 // Rmethod : method, index must be Rmethod
  3810 void TemplateTable::invokevirtual_helper(Register index, Register recv,
  3811 		Register flags) {
  3813   assert_different_registers(index, recv, flags, T2);
  3815   // Test for an invoke of a final method
  3816   Label notFinal;
  3817   __ move(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift));
  3818   __ andr(AT, flags, AT);
  3819   __ beq(AT, R0, notFinal);
  3820   __ delayed()->nop();
  3822   Register method = index;  // method must be Rmethod
  3823   assert(method == Rmethod, "methodOop must be Rmethod for interpreter calling convention");
  3825   // do the call - the index is actually the method to call
  3826   // the index is indeed the methodOop, because this is a vfinal call;
  3827   // see ConstantPoolCacheEntry::set_method for more info
  3829   __ verify_oop(method);
  3831   // It's final, need a null check here!
  3832   __ null_check(recv);
  3834   // profile this call
  3835   __ profile_final_call(T2);
  3837   // 2014/11/24 Fu 
  3838   // T2: tmp, used for mdp
  3839   // method: callee
  3840   // T9: tmp
  3841   // is_virtual: true 
  3842   __ profile_arguments_type(T2, method, T9, true);
  3844 //  __ move(T0, recv);
  3845   __ jump_from_interpreted(method, T2);
  3847   __ bind(notFinal);
  3849   // get receiver klass
  3850   __ null_check(recv, oopDesc::klass_offset_in_bytes());
  3851   // Keep recv in T3 because the callee expects it there
  3852   __ load_klass(T2, recv);
  3853   __ verify_oop(T2);
  3854   // profile this call
  3855   __ profile_virtual_call(T2, T0, T1);
  3857   // get target methodOop & entry point
  3858   const int base = InstanceKlass::vtable_start_offset() * wordSize;    
  3859   assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
  3860   __ dsll(AT, index, Address::times_ptr);
  3861   // T2: receiver
  3862   __ dadd(AT, T2, AT);
  3863   // this is an unaligned read
  3864   __ ld(method, AT, base + vtableEntry::method_offset_in_bytes());
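         // method = receiver_klass->vtable()[index].method(): 'base' is the start of the vtable
         // inside the klass and each vtableEntry occupies one word (see the assert above)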
  3865   __ profile_arguments_type(T2, method, T9, true);
  3866   __ jump_from_interpreted(method, T2);
  3867 }
  3870 void TemplateTable::invokevirtual(int byte_no) {
  3871   transition(vtos, vtos);
  3872   assert(byte_no == f2_byte, "use this argument");
  3873   prepare_invoke(byte_no, Rmethod, NOREG, T3, T1);
  3874   // now recv & flags in T3, T1
  3875   invokevirtual_helper(Rmethod, T3, T1);
  3876 }
  3878 // T9 : entry
  3879 // Rmethod : method
  3880 void TemplateTable::invokespecial(int byte_no) {
  3881   transition(vtos, vtos);
  3882   assert(byte_no == f1_byte, "use this argument");
  3883   prepare_invoke(byte_no, Rmethod, NOREG, T3);
  3884   // now recv & flags in T3, T1
  3885   __ verify_oop(T3);
  3886   __ null_check(T3);
  3887   __ profile_call(T9);
  3889   // 2014/11/24 Fu 
  3890   // T8: tmp, used for mdp
  3891   // Rmethod: callee
  3892   // T9: tmp
  3893   // is_virtual: false 
  3894   __ profile_arguments_type(T8, Rmethod, T9, false);
  3896   __ jump_from_interpreted(Rmethod, T9);
  3897   __ move(T0, T3); //aoqi ?
  3898 }
  3900 void TemplateTable::invokestatic(int byte_no) {
  3901   transition(vtos, vtos);
  3902   assert(byte_no == f1_byte, "use this argument");
  3903   prepare_invoke(byte_no, Rmethod, NOREG);
  3904   __ verify_oop(Rmethod);
  3906   __ profile_call(T9);
  3908   // 2014/11/24 Fu 
  3909   // T8: tmp, used for mdp
  3910   // Rmethod: callee
  3911   // T9: tmp
  3912   // is_virtual: false 
  3913   __ profile_arguments_type(T8, Rmethod, T9, false);
  3915   __ jump_from_interpreted(Rmethod, T9);
  3916 }
  3918 // not used for now; left as a placeholder for a future change. FIXME.
  3919 void TemplateTable::fast_invokevfinal(int byte_no) {
  3920   transition(vtos, vtos);
  3921   assert(byte_no == f2_byte, "use this argument");
  3922   __ stop("fast_invokevfinal not used on mips64");
  3923 }
  3925 // used registers : T0, T1, T2, T3, T1, A7
  3926 // T0 : itable, vtable, entry
  3927 // T1 : interface
  3928 // T3 : receiver
  3929 // T1 : flags, klass
  3930 // Rmethod : index, method, this is required by interpreter_entry
  3931 void TemplateTable::invokeinterface(int byte_no) {
  3932   transition(vtos, vtos);
  3933   //this method will use T1-T4 and T0
  3934   assert(byte_no == f1_byte, "use this argument");
  3935   prepare_invoke(byte_no, T2, Rmethod, T3, T1);
  3936   // T2: Interface
  3937   // Rmethod: index
  3938   // T3: receiver    
  3939   // T1: flags
  3940   Label notMethod;
  3941   __ move(AT, (1 << ConstantPoolCacheEntry::is_forced_virtual_shift));
  3942   __ andr(AT, T1, AT);
  3943   __ beq(AT, R0, notMethod);
  3944   __ delayed()->nop();
  3946   // Special case of invokeinterface called for virtual method of
  3947   // java.lang.Object.  See cpCacheOop.cpp for details.
  3948   // This code isn't produced by javac, but could be produced by
  3949   // another compliant java compiler.
  3950   invokevirtual_helper(Rmethod, T3, T1);
  3952   __ bind(notMethod);
  3953   // Get receiver klass into T1 - also a null check
  3954   //__ ld(T1, T3, oopDesc::klass_offset_in_bytes());
  3955   //add for compressedoops
  3956   //__ restore_locals();
  3957   //__ null_check(T3, oopDesc::klass_offset_in_bytes());
  3958   __ load_klass(T1, T3);
  3959   __ verify_oop(T1);
  3961   // profile this call
  3962   __ profile_virtual_call(T1, T0, FSR);
  3964   // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  3965   // TODO: x86 add a new method lookup_interface_method  // LEE
  3966   const int base = InstanceKlass::vtable_start_offset() * wordSize;    
  3967   assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below");
  3968   __ lw(AT, T1, InstanceKlass::vtable_length_offset() * wordSize);
  3969   __ dsll(AT, AT, Address::times_8);
  3970   __ dadd(T0, T1, AT);
  3971   __ daddi(T0, T0, base);
  3972   if (HeapWordsPerLong > 1) {
  3973     // Round up to align_object_offset boundary
  3974     __ round_to(T0, BytesPerLong);
  3975   }
  3976   // now T0 is the beginning of the itable
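         // Scan the itableOffsetEntry list: a NULL interface field terminates the list and means the
         // receiver class does not implement the interface; otherwise loop until the entry matches T2.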
  3978   Label entry, search, interface_ok;
  3980   ///__ jmp(entry);   
  3981   __ b(entry);
  3982   __ delayed()->nop();
  3984   __ bind(search);
  3985   __ increment(T0, itableOffsetEntry::size() * wordSize);
  3987   __ bind(entry);
  3989   // Check that the entry is non-null.  A null entry means that the receiver
  3990   // class doesn't implement the interface, and wasn't the same as the
  3991   // receiver class checked when the interface was resolved.
  3992   __ ld(AT, T0, itableOffsetEntry::interface_offset_in_bytes());
  3993   __ bne(AT, R0, interface_ok);
  3994   __ delayed()->nop();
  3995   // throw exception
  3996   // the call_VM checks for exception, so we should never return here.
  3998   //__ pop();//FIXME here,
  3999   // No need to pop the return address (pushed by prepare_invoke):
  4000   // on this port it is kept in RA.
  4002   __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_IncompatibleClassChangeError));
  4003   __ should_not_reach_here();
  4005   __ bind(interface_ok);
  4006   // NOTE: unlike x86, nothing is popped here
  4007   //__ lw(AT, T0, itableOffsetEntry::interface_offset_in_bytes());
  4008   __ bne(AT, T2, search);
  4009   __ delayed()->nop();
  4011   // now we get vtable of the interface
  4012   __ ld(T0, T0, itableOffsetEntry::offset_offset_in_bytes());
  4013   __ daddu(T0, T1, T0);
  4014   assert(itableMethodEntry::size() * wordSize == 8, "adjust the scaling in the code below");
  4015   __ dsll(AT, Rmethod, Address::times_8);
  4016   __ daddu(AT, T0, AT);
  4017   // now we get the method
  4018   __ ld(Rmethod, AT, 0);
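         // Rmethod = the methodOop at this itable index; the offset word loaded above gives the start
         // of this interface's method table within the receiver klass (T1)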
  4019   // Rmethod: methodOop to call
  4020   // T3: receiver
  4021   // Check for abstract method error
  4022   // Note: This should be done more efficiently via a throw_abstract_method_error
  4023   //       interpreter entry point and a conditional jump to it in case of a null
  4024   //       method.
  4025   {
  4026     Label L;
  4027     ///__ testl(ebx, ebx);
  4028     ///__ jcc(Assembler::notZero, L);
  4029     __ bne(Rmethod, R0, L);
  4030     __ delayed()->nop();
  4032     // throw exception
  4033     // note: must restore interpreter registers to canonical
  4034     //       state for exception handling to work correctly!
  4035     ///__ popl(ebx);          // pop return address (pushed by prepare_invoke)
  4036     //__ restore_bcp();      // esi must be correct for exception handler   
  4037     //(was destroyed)
  4038     //__ restore_locals();   // make sure locals pointer 
  4039     //is correct as well (was destroyed)
  4040     ///__ call_VM(noreg, CAST_FROM_FN_PTR(address, 
  4041     //InterpreterRuntime::throw_AbstractMethodError));
  4042     __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError));
  4043     // the call_VM checks for exception, so we should never return here.
  4044     __ should_not_reach_here();
  4045     __ bind(L);
  4046   }
  4048   // 2014/11/24 Fu 
  4049   // T8: tmp, used for mdp
  4050   // Rmethod: callee
  4051   // T9: tmp
  4052   // is_virtual: true
  4053   __ profile_arguments_type(T8, Rmethod, T9, true);
  4055   __ jump_from_interpreted(Rmethod, T9);
  4056 }
  4058 void TemplateTable::invokehandle(int byte_no) {
  4059   transition(vtos, vtos);
  4060   assert(byte_no == f1_byte, "use this argument");
  4061   const Register T2_method = Rmethod;
  4062   const Register FSR_mtype  = FSR;
  4063   const Register T3_recv   = T3;
  4065   if (!EnableInvokeDynamic) {
  4066      // rewriter does not generate this bytecode
  4067      __ should_not_reach_here();
  4068      return;
  4069   }
  4071    prepare_invoke(byte_no, T2_method, FSR_mtype, T3_recv);
  4072    //??__ verify_method_ptr(T2_method);
  4073    __ verify_oop(T3_recv);
  4074    __ null_check(T3_recv);
  4076    // FSR_mtype: MethodType object (from cpool->resolved_references[f1], if necessary)
  4077    // T2_method: MH.invokeExact_MT method (from f2)

  4079    // Note:  FSR_mtype is already pushed (if necessary) by prepare_invoke
  4081    // FIXME: profile the LambdaForm also
  4082    __ profile_final_call(T9);
  4084    // 2014/11/24 Fu 
  4085    // T8: tmp, used for mdp
  4086    // T2_method: callee
  4087    // T9: tmp
  4088    // is_virtual: true
  4089    __ profile_arguments_type(T8, T2_method, T9, true);
  4091  __ jump_from_interpreted(T2_method, T9);
  4092 }
  4094  void TemplateTable::invokedynamic(int byte_no) {
  4095    transition(vtos, vtos);
  4096    assert(byte_no == f1_byte, "use this argument");
  4098    if (!EnableInvokeDynamic) {
  4099      // We should not encounter this bytecode if !EnableInvokeDynamic.
  4100      // The verifier will stop it.  However, if we get past the verifier,
  4101      // this will stop the thread in a reasonable way, without crashing the JVM.
  4102      __ call_VM(noreg, CAST_FROM_FN_PTR(address,
  4103                       InterpreterRuntime::throw_IncompatibleClassChangeError));
  4104      // the call_VM checks for exception, so we should never return here.
  4105      __ should_not_reach_here();
  4106      return;
  4107    }
  4109    //const Register Rmethod   = T2;
  4110    const Register T2_callsite = T2;
  4112    prepare_invoke(byte_no, Rmethod, T2_callsite);
  4114    // T2_callsite: CallSite object (from cpool->resolved_references[f1])
  4115    // Rmethod: MH.linkToCallSite method (from f2)

  4117    // Note:  T2_callsite is already pushed by prepare_invoke
  4118    // %%% should make a type profile for any invokedynamic that takes a ref argument
  4119    // profile this call
  4120    __ profile_call(T9);
  4122    // 2014/11/24 Fu 
  4123    // T8: tmp, used for mdp
  4124    // Rmethod: callee
  4125    // T9: tmp
  4126    // is_virtual: false 
  4127    __ profile_arguments_type(T8, Rmethod, T9, false);
  4129    __ verify_oop(T2_callsite);
  4131    __ jump_from_interpreted(Rmethod, T9);
  4132  }
  4134 //----------------------------------------------------------------------------------------------------
  4135 // Allocation
  4136 // T1 : tags & buffer end & thread
  4137 // T2 : object end
  4138 // T3 : klass
  4139 // T1 : object size
  4140 // A1 : cpool
  4141 // A2 : cp index
  4142 // return object in FSR
  4143 void TemplateTable::_new() {
  4144   transition(vtos, atos);
  4145   __ get_2_byte_integer_at_bcp(A2, AT, 1);
  4146   __ huswap(A2);
  4148   Label slow_case;
  4149   Label done;
  4150   Label initialize_header;
  4151   Label initialize_object;  // including clearing the fields
  4152   Label allocate_shared;
  4154   // get InstanceKlass in T3
  4155   __ get_cpool_and_tags(A1, T1);
  4156   __ dsll(AT, A2, Address::times_8);
  4157   __ dadd(AT, A1, AT);
  4158   __ ld(T3, AT, sizeof(ConstantPool));
  4160   // make sure the class we're about to instantiate has been resolved. 
  4161   // Note: slow_case does a pop of stack, which is why we loaded class/pushed above
  4162   const int tags_offset = Array<u1>::base_offset_in_bytes();
  4163   __ dadd(T1, T1, A2);
  4164   __ lb(AT, T1, tags_offset);
  4165   //__ addiu(AT, AT, - (int)JVM_CONSTANT_UnresolvedClass);
  4166   __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class);
  4167   //__ beq(AT, R0, slow_case);
  4168   __ bne(AT, R0, slow_case);
  4169   __ delayed()->nop();
  4171   /*make sure klass is initialized & doesn't have finalizer*/
  4173   // make sure klass is fully initialized
  4174   __ lhu(T1, T3, in_bytes(InstanceKlass::init_state_offset()));
  4175   __ daddiu(AT, T1, - (int)InstanceKlass::fully_initialized);
  4176   __ bne(AT, R0, slow_case);
  4177   __ delayed()->nop();
  4179   // has_finalizer
  4180   //__ lw(T1, T3, Klass::access_flags_offset() + sizeof(oopDesc));
  4181   //__ move(AT, JVM_ACC_CAN_BE_FASTPATH_ALLOCATED);
  4182   //__ andr(AT, T1, AT);
  4183   __ lw(T1, T3, in_bytes(Klass::layout_helper_offset()) );
  4184   __ andi(AT, T1, Klass::_lh_instance_slow_path_bit);
  4185   __ bne(AT, R0, slow_case);
  4186   __ delayed()->nop();
  4188   // get instance_size in InstanceKlass (already aligned) in T0, 
  4189   // be sure to preserve this value 
  4190   //__ lw(T0, T3, Klass::size_helper_offset_in_bytes() + sizeof(oopDesc));
  4191   //Klass::_size_helper is renamed Klass::_layout_helper. aoqi 
  4192   __ lw(T0, T3, in_bytes(Klass::layout_helper_offset()) );
  4194   // 
  4195   // Allocate the instance
  4196   // 1) Try to allocate in the TLAB
  4197   // 2) if fail and the object is large allocate in the shared Eden
  4198   // 3) if the above fails (or is not applicable), go to a slow case
  4199   // (creates a new TLAB, etc.)
  4201   const bool allow_shared_alloc =
  4202     Universe::heap()->supports_inline_contig_alloc() && !CMSIncrementalMode;
  4204   if (UseTLAB) {
  4205 #ifndef OPT_THREAD
  4206     const Register thread = T8;
  4207     __ get_thread(thread);
  4208 #else
  4209     const Register thread = TREG;
  4210 #endif
  4211     // get tlab_top
  4212     __ ld(FSR, thread, in_bytes(JavaThread::tlab_top_offset()));
  4213     __ dadd(T2, FSR, T0);
  4214     // get tlab_end
  4215     __ ld(AT, thread, in_bytes(JavaThread::tlab_end_offset()));
  4216     __ slt(AT, AT, T2);
  4217     //		__ bne(AT, R0, allocate_shared);
  4218     __ bne(AT, R0, allow_shared_alloc ? allocate_shared : slow_case);
  4219     __ delayed()->nop();
  4220     __ sd(T2, thread, in_bytes(JavaThread::tlab_top_offset()));
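           // Bump-pointer TLAB allocation: the new top (old top + instance size) was checked
           // against tlab_end above and is now published back into the thread's TLAB.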
  4222     if (ZeroTLAB) {
  4223       // the fields have been already cleared
  4224       __ b_far(initialize_header);
  4225     } else {
  4226       // initialize both the header and fields
  4227       __ b_far(initialize_object);
  4228     }
  4229     __ delayed()->nop();
  4230     /*
  4232        if (CMSIncrementalMode) {
  4233     // No allocation in shared eden. 
  4234     ///__ jmp(slow_case);
  4235     __ b(slow_case);
  4236     __ delayed()->nop();
  4238      */
  4239   }
  4241   // Allocation in the shared Eden , if allowed
  4242   // T0 : instance size in words
  4243   if(allow_shared_alloc){ 
  4244     __ bind(allocate_shared);
  4245     Label retry;
  4246     //Address heap_top(T1, (int)Universe::heap()->top_addr());
  4247     Address heap_top(T1);
  4248     //__ lui(T1, Assembler::split_high((int)Universe::heap()->top_addr()));
  4249     __ li(T1, (long)Universe::heap()->top_addr());
  4251     __ ld(FSR, heap_top);
  4252     __ bind(retry);
  4253     __ dadd(T2, FSR, T0);
  4254     //__ lui(AT, Assembler::split_high((int)Universe::heap()->end_addr()));
  4255     //__ lw(AT, AT, Assembler::split_low((int)Universe::heap()->end_addr()));
  4256     __ li(AT, (long)Universe::heap()->end_addr());
  4257     __ ld(AT, AT, 0);
  4258     __ slt(AT, AT, T2);
  4259     __ bne(AT, R0, slow_case);
  4260     __ delayed()->nop();
  4262     // Compare FSR with the current heap top and, if still equal, store the new
  4263     // top through the top addr pointer; otherwise another thread allocated first.
  4264     // (x86 used a locked cmpxchg and ZF; here the cmpxchg result ends up in AT.)
  4265     //
  4266     // FSR: object begin
  4267     // T2: object end
  4268     // T0: instance size in words
  4270     // if someone beat us on the allocation, try again, otherwise continue 
  4271     //__ lui(T1, Assembler::split_high((int)Universe::heap()->top_addr()));
  4272     __ cmpxchg(T2, heap_top, FSR);
  4273     __ beq(AT, R0, retry);
  4274     __ delayed()->nop();
  4275   }
  4277   if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) {
  4278     // The object is initialized before the header.  If the object size is
  4279     // zero, go directly to the header initialization.
  4280     __ bind(initialize_object);
  4281     __ li(AT, - sizeof(oopDesc));
  4282     __ daddu(T0, T0, AT);
  4283     __ beq_far(T0, R0, initialize_header);
  4284     __ delayed()->nop();
  4287     // T0 must have been multiple of 2
  4288 #ifdef ASSERT
  4289     // make sure T0 was multiple of 2
  4290     Label L;
  4291     __ andi(AT, T0, 1);
  4292     __ beq(AT, R0, L);
  4293     __ delayed()->nop();
  4294     __ stop("object size is not multiple of 2 - adjust this code");
  4295     __ bind(L);
  4296     // T0 must be > 0, no extra check needed here
  4297 #endif
  4299     // initialize remaining object fields: T0 is a multiple of 2
  4300     {
  4301       Label loop;
  4302       __ dadd(T1, FSR, T0);
  4303       __ daddi(T1, T1, -oopSize);
  4305       __ bind(loop);
  4306       __ sd(R0, T1, sizeof(oopDesc) + 0 * oopSize);
  4307 //      __ sd(R0, T1, sizeof(oopDesc) + 1 * oopSize);
  4308       __ bne(T1, FSR, loop); // don't clear the header
  4309       __ delayed()->daddi(T1, T1, -oopSize);
  4310       // actually sizeof(oopDesc)==8, so we can move  
  4311       // __ addiu(AT, AT, -8) to delay slot, and compare FSR with T1
  4312     }
  4313     //klass in T3, 
  4314     // initialize object header only.
  4315     __ bind(initialize_header);
  4316     if (UseBiasedLocking) {
  4317       // __ popl(ecx);   // get saved klass back in the register.
  4318       // __ movl(ebx, Address(ecx, Klass::prototype_header_offset_in_bytes() 
  4319       // + klassOopDesc::klass_part_offset_in_bytes()));
  4320       __ ld(AT, T3, in_bytes(Klass::prototype_header_offset())); 
  4321       // __ movl(Address(eax, oopDesc::mark_offset_in_bytes ()), ebx);
  4322       __ sd(AT, FSR, oopDesc::mark_offset_in_bytes ());    
  4323     } else {
  4324       __ li(AT, (long)markOopDesc::prototype());
  4325       __ sd(AT, FSR, oopDesc::mark_offset_in_bytes());
  4328     //__ sd(T3, FSR, oopDesc::klass_offset_in_bytes());
  4329     __ store_klass_gap(FSR, R0);
  4330     __ store_klass(FSR, T3);
  4332     {
  4333       SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0);
  4334       // Trigger dtrace event for fastpath
  4335       __ push(atos);
  4336       __ call_VM_leaf(
  4337 	  CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), FSR);
  4338       __ pop(atos);
  4339     }
  4340     __ b(done);
  4341     __ delayed()->nop();
  4342   }
  4343   // slow case
  4344   __ bind(slow_case);
  4345   call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), A1, A2);
  4347   // continue
  4348   __ bind(done);
  4349   __ sync();
  4350 }
  4352 void TemplateTable::newarray() {
  4353   transition(itos, atos);
  4354   __ lbu(A1, at_bcp(1));
  4355   // type in A1, count in FSR
  4356   call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), A1, FSR);
  4357   __ sync();
  4358 }
  4360 void TemplateTable::anewarray() {
  4361   transition(itos, atos);
  4362   __ get_2_byte_integer_at_bcp(A2, AT, 1);
  4363   __ huswap(A2);
  4364   __ get_constant_pool(A1);
  4365   // cp, index, count
  4366   call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), A1, A2, FSR);
  4367   __ sync();
  4368 }
  4370 void TemplateTable::arraylength() {
  4371   transition(atos, itos);
  4372   __ null_check(FSR, arrayOopDesc::length_offset_in_bytes());
  4373   __ lw(FSR, FSR, arrayOopDesc::length_offset_in_bytes());
  4374 }
  4376 // i use T2 as ebx, T3 as ecx, T1 as edx
  4377 // when invoke gen_subtype_check, super in T3, sub in T2, object in FSR(it's always)
  4378 // T2 : sub klass
  4379 // T3 : cpool
  4380 // T3 : super klass
  4381 void TemplateTable::checkcast() {
  4382   transition(atos, atos);
  4383   Label done, is_null, ok_is_subtype, quicked, resolved;
  4384   __ beq(FSR, R0, is_null);
  4385   __ delayed()->nop();
  4387   // Get cpool & tags index
  4388   __ get_cpool_and_tags(T3, T1);
  4389   __ get_2_byte_integer_at_bcp(T2, AT, 1);
  4390   __ huswap(T2);
  4392   // See if bytecode has already been quicked
  4393   __ dadd(AT, T1, T2);
  4394   __ lb(AT, AT, Array<u1>::base_offset_in_bytes());
  4395   __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class);
  4396   __ beq(AT, R0, quicked);
  4397   __ delayed()->nop();
  4399   /* 2012/6/2 Jin: In InterpreterRuntime::quicken_io_cc, lots of new classes may be loaded.
  4400    *  Then, GC will move the object in V0 to another place in the heap.
  4401    *  Therefore, we should never keep such an object in a register;
  4402    *  instead, we save it on the stack, where the GC thread can update it automatically.
  4403    *  After GC, the object address in FSR will have changed to a new location.
  4404    */
  4405   __ push(atos);
  4406   const Register thread = TREG;
  4407 #ifndef OPT_THREAD
  4408   __ get_thread(thread);
  4409 #endif
  4410   call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
  4411   __ get_vm_result_2(T3, thread);
  4412   __ pop_ptr(FSR);
  4413   __ b(resolved);
  4414   __ delayed()->nop();
  4416   // klass already in cp, get superklass in T3
  4417   __ bind(quicked);
  4418   __ dsll(AT, T2, Address::times_8);
  4419   __ dadd(AT, T3, AT);
  4420   __ ld(T3, AT, sizeof(ConstantPool));
  4422   __ bind(resolved);
  4424   // get subklass in T2
  4425   //__ ld(T2, FSR, oopDesc::klass_offset_in_bytes());
  4426   //add for compressedoops
  4427   __ load_klass(T2, FSR);
  4428   // Superklass in T3.  Subklass in T2.
  4429   __ gen_subtype_check(T3, T2, ok_is_subtype);
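         // gen_subtype_check branches to ok_is_subtype on success and falls through on failure,
         // so the code below handles the failing cast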
  4431   // Come here on failure
  4432   // object is at FSR
  4433   __ jmp(Interpreter::_throw_ClassCastException_entry);
  4434   __ delayed()->nop();
  4436   // Come here on success
  4437   __ bind(ok_is_subtype);
  4439   // Collect counts on whether this check-cast sees NULLs a lot or not.
  4440   if (ProfileInterpreter) {
  4441     __ b(done);
  4442     __ delayed()->nop();
  4443     __ bind(is_null);
  4444     __ profile_null_seen(T3);
  4445   } else {
  4446     __ bind(is_null);
  4447   }
  4448   __ bind(done);
  4449 }
  4451 // i use T3 as cpool, T1 as tags, T2 as index
  4452 // object always in FSR, superklass in T3, subklass in T2
  4453 void TemplateTable::instanceof() {
  4454   transition(atos, itos);
  4455   Label done, is_null, ok_is_subtype, quicked, resolved;
  4457   __ beq(FSR, R0, is_null);
  4458   __ delayed()->nop();
  4460   // Get cpool & tags index
  4461   __ get_cpool_and_tags(T3, T1);
  4462   // get index
  4463   __ get_2_byte_integer_at_bcp(T2, AT, 1);
  4464   __ hswap(T2);
  4466   // See if bytecode has already been quicked
  4467   // quicked
  4468   __ daddu(AT, T1, T2);
  4469   __ lb(AT, AT, Array<u1>::base_offset_in_bytes());
  4470   __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class);
  4471   __ beq(AT, R0, quicked);
  4472   __ delayed()->nop();
  4474   // get superklass in T3
  4475   //__ move(TSR, FSR);
  4476   // sometimes S2 may be changed during the call, 
  4477   // be careful if you use TSR as a saving place
  4478   //__ push(FSR);
  4479   __ push(atos);
  4480   const Register thread = TREG;
  4481 #ifndef OPT_THREAD
  4482   __ get_thread(thread);
  4483 #endif
  4484   call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
  4485   __ get_vm_result_2(T3, thread);
  4486   //__ lw(FSR, SP, 0);
  4487   __ pop_ptr(FSR);	
  4488   __ b(resolved);
  4489   __ delayed()->nop();
  4490   //__ move(FSR, TSR);
  4492   // get superklass in T3, subklass in T2
  4493   __ bind(quicked);
  4494   __ dsll(AT, T2, Address::times_8);
  4495   __ daddu(AT, T3, AT);
  4496   __ ld(T3, AT, sizeof(ConstantPool)); 
  4498   __ bind(resolved);
  4499   // get subklass in T2
  4500   //__ ld(T2, FSR, oopDesc::klass_offset_in_bytes());
  4501   //add for compressedoops
  4502   __ load_klass(T2, FSR);
  4504   // Superklass in T3.  Subklass in T2.
  4505   __ gen_subtype_check(T3, T2, ok_is_subtype);
  4506   // Come here on failure
  4507   __ b(done);
  4508   __ delayed(); __ move(FSR, R0);
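         // on failure the result 0 is materialized in the branch delay slot before reaching 'done'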
  4510   // Come here on success
  4511   __ bind(ok_is_subtype);
  4512   __ move(FSR, 1);
  4514   // Collect counts on whether this test sees NULLs a lot or not.
  4515   if (ProfileInterpreter) {
  4516      __ beq(R0, R0, done);
  4517      __ nop();
  4518      __ bind(is_null);
  4519      __ profile_null_seen(T3);
  4520   } else {
  4521      __ bind(is_null);   // same as 'done'
  4522   }
  4523   __ bind(done);
  4524   // FSR = 0: obj == NULL or  obj is not an instanceof the specified klass
  4525   // FSR = 1: obj != NULL and obj is     an instanceof the specified klass
  4526 }
  4528 //--------------------------------------------------------
  4529 //--------------------------------------------
  4530 // Breakpoints
  4531 void TemplateTable::_breakpoint() {
  4533   // Note: We get here even if we are single stepping..
  4534   // jbug insists on setting breakpoints at every bytecode
  4535   // even if we are in single step mode.

  4537   transition(vtos, vtos);

  4539   // get the unpatched byte code
  4540   ///__ get_method(ecx);
  4541   ///__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::get_original_bytecode_at)
  4542   //, ecx, esi);
  4543   ///__ movl(ebx, eax);
  4544   __ get_method(A1);
  4545   __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::get_original_bytecode_at),
  4546              A1, BCP);
  4547   __ move(Rnext, V0); // Jin: Rnext will be used in dispatch_only_normal

  4549   // post the breakpoint event
  4550   ///__ get_method(ecx);
  4551   ///__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), ecx, esi);
  4552   __ get_method(A1);
  4553   __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), A1, BCP);

  4555   // complete the execution of original bytecode
  4556   __ dispatch_only_normal(vtos);
  4557 }
  4559 //----------------------------------------------------------------------------------------------------
  4560 // Exceptions
  4562 void TemplateTable::athrow() {
  4563   transition(atos, vtos);
  4564   __ null_check(FSR);
  4565   __ jmp(Interpreter::throw_exception_entry());
  4566   __ delayed()->nop();
  4567 }
  4569 //----------------------------------------------------------------------------------------------------
  4570 // Synchronization
  4571 //
  4572 // Note: monitorenter & exit are symmetric routines, which is reflected
  4573 //       in the assembly code structure as well
  4574 //
  4575 // Stack layout:
  4576 //
  4577 // [expressions  ] <--- SP               = expression stack top
  4578 // ..
  4579 // [expressions  ]
  4580 // [monitor entry] <--- monitor block top = expression stack bot
  4581 // ..
  4582 // [monitor entry]
  4583 // [frame data   ] <--- monitor block bot
  4584 // ...
  4585 // [return addr  ] <--- FP
  4587 // we use T2 as monitor entry pointer, T3 as monitor top pointer, c_rarg0 as free slot pointer
  4588 // object always in FSR
  4589 void TemplateTable::monitorenter() {
  4590   transition(atos, vtos);
  4591   // check for NULL object
  4592   __ null_check(FSR);
  4594   const Address monitor_block_top(FP, frame::interpreter_frame_monitor_block_top_offset 
  4595       * wordSize);
  4596   const int entry_size = (frame::interpreter_frame_monitor_size()* wordSize);
  4597   Label allocated;
  4599   // initialize entry pointer
  4600   __ move(c_rarg0, R0);
  4602   // find a free slot in the monitor block (result in c_rarg0)
  4603   {
  4604     Label entry, loop, exit, next;
  4605     __ ld(T2, monitor_block_top);
  4606     __ b(entry);
  4607     __ delayed()->daddi(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize);
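           // Scan from monitor_block_top (T2) towards the frame's initial SP (T3): remember a free
           // slot in c_rarg0 and exit early if this object already has a monitor entry.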
  4609     // free slot?
  4610     __ bind(loop);
  4611     __ ld(AT, T2, BasicObjectLock::obj_offset_in_bytes());
  4612     __ bne(AT, R0, next);
  4613     __ delayed()->nop();
  4614     __ move(c_rarg0, T2);
  4616     __ bind(next);
  4617     __ beq(FSR, AT, exit);
  4618     __ delayed()->nop();
  4619     __ daddi(T2, T2, entry_size);
  4621     __ bind(entry);
  4622     __ bne(T3, T2, loop);
  4623     __ delayed()->nop();
  4624     __ bind(exit);
  4625   }
  4627   __ bne(c_rarg0, R0, allocated);
  4628   __ delayed()->nop();
  4630   // allocate one if there's no free slot
  4631   {
  4632     Label entry, loop;
  4633     // 1. compute new pointers                   // SP: old expression stack top
  4634     __ ld(c_rarg0, monitor_block_top);
  4635     __ daddi(SP, SP, - entry_size);
  4636     __ daddi(c_rarg0, c_rarg0, - entry_size);
  4637     __ sd(c_rarg0, monitor_block_top);
  4638     __ b(entry);
  4639     __ delayed(); __ move(T3, SP);
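           // No free slot: the monitor area grows by one entry and the expression stack contents
           // are slid down by entry_size, copied word by word in the loop below.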
  4641     // 2. move expression stack contents
  4642     __ bind(loop);
  4643     __ ld(AT, T3, entry_size);
  4644     __ sd(AT, T3, 0);
  4645     __ daddi(T3, T3, wordSize); 
  4646     __ bind(entry);
  4647     __ bne(T3, c_rarg0, loop);
  4648     __ delayed()->nop();
  4649   }
  4651   __ bind(allocated);
  4652   // Increment bcp to point to the next bytecode, 
  4653   // so exception handling for async. exceptions works correctly.
  4654   // The object has already been popped from the stack, so the
  4655   // expression stack looks correct.
  4656   __ daddi(BCP, BCP, 1); 
  4657   __ sd(FSR, c_rarg0, BasicObjectLock::obj_offset_in_bytes());
  4658   __ lock_object(c_rarg0);
  4659   // check to make sure this monitor doesn't cause stack overflow after locking
  4660   __ save_bcp();  // in case of exception
  4661   __ generate_stack_overflow_check(0);
  4662   // The bcp has already been incremented. Just need to dispatch to next instruction.
  4664   __ dispatch_next(vtos);
  4665 }
  4667 // T2 : top
  4668 // c_rarg0 : entry
  4669 void TemplateTable::monitorexit() {
  4670   transition(atos, vtos);
  4672   __ null_check(FSR);
  4674   const int entry_size =(frame::interpreter_frame_monitor_size()* wordSize);
  4675   Label found;
  4677   // find matching slot
  4678   {
  4679     Label entry, loop;
  4680     __ ld(c_rarg0, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize);
  4681     __ b(entry);
  4682     __ delayed()->daddiu(T2, FP, frame::interpreter_frame_initial_sp_offset * wordSize);
  4684     __ bind(loop);
  4685     __ ld(AT, c_rarg0, BasicObjectLock::obj_offset_in_bytes());
  4686     __ beq(FSR, AT, found);
  4687     __ delayed()->nop();
  4688     __ daddiu(c_rarg0, c_rarg0, entry_size);
  4689     __ bind(entry);
  4690     __ bne(T2, c_rarg0, loop);
  4691     __ delayed()->nop();
  4692   }
  4694   // error handling. Unlocking was not block-structured
  4695   Label end;
  4696   __ call_VM(NOREG, CAST_FROM_FN_PTR(address, 
  4697 	InterpreterRuntime::throw_illegal_monitor_state_exception));
  4698   __ should_not_reach_here();
  4700   // call run-time routine
  4701   // c_rarg0: points to monitor entry
  4702   __ bind(found);
  4703   __ move(TSR, FSR);
  4704   __ unlock_object(c_rarg0);
  4705   __ move(FSR, TSR);
  4706   __ bind(end);
  4707 }
  4709 //--------------------------------------------------------------------------------------------------// Wide instructions
  4711 void TemplateTable::wide() {
  4712   transition(vtos, vtos);
  4713   // Note: the BCP increment step is part of the individual wide bytecode implementations
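         // dispatch indirectly through Interpreter::_wentry_point, the table of wide-variant entry
         // points, indexed by the bytecode that follows the wide prefix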
  4714   __ lbu(Rnext, at_bcp(1));
  4715   __ dsll(T9, Rnext, Address::times_8);
  4716   __ li(AT, (long)Interpreter::_wentry_point);
  4717   __ dadd(AT, T9, AT);
  4718   __ ld(T9, AT, 0);
  4719   __ jr(T9);
  4720   __ delayed()->nop();
  4721 }
  4723 //--------------------------------------------------------------------------------------------------// Multi arrays
  4725 void TemplateTable::multianewarray() {
  4726   transition(vtos, atos);
  4727   // last dim is on top of stack; we want address of first one:
  4728   // first_addr = last_addr + (ndims - 1) * wordSize
  4729   __ lbu(A1, at_bcp(3));	// dimension
  4730   __ daddi(A1, A1, -1);	
  4731   __ dsll(A1, A1, Address::times_8);
  4732   __ dadd(A1, SP, A1);		// now A1 pointer to the count array on the stack
  4733   call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), A1);
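         // the call returns the new array in FSR; the ndims dimension words are then popped
         // from the expression stack below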
  4734   __ lbu(AT, at_bcp(3));
  4735   __ dsll(AT, AT, Address::times_8);
  4736   __ dadd(SP, SP, AT);
  4737   __ sync();
  4738 }
  4740 #endif // !CC_INTERP
