src/cpu/mips/vm/templateTable_mips_64.cpp

author      fujie
date        Thu, 21 Sep 2017 11:09:31 +0800
changeset   6889:a1eb29ee98ab
parent      6888:b6a542947da3
child       6890:d911cc184106
permissions -rw-r--r--

[Interpreter] Optimize TemplateTable::ldc2_w.

     1 /*
     2  * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
     3  * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved.
     4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     5  *
     6  * This code is free software; you can redistribute it and/or modify it
     7  * under the terms of the GNU General Public License version 2 only, as
     8  * published by the Free Software Foundation.
     9  *
    10  * This code is distributed in the hope that it will be useful, but WITHOUT
    11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    13  * version 2 for more details (a copy is included in the LICENSE file that
    14  * accompanied this code).
    15  *
    16  * You should have received a copy of the GNU General Public License version
    17  * 2 along with this work; if not, write to the Free Software Foundation,
    18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    19  *
    20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    21  * or visit www.oracle.com if you need additional information or have any
    22  * questions.
    23  *
    24  */
    26 #include "precompiled.hpp"
    27 #include "asm/macroAssembler.hpp"
    28 #include "interpreter/interpreter.hpp"
    29 #include "interpreter/interpreterRuntime.hpp"
    30 #include "interpreter/templateTable.hpp"
    31 #include "memory/universe.inline.hpp"
    32 #include "oops/methodData.hpp"
    33 #include "oops/objArrayKlass.hpp"
    34 #include "oops/oop.inline.hpp"
    35 #include "prims/methodHandles.hpp"
    36 #include "runtime/sharedRuntime.hpp"
    37 #include "runtime/stubRoutines.hpp"
    38 #include "runtime/synchronizer.hpp"
    41 #ifndef CC_INTERP
    43 #define __ _masm->
    45 // Platform-dependent initialization
    47 void TemplateTable::pd_initialize() {
    48   // No mips specific initialization
    49 }
    51 // Address computation: local variables
    53 static inline Address iaddress(int n) {
    54   return Address(LVP, Interpreter::local_offset_in_bytes(n));
    55 }
    57 static inline Address laddress(int n) {
    58   return iaddress(n + 1);
    59 }
    61 static inline Address faddress(int n) {
    62   return iaddress(n);
    63 }
    65 static inline Address daddress(int n) {
    66   return laddress(n);
    67 }
    69 static inline Address aaddress(int n) {
    70   return iaddress(n);
    71 }
    72 static inline Address haddress(int n)            { return iaddress(n + 0); }
    75 static inline Address at_sp()             {  return Address(SP,   0); }
    76 static inline Address at_sp_p1()          { return Address(SP,  1 * wordSize); }
    77 static inline Address at_sp_p2()          { return Address(SP,  2 * wordSize); }
    79 // At top of Java expression stack, which may be different from SP.  It
    80 // isn't for category 1 objects.
    81 static inline Address at_tos   () {
    82   Address tos = Address(SP,  Interpreter::expr_offset_in_bytes(0));
    83   return tos;
    84 }
    86 static inline Address at_tos_p1() {
    87   return Address(SP,  Interpreter::expr_offset_in_bytes(1));
    88 }
    90 static inline Address at_tos_p2() {
    91   return Address(SP,  Interpreter::expr_offset_in_bytes(2));
    92 }
    94 static inline Address at_tos_p3() {
    95   return Address(SP,  Interpreter::expr_offset_in_bytes(3));
    96 }
    98 // We use S0 as BCP; make sure BCP is in S0 before calling any of the template generators.
    99 Address TemplateTable::at_bcp(int offset) {
   100   assert(_desc->uses_bcp(), "inconsistent uses_bcp information");
   101   return Address(BCP, offset);
   102 }
   104 // bytecode folding
   105 void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg,
   106                                    Register tmp_reg, bool load_bc_into_bc_reg/*=true*/,
   107                                    int byte_no) {
   108   if (!RewriteBytecodes)  return;
   109   Label L_patch_done;
   111   switch (bc) {
   112   case Bytecodes::_fast_aputfield:
   113   case Bytecodes::_fast_bputfield:
   114   case Bytecodes::_fast_cputfield:
   115   case Bytecodes::_fast_dputfield:
   116   case Bytecodes::_fast_fputfield:
   117   case Bytecodes::_fast_iputfield:
   118   case Bytecodes::_fast_lputfield:
   119   case Bytecodes::_fast_sputfield:
   120     {
   121       // We skip bytecode quickening for putfield instructions when
   122       // the put_code written to the constant pool cache is zero.
   123       // This is required so that every execution of this instruction
   124       // calls out to InterpreterRuntime::resolve_get_put to do
   125       // additional, required work.
   126       assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
   127       assert(load_bc_into_bc_reg, "we use bc_reg as temp");
   128       __ get_cache_and_index_and_bytecode_at_bcp(tmp_reg, bc_reg, tmp_reg, byte_no, 1);
   129       __ daddi(bc_reg, R0, bc);
   130       __ beq(tmp_reg, R0, L_patch_done);
   131       __ delayed()->nop();
   132     }
   133     break;
   134   default:
   135     assert(byte_no == -1, "sanity");
   136     // the pair bytecodes have already done the load.
   137     if (load_bc_into_bc_reg) {
   138       __ move(bc_reg, bc);
   139     }
   140   }
   142   if (JvmtiExport::can_post_breakpoint()) {
   143     Label L_fast_patch;
   144     // if a breakpoint is present we can't rewrite the stream directly
   145     __ lbu(tmp_reg, at_bcp(0));
   146     __ move(AT, Bytecodes::_breakpoint);
   147     __ bne(tmp_reg, AT, L_fast_patch);
   148     __ delayed()->nop();
   150     __ get_method(tmp_reg);
   151     // Let breakpoint table handling rewrite to quicker bytecode
   152     __ call_VM(NOREG, CAST_FROM_FN_PTR(address,
   153                      InterpreterRuntime::set_original_bytecode_at), tmp_reg, BCP, bc_reg);
   155     __ b(L_patch_done);
   156     __ delayed()->nop();
   157     __ bind(L_fast_patch);
   158   }
   160 #ifdef ASSERT
   161   Label L_okay;
   162   __ lbu(tmp_reg, at_bcp(0));
   163   __ move(AT, (int)Bytecodes::java_code(bc));
   164   __ beq(tmp_reg, AT, L_okay);
   165   __ delayed()->nop();
   166   __ beq(tmp_reg, bc_reg, L_patch_done);
   167   __ delayed()->nop();
   168   __ stop("patching the wrong bytecode");
   169   __ bind(L_okay);
   170 #endif
   172   // patch bytecode
   173   __ sb(bc_reg, at_bcp(0));
   174   __ bind(L_patch_done);
   175 }
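//
// A sketch of the effect (illustrative, not generated code): quickening
// patches the bytecode stream in place, so a stream such as
//
//     getfield #2        // slow form: resolves the field symbolically
//
// is rewritten after the first successful resolution to, e.g.,
//
//     fast_igetfield #2  // quick form: offset already in the cp cache
//
// and later executions dispatch directly to the specialized template.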
   178 // Individual instructions
   180 void TemplateTable::nop() {
   181   transition(vtos, vtos);
   182   // nothing to do
   183 }
   185 void TemplateTable::shouldnotreachhere() {
   186   transition(vtos, vtos);
   187   __ stop("shouldnotreachhere bytecode");
   188 }
   190 void TemplateTable::aconst_null() {
   191   transition(vtos, atos);
   192   __ move(FSR, R0);
   193 }
   195 void TemplateTable::iconst(int value) {
   196   transition(vtos, itos);
   197   if (value == 0) {
   198     __ move(FSR, R0);
   199   } else {
   200     __ move(FSR, value);
   201   }
   202 }
   204 void TemplateTable::lconst(int value) {
   205   transition(vtos, ltos);
   206   if (value == 0) {
   207     __ move(FSR, R0);
   208   } else {
   209     __ move(FSR, value);
   210   }
   211 }
   213 void TemplateTable::fconst(int value) {
   214   transition(vtos, ftos);
   215   switch( value ) {
   216     case 0:  __ mtc1(R0, FSF);    return;
   217     case 1:  __ addiu(AT, R0, 1); break;
   218     case 2:  __ addiu(AT, R0, 2); break;
   219     default: ShouldNotReachHere();
   220   }
   221   __ mtc1(AT, FSF);
   222   __ cvt_s_w(FSF, FSF);
   223 }
   225 void TemplateTable::dconst(int value) {
   226   transition(vtos, dtos);
   227   switch( value ) {
   228     case 0:  __ dmtc1(R0, FSF);  
   229              return;
   230     case 1:  __ daddiu(AT, R0, 1);
   231              __ dmtc1(AT, FSF);
   232              __ cvt_d_w(FSF, FSF);
   233              break;
   234     default: ShouldNotReachHere();
   235   }
   236 }
   238 void TemplateTable::bipush() {
   239   transition(vtos, itos);
   240   __ lb(FSR, at_bcp(1));
   241 }
   243 void TemplateTable::sipush() {
   244   transition(vtos, itos);
   245   __ lb(FSR, BCP, 1);
   246   __ lbu(AT, BCP, 2);
   247   __ dsll(FSR, FSR, 8); 
   248   __ orr(FSR, FSR, AT);
   249 }
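//
// Sketch of the operand assembly above: the high byte is loaded with lb
// (sign-extending) and the low byte with lbu (zero-extending), so the
// shift-and-or yields a correctly signed 16-bit immediate, e.g.
//
//     bytes 0xFF 0x38  ->  (0xFF...FF << 8) | 0x38  =  -200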
   251 // T1 : tags
   252 // T2 : index
   253 // T3 : cpool
   254 // T8 : tag
   255 void TemplateTable::ldc(bool wide) {
   256   transition(vtos, vtos);
   257   Label call_ldc, notFloat, notClass, Done;
   258   // get index in cpool
   259   if (wide) {
   260     __ get_unsigned_2_byte_index_at_bcp(T2, 1);
   261   } else {
   262     __ lbu(T2, at_bcp(1));
   263   }
   265   __ get_cpool_and_tags(T3, T1);
   267   const int base_offset = ConstantPool::header_size() * wordSize;
   268   const int tags_offset = Array<u1>::base_offset_in_bytes();
   270   // get type
   271   if (UseLoongsonISA && Assembler::is_simm(tags_offset, 8)) {
   272     __ gslbx(T1, T1, T2, tags_offset);
   273   } else {
   274     __ dadd(AT, T1, T2);
   275     __ lb(T1, AT, tags_offset);
   276   }
   277   // now T1 is the tag
   279   // unresolved class - get the resolved class
   280   __ daddiu(AT, T1, - JVM_CONSTANT_UnresolvedClass);
   281   __ beq(AT, R0, call_ldc);
   282   __ delayed()->nop();
   284   // unresolved class in error (resolution failed) - call into runtime
   285   // so that the same error from first resolution attempt is thrown.
   286   __ daddiu(AT, T1, -JVM_CONSTANT_UnresolvedClassInError);
   287   __ beq(AT, R0, call_ldc);
   288   __ delayed()->nop();
   290   // resolved class - need to call vm to get java mirror of the class
   291   __ daddiu(AT, T1, - JVM_CONSTANT_Class);
   292   __ bne(AT, R0, notClass);
   293   __ delayed()->dsll(T2, T2, Address::times_8);
   295   __ bind(call_ldc);
   296   __ move(A1, wide);
   297   call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), A1);
   298   //__ push(atos);
   299   __ sd(FSR, SP, - Interpreter::stackElementSize);
   300   __ b(Done);
   301   __ delayed()->daddiu(SP, SP, - Interpreter::stackElementSize);
   302   __ nop(); // nop added to work around a performance issue
   304   __ bind(notClass);
   305   __ daddiu(AT, T1, -JVM_CONSTANT_Float);
   306   __ bne(AT, R0, notFloat);
   307   __ delayed()->nop();
   308   // ftos
   309   if (UseLoongsonISA && Assembler::is_simm(base_offset, 8)) {
   310     __ gslwxc1(FSF, T3, T2, base_offset);
   311   } else {
   312     __ dadd(AT, T3, T2);
   313     __ lwc1(FSF, AT, base_offset);
   314   }
   315   //__ push_f();
   316   __ swc1(FSF, SP, - Interpreter::stackElementSize);
   317   __ b(Done);
   318   __ delayed()->daddiu(SP, SP, - Interpreter::stackElementSize);
   320   __ bind(notFloat);
   321 #ifdef ASSERT
   322   {
   323     Label L;
   324     __ daddiu(AT, T1, -JVM_CONSTANT_Integer);
   325     __ beq(AT, R0, L);
   326     __ delayed()->nop();
   327     __ stop("unexpected tag type in ldc");
   328     __ bind(L);
   329   }
   330 #endif
   331   // itos JVM_CONSTANT_Integer only
   332   if (UseLoongsonISA && Assembler::is_simm(base_offset, 8)) {
   333     __ gslwx(FSR, T3, T2, base_offset);
   334   } else {
   335     __ dadd(T0, T3, T2);
   336     __ lw(FSR, T0, base_offset);
   337   }
   338   __ push(itos);
   339   __ bind(Done);
   340 }
   342 // Fast path for caching oop constants.
   343 void TemplateTable::fast_aldc(bool wide) {
   344   transition(vtos, atos);
   346   Register result = FSR;
   347   Register tmp = SSR;
   348   int index_size = wide ? sizeof(u2) : sizeof(u1);
   350   Label resolved;
   352   // We are resolved if the resolved reference cache entry contains a
   353   // non-null object (String, MethodType, etc.)
   354   assert_different_registers(result, tmp);
   355   __ get_cache_index_at_bcp(tmp, 1, index_size);
   356   __ load_resolved_reference_at_index(result, tmp);
   357   __ bne(result, R0, resolved);
   358   __ delayed()->nop();
   360   address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);
   361   // first time invocation - must resolve first
   362   int i = (int)bytecode();
   363   __ move(tmp, i);
   364   __ call_VM(result, entry, tmp);
   366   __ bind(resolved);
   368   if (VerifyOops) {
   369     __ verify_oop(result);
   370   }
   371 }
   374 // used register: T2, T3, T1
   375 // T2 : index
   376 // T3 : cpool
   377 // T1 : tag
   378 void TemplateTable::ldc2_w() {
   379   transition(vtos, vtos);
   380   Label Long, Done;
   382   // get index in cpool
   383   __ get_unsigned_2_byte_index_at_bcp(T2, 1);
   385   __ get_cpool_and_tags(T3, T1);
   387   const int base_offset = ConstantPool::header_size() * wordSize;
   388   const int tags_offset = Array<u1>::base_offset_in_bytes();
   390   // get type in T1
   391   if (UseLoongsonISA && Assembler::is_simm(tags_offset, 8)) {
   392     __ gslbx(T1, T1, T2, tags_offset);
   393   } else {
   394     __ dadd(AT, T1, T2);
   395     __ lb(T1, AT, tags_offset);
   396   }
   398   __ daddiu(AT, T1, - JVM_CONSTANT_Double);
   399   __ bne(AT, R0, Long);
   400   __ delayed()->dsll(T2, T2, Address::times_8);
   402   // dtos
   403   if (UseLoongsonISA && Assembler::is_simm(base_offset, 8)) {
   404     __ gsldxc1(FSF, T3, T2, base_offset);
   405   } else {
   406     __ daddu(AT, T3, T2);
   407     __ ldc1(FSF, AT, base_offset);
   408   }
   409   __ sdc1(FSF, SP, - 2 * wordSize);
   410   __ b(Done);
   411   __ delayed()->daddi(SP, SP, - 2 * wordSize);
   413   // ltos
   414   __ bind(Long);
   415   if (UseLoongsonISA && Assembler::is_simm(base_offset, 8)) {
   416     __ gsldx(FSR, T3, T2, base_offset);
   417   } else {
   418     __ dadd(AT, T3, T2);
   419     __ ld(FSR, AT, base_offset);
   420   }
   421   __ push(ltos);
   423   __ bind(Done);
   424 }
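//
// This is the template the changeset above optimizes: the small constant
// offsets are now passed to is_simm() by value (instead of the old
// sizeof(...) idiom), so on Loongson the indexed loads fold base + index +
// offset into one instruction, e.g. (sketch):
//
//     __ gsldx(FSR, T3, T2, base_offset);  // FSR = *(T3 + T2 + base_offset)
//
// instead of the generic two-instruction MIPS sequence:
//
//     __ dadd(AT, T3, T2);
//     __ ld(FSR, AT, base_offset);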
   426 // We compute the actual local variable address here.
   427 // x86 does not need this because it has scaled-index addressing; MIPS has none, so we do it here.
   428 void TemplateTable::locals_index(Register reg, int offset) {
   429   __ lbu(reg, at_bcp(offset));
   430   __ dsll(reg, reg, Address::times_8);
   431   __ dsub(reg, LVP, reg);
   432 }
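//
// Sketch of the computation above: locals occupy one 8-byte slot each and
// grow downward from LVP, so for a one-byte-index bytecode
//
//     reg = LVP - (local_index << 3)
//
// i.e. local #0 lives at LVP, local #1 at LVP - 8, and so on.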
   434 // this method will do bytecode folding of the two form:
   435 // iload iload      iload caload
   436 // used register : T2, T3
   437 // T2 : bytecode
   438 // T3 : folded code
   439 void TemplateTable::iload() {
   440   transition(vtos, itos);
   441   if (RewriteFrequentPairs) {
   442     Label rewrite, done;
   443     // get the next bytecode in T2
   444     __ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_iload)));
   445     // if _iload, wait to rewrite to iload2.  We only want to rewrite the
   446     // last two iloads in a pair.  Comparing against fast_iload means that
   447     // the next bytecode is neither an iload nor a caload, and therefore
   448     // an iload pair.
   449     __ move(AT, Bytecodes::_iload);
   450     __ beq(AT, T2, done);
   451     __ delayed()->nop();
   453     __ move(T3, Bytecodes::_fast_iload2);
   454     __ move(AT, Bytecodes::_fast_iload);
   455     __ beq(AT, T2, rewrite);
   456     __ delayed()->nop();
   458     // if _caload, rewrite to fast_icaload
   459     __ move(T3, Bytecodes::_fast_icaload);
   460     __ move(AT, Bytecodes::_caload);
   461     __ beq(AT, T2, rewrite);
   462     __ delayed()->nop();
   464     // rewrite so iload doesn't check again.
   465     __ move(T3, Bytecodes::_fast_iload);
   467     // rewrite
   468     // T3 : fast bytecode
   469     __ bind(rewrite);
   470     patch_bytecode(Bytecodes::_iload, T3, T2, false);
   471     __ bind(done);
   472   }
   474   // Get the local value into tos
   475   locals_index(T2);
   476   __ lw(FSR, T2, 0);
   477 }
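//
// Sketch of the pair folding above (resulting stream after patching):
//
//     iload a; iload b   ->  fast_iload2 a b   (one template handles both)
//     iload a; caload    ->  fast_icaload a    (local load + array load fused)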
   479 // used register T2
   480 // T2 : index
   481 void TemplateTable::fast_iload2() {
   482   transition(vtos, itos);
   483   locals_index(T2);
   484   __ lw(FSR, T2, 0);
   485   __ push(itos);
   486   locals_index(T2, 3);
   487   __ lw(FSR, T2, 0);
   488 }
   490 // used register T2
   491 // T2 : index
   492 void TemplateTable::fast_iload() {
   493   transition(vtos, itos);
   494   locals_index(T2);
   495   __ lw(FSR, T2, 0);
   496 }
   498 // used register T2
   499 // T2 : index
   500 void TemplateTable::lload() {
   501   transition(vtos, ltos);
   502   locals_index(T2);
   503   __ ld(FSR, T2, -wordSize);
   504 }
   506 // used register T2
   507 // T2 : index
   508 void TemplateTable::fload() {
   509   transition(vtos, ftos);
   510   locals_index(T2);
   511   __ lwc1(FSF, T2, 0);
   512 }
   514 // used register T2
   515 // T2 : index
   516 void TemplateTable::dload() {
   517   transition(vtos, dtos);
   518   locals_index(T2);
   519   __ ldc1(FSF, T2, -wordSize);
   520 }
   522 // used register T2
   523 // T2 : index
   524 void TemplateTable::aload() {
   525   transition(vtos, atos);
   526   locals_index(T2);
   527   __ ld(FSR, T2, 0);
   528 }
   530 void TemplateTable::locals_index_wide(Register reg) {
   531   __ get_2_byte_integer_at_bcp(reg, AT, 2);
   532   __ huswap(reg);
   533   __ dsll(reg, reg, Address::times_8);
   534   __ dsub(reg, LVP, reg);
   535 }
   537 // used register T2
   538 // T2 : index
   539 void TemplateTable::wide_iload() {
   540   transition(vtos, itos);
   541   locals_index_wide(T2);
   542   __ ld(FSR, T2, 0);
   543 }
   545 // used register T2
   546 // T2 : index
   547 void TemplateTable::wide_lload() {
   548   transition(vtos, ltos);
   549   locals_index_wide(T2);
   550   __ ld(FSR, T2, -wordSize);
   551 }
   553 // used register T2
   554 // T2 : index
   555 void TemplateTable::wide_fload() {
   556   transition(vtos, ftos);
   557   locals_index_wide(T2);
   558   __ lwc1(FSF, T2, 0);
   559 }
   561 // used register T2
   562 // T2 : index
   563 void TemplateTable::wide_dload() {
   564   transition(vtos, dtos);
   565   locals_index_wide(T2);
   566   __ ldc1(FSF, T2, -wordSize);
   567 }
   569 // used register T2
   570 // T2 : index
   571 void TemplateTable::wide_aload() {
   572   transition(vtos, atos);
   573   locals_index_wide(T2);
   574   __ ld(FSR, T2, 0);
   575 }
   577 // We use A2 as the register for the index, BE CAREFUL!
   578 // The tgeu trap (code 29) path is not used for now; it is kept for later optimization.
   579 void TemplateTable::index_check(Register array, Register index) {
   580   // Pop ptr into array
   581   __ pop_ptr(array);
   582   index_check_without_pop(array, index);
   583 }
   585 void TemplateTable::index_check_without_pop(Register array, Register index) {
   586   // destroys AT (and A2 on the failure path)
   587   // check array
   588   __ null_check(array, arrayOopDesc::length_offset_in_bytes());
   590 #ifdef _LP64
   591   // sign extend since tos (index) might contain garbage in upper bits
   592   __ sll(index, index, 0);
   593 #endif // _LP64
   595   // check index
   596   Label ok;
   597   __ lw(AT, array, arrayOopDesc::length_offset_in_bytes());
   598 #ifndef OPT_RANGECHECK
   599   __ sltu(AT, index, AT);
   600   __ bne(AT, R0, ok);
   601   __ delayed()->nop();
   603   // throw_ArrayIndexOutOfBoundsException assumes the aberrant index is in A2
   604   if (A2 != index) __ move(A2, index);
   605   __ jmp(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry);
   606   __ delayed()->nop();
   607   __ bind(ok);
   608 #else
   609   __ lw(AT, array, arrayOopDesc::length_offset_in_bytes());
   610   __ move(A2, index);
   611   __ tgeu(A2, AT, 29);
   612 #endif
   613 }
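//
// Note on the check above: after the sign-extension one unsigned compare
// covers both bounds, since a negative index becomes a huge unsigned value:
//
//     if ((juint)index >= (juint)length)  goto throw_AIOOBE;   // sketch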
   615 void TemplateTable::iaload() {
   616   transition(itos, itos);
   617   if(UseBoundCheckInstruction) {
   618     __ pop(SSR); //SSR:array    FSR: index
   619     __ dsll(FSR, FSR, 2);
   620     __ dadd(FSR, SSR, FSR);
   621     __ addi(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_INT));
   623     __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes());  //bound
   624     __ dsll(AT, AT, 2);
   625     __ dadd(AT, SSR, AT);
   626     __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_INT));
   628     __ gslwle(FSR, FSR, AT);
   629   } else {
   630     index_check(SSR, FSR);
   631     __ dsll(FSR, FSR, 2);
   632     __ dadd(FSR, SSR, FSR);
   633     //FSR: index
   634     __ lw(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_INT));
   635   }
   636 }
   638 void TemplateTable::laload() {
   639   transition(itos, ltos);
   640   if(UseBoundCheckInstruction) {
   641     __ pop(SSR); //SSR:array    FSR: index
   642     __ dsll(FSR, FSR, Address::times_8);
   643     __ dadd(FSR, SSR, FSR);
   644     __ addi(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize);
   646     __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes());  //bound
   647     __ dsll(AT, AT, Address::times_8);
   648     __ dadd(AT, SSR, AT);
   649     __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize);
   651     __ gsldle(FSR, FSR, AT);
   652   } else {
   653     index_check(SSR, FSR);
   654     __ dsll(AT, FSR, Address::times_8);
   655     __ dadd(AT, SSR, AT);
   656     __ ld(FSR, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize);
   657   }
   658 }
   660 void TemplateTable::faload() {
   661   transition(itos, ftos);
   662   if(UseBoundCheckInstruction) {
   663     __ pop(SSR); //SSR:array    FSR: index
   664     __ shl(FSR, 2);
   665     __ dadd(FSR, SSR, FSR);
   666     __ addi(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT));
   668     __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes());  //bound
   669     __ shl(AT, 2);
   670     __ dadd(AT, SSR, AT);
   671     __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_FLOAT));
   673     __ gslwlec1(FSF, FSR, AT);
   674   } else {
   675     index_check(SSR, FSR);
   676     __ shl(FSR, 2);
   677     __ dadd(FSR, SSR, FSR);
   678     __ lwc1(FSF, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT));
   679   }
   680 }
   682 void TemplateTable::daload() {
   683   transition(itos, dtos);
   684   if(UseBoundCheckInstruction) {
   685     __ pop(SSR); //SSR:array    FSR: index
   686     __ dsll(FSR, FSR, 3);
   687     __ dadd(FSR, SSR, FSR);
   688     __ addi(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize);
   690     __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes());  //bound
   691     __ dsll(AT, AT, 3);
   692     __ dadd(AT, SSR, AT);
   693     __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize);
   695     __ gsldlec1(FSF, FSR, AT);
   696   } else {
   697     index_check(SSR, FSR);
   698     __ dsll(AT, FSR, 3);
   699     __ dadd(AT, SSR, AT);
   700     __ ldc1(FSF, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize);
   701   }
   702 }
   704 void TemplateTable::aaload() {
   705   transition(itos, atos);
   706   index_check(SSR, FSR);
   707   __ dsll(FSR, FSR, UseCompressedOops ? Address::times_4 : Address::times_8);
   708   __ dadd(FSR, SSR, FSR);
   709   //add for compressedoops
   710   __ load_heap_oop(FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
   711 }
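//
// With UseCompressedOops the element scale above is times_4 rather than
// times_8: references are stored as 32-bit narrow oops in the array, and
// load_heap_oop decodes the narrow oop back into a full 64-bit pointer.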
   713 void TemplateTable::baload() {
   714   transition(itos, itos);
   715   if(UseBoundCheckInstruction) {
   716     __ pop(SSR); //SSR:array   FSR:index
   717     __ dadd(FSR, SSR, FSR);
   718     __ addi(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //base
   720     __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes());
   721     __ dadd(AT, SSR, AT);
   722     __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //bound
   724     __ gslble(FSR, FSR, AT);
   725   } else {
   726     index_check(SSR, FSR);
   727     __ dadd(FSR, SSR, FSR);
   728     __ lb(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE));
   729   }
   730 }
   732 void TemplateTable::caload() {
   733   transition(itos, itos);
   734   index_check(SSR, FSR);
   735   __ dsll(FSR, FSR, Address::times_2);
   736   __ dadd(FSR, SSR, FSR);
   737   __ lhu(FSR, FSR,  arrayOopDesc::base_offset_in_bytes(T_CHAR));
   738 }
   740 // iload followed by caload frequent pair
   741 // used register : T2
   742 // T2 : index
   743 void TemplateTable::fast_icaload() {
   744   transition(vtos, itos);
   745   // load index out of locals
   746   locals_index(T2);
   747   __ lw(FSR, T2, 0);
   748   index_check(SSR, FSR);
   749   __ dsll(FSR, FSR, 1);
   750   __ dadd(FSR, SSR, FSR);
   751   __ lhu(FSR, FSR,  arrayOopDesc::base_offset_in_bytes(T_CHAR));
   752 }
   754 void TemplateTable::saload() {
   755   transition(itos, itos);
   756   if(UseBoundCheckInstruction) {
   757     __ pop(SSR); //SSR:array    FSR: index
   758     __ dsll(FSR, FSR, Address::times_2);
   759     __ dadd(FSR, SSR, FSR);
   760     __ addi(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT));
   762     __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes());  //bound
   763     __ dsll(AT, AT, Address::times_2);
   764     __ dadd(AT, SSR, AT);
   765     __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_SHORT));
   767     __ gslhle(FSR, FSR, AT);
   768   } else {
   769     index_check(SSR, FSR);
   770     __ dsll(FSR, FSR, Address::times_2);
   771     __ dadd(FSR, SSR, FSR);
   772     __ lh(FSR, FSR,  arrayOopDesc::base_offset_in_bytes(T_SHORT));
   773   }
   774 }
   776 void TemplateTable::iload(int n) {
   777   transition(vtos, itos);
   778   __ lw(FSR, iaddress(n));
   779 }
   781 void TemplateTable::lload(int n) {
   782   transition(vtos, ltos);
   783   __ ld(FSR, laddress(n));
   784 }
   786 void TemplateTable::fload(int n) {
   787   transition(vtos, ftos);
   788   __ lwc1(FSF, faddress(n));
   789 }
   791 void TemplateTable::dload(int n) {
   792   transition(vtos, dtos);
   793   __ ldc1(FSF, laddress(n));
   794 }
   796 void TemplateTable::aload(int n) {
   797   transition(vtos, atos);
   798   __ ld(FSR, aaddress(n));
   799 }
   801 // used register : T2, T3
   802 // T2 : bytecode
   803 // T3 : folded code
   804 void TemplateTable::aload_0() {
   805   transition(vtos, atos);
   806   // According to bytecode histograms, the pairs:
   807   //
   808   // _aload_0, _fast_igetfield
   809   // _aload_0, _fast_agetfield
   810   // _aload_0, _fast_fgetfield
   811   //
   812   // occur frequently. If RewriteFrequentPairs is set, the (slow)
   813   // _aload_0 bytecode checks if the next bytecode is either
   814   // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then
   815   // rewrites the current bytecode into a pair bytecode; otherwise it
   816   // rewrites the current bytecode into _fast_aload_0 that doesn't do
   817   // the pair check anymore.
   818   //
   819   // Note: If the next bytecode is _getfield, the rewrite must be
   820   //       delayed, otherwise we may miss an opportunity for a pair.
   821   //
   822   // Also rewrite frequent pairs
   823   //   aload_0, aload_1
   824   //   aload_0, iload_1
   825   // These bytecodes with a small amount of code are most profitable
   826   // to rewrite
   827   if (RewriteFrequentPairs) {
   828     Label rewrite, done;
   829     // get the next bytecode in T2
   830     __ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0)));
   832     // do actual aload_0
   833     aload(0);
   835     // if _getfield then wait with rewrite
   836     __ move(AT, Bytecodes::_getfield);
   837     __ beq(AT, T2, done);
   838     __ delayed()->nop();
   840     // if _igetfield then rewrite to _fast_iaccess_0
   841     assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) ==
   842         Bytecodes::_aload_0,
   843         "fix bytecode definition");
   844     __ move(T3, Bytecodes::_fast_iaccess_0);
   845     __ move(AT, Bytecodes::_fast_igetfield);
   846     __ beq(AT, T2, rewrite);
   847     __ delayed()->nop();
   849     // if _agetfield then rewrite to _fast_aaccess_0
   850     assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) ==
   851         Bytecodes::_aload_0,
   852         "fix bytecode definition");
   853     __ move(T3, Bytecodes::_fast_aaccess_0);
   854     __ move(AT, Bytecodes::_fast_agetfield);
   855     __ beq(AT, T2, rewrite);
   856     __ delayed()->nop();
   858     // if _fgetfield then rewrite to _fast_faccess_0
   859     assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) ==
   860         Bytecodes::_aload_0,
   861         "fix bytecode definition");
   862     __ move(T3, Bytecodes::_fast_faccess_0);
   863     __ move(AT, Bytecodes::_fast_fgetfield);
   864     __ beq(AT, T2, rewrite);
   865     __ delayed()->nop();
   867     // else rewrite to _fast_aload_0
   868     assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) ==
   869         Bytecodes::_aload_0,
   870         "fix bytecode definition");
   871     __ move(T3, Bytecodes::_fast_aload_0);
   873     // rewrite
   874     __ bind(rewrite);
   875     patch_bytecode(Bytecodes::_aload_0, T3, T2, false);
   877     __ bind(done);
   878   } else {
   879     aload(0);
   880   }
   881 }
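//
// Sketch of the rewrite above: once the getfield of the pair has been
// quickened, a stream such as
//
//     aload_0; getfield #4       // #4 an int field, say
//
// is patched to
//
//     fast_iaccess_0 #4          // one template: push this, load the field
//
// while a lone aload_0 is patched to fast_aload_0 to skip these checks.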
   883 void TemplateTable::istore() {
   884   transition(itos, vtos);
   885   locals_index(T2);
   886   __ sw(FSR, T2, 0);
   887 }
   889 void TemplateTable::lstore() {
   890   transition(ltos, vtos);
   891   locals_index(T2);
   892   __ sd(FSR, T2, -wordSize);
   893 }
   895 void TemplateTable::fstore() {
   896   transition(ftos, vtos);
   897   locals_index(T2);
   898   __ swc1(FSF, T2, 0);
   899 }
   901 void TemplateTable::dstore() {
   902   transition(dtos, vtos);
   903   locals_index(T2);
   904   __ sdc1(FSF, T2, -wordSize);
   905 }
   907 void TemplateTable::astore() {
   908   transition(vtos, vtos);
   909   __ pop_ptr(FSR);
   910   locals_index(T2);
   911   __ sd(FSR, T2, 0);
   912 }
   914 void TemplateTable::wide_istore() {
   915   transition(vtos, vtos);
   916   __ pop_i(FSR);
   917   locals_index_wide(T2);
   918   __ sd(FSR, T2, 0);
   919 }
   921 void TemplateTable::wide_lstore() {
   922   transition(vtos, vtos);
   923   __ pop_l(FSR);
   924   locals_index_wide(T2);
   925   __ sd(FSR, T2, -wordSize);
   926 }
   928 void TemplateTable::wide_fstore() {
   929   wide_istore();
   930 }
   932 void TemplateTable::wide_dstore() {
   933   wide_lstore();
   934 }
   936 void TemplateTable::wide_astore() {
   937   transition(vtos, vtos);
   938   __ pop_ptr(FSR);
   939   locals_index_wide(T2);
   940   __ sd(FSR, T2, 0);
   941 }
   943 // used register : T2
   944 void TemplateTable::iastore() {
   945   transition(itos, vtos);
   946   __ pop_i(SSR);   // T2: array  SSR: index
   947   if(UseBoundCheckInstruction) {
   948     __ pop_ptr(T2);
   949     __ dsll(SSR, SSR, Address::times_4);
   950     __ dadd(SSR, T2, SSR);
   951     __ addi(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_INT));  // base
   953     __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes());
   954     __ dsll(AT, AT, Address::times_4);
   955     __ dadd(AT, T2, AT);
   956     __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_INT));  //bound
   958     __ gsswle(FSR, SSR, AT);
   959   } else {
   960     index_check(T2, SSR);  // prefer index in SSR
   961     __ dsll(SSR, SSR, Address::times_4);
   962     __ dadd(T2, T2, SSR);
   963     __ sw(FSR, T2, arrayOopDesc::base_offset_in_bytes(T_INT));
   964   }
   965 }
   969 // used register T2, T3
   970 void TemplateTable::lastore() {
   971   transition(ltos, vtos);
   972   __ pop_i (T2);
   973   if(UseBoundCheckInstruction) {
   974     __ pop_ptr(T3);
   975     __ dsll(T2, T2, Address::times_8);
   976     __ dadd(T2, T3, T2);
   977     __ addi(T2, T2, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize);  // base
   979     __ lw(AT, T3, arrayOopDesc::length_offset_in_bytes());
   980     __ dsll(AT, AT, Address::times_8);
   981     __ dadd(AT, T3, AT);
   982     __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize);  //bound
   984     __ gssdle(FSR, T2, AT);
   985   } else {
   986     index_check(T3, T2);
   987     __ dsll(T2, T2, Address::times_8);
   988     __ dadd(T3, T3, T2);
   989     __ sd(FSR, T3, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize);
   990   }
   991 }
   993 // used register T2
   994 void TemplateTable::fastore() {
   995   transition(ftos, vtos);
   996   __ pop_i(SSR);
   997   if(UseBoundCheckInstruction) {
   998     __ pop_ptr(T2);
   999     __ dsll(SSR, SSR, Address::times_4);
  1000     __ dadd(SSR, T2, SSR);
  1001     __ addi(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT));  // base
  1003     __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes());
  1004     __ dsll(AT, AT, Address::times_4);
  1005     __ dadd(AT, T2, AT);
  1006     __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_FLOAT));  //bound
  1008     __ gsswlec1(FSF, SSR, AT);
  1009   } else {
  1010     index_check(T2, SSR);
  1011     __ dsll(SSR, SSR, Address::times_4);
  1012     __ dadd(T2, T2, SSR);
  1013     __ swc1(FSF, T2, arrayOopDesc::base_offset_in_bytes(T_FLOAT));
  1014   }
  1015 }
  1017 // used register T2, T3
  1018 void TemplateTable::dastore() {
  1019   transition(dtos, vtos);
  1020   __ pop_i (T2);
  1021   if(UseBoundCheckInstruction) {
  1022     __ pop_ptr(T3);
  1023     __ dsll(T2, T2, Address::times_8);
  1024     __ dadd(T2, T3, T2);
  1025     __ addi(T2, T2, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize);  // base
  1027     __ lw(AT, T3, arrayOopDesc::length_offset_in_bytes());
  1028     __ dsll(AT, AT, Address::times_8);
  1029     __ dadd(AT, T3, AT);
  1030     __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize);  //bound
  1032     __ gssdlec1(FSF, T2, AT);
  1033   } else {
  1034     index_check(T3, T2);
  1035     __ dsll(T2, T2, Address::times_8);
  1036     __ daddu(T3, T3, T2);
  1037     __ sdc1(FSF, T3, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize);
  1038   }
  1039 }
  1041 // used register : T2, T3, T8
  1042 // T2 : array
  1043 // T3 : subklass
  1044 // T8 : supklass
  1045 void TemplateTable::aastore() {
  1046   Label is_null, ok_is_subtype, done;
  1047   transition(vtos, vtos);
  1048   // stack: ..., array, index, value
  1049   __ ld(FSR, at_tos());     // Value
  1050   __ lw(SSR, at_tos_p1());  // Index
  1051   __ ld(T2, at_tos_p2());  // Array
  1053   // index_check(T2, SSR);
  1054   index_check_without_pop(T2, SSR);
  1055   // do array store check - check for NULL value first
  1056   __ beq(FSR, R0, is_null);
  1057   __ delayed()->nop();
  1059   // Move subklass into T3
  1060   //add for compressedoops
  1061   __ load_klass(T3, FSR);
  1062   // Move superklass into T8
  1063   //add for compressedoops
  1064   __ load_klass(T8, T2);
  1065   __ ld(T8, Address(T8,  ObjArrayKlass::element_klass_offset()));
  1066   // Compress array+index*4+12 into a single register. T2
  1067   __ dsll(AT, SSR, UseCompressedOops? Address::times_4 : Address::times_8);
  1068   __ dadd(T2, T2, AT);
  1069   __ daddi(T2, T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
  1071   // Generate subtype check.
  1072   // Superklass in T8.  Subklass in T3.
  1073   __ gen_subtype_check(T8, T3, ok_is_subtype);        // <-- Jin
  1074   // Come here on failure
  1075   // object is at FSR
  1076   __ jmp(Interpreter::_throw_ArrayStoreException_entry);    // <-- Jin
  1077   __ delayed()->nop();
  1078   // Come here on success
  1079   __ bind(ok_is_subtype);
  1080   //replace with do_oop_store->store_heap_oop
  1081   __ store_heap_oop(Address(T2, 0), FSR);          // <-- Jin
  1082   __ store_check(T2);
  1083   __ b(done);
  1084   __ delayed()->nop();
  1086   // Have a NULL in FSR; array in T2, index in SSR.  Store NULL at ary[idx]
  1087   __ bind(is_null);
  1088   __ profile_null_seen(T9);
  1089   __ dsll(AT, SSR, UseCompressedOops? Address::times_4 : Address::times_8);
  1090   __ dadd(T2, T2, AT);
  1091   __ store_heap_oop(Address(T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), FSR);  /* FSR is null here */
  1093   __ bind(done);
  1094   __ daddi(SP, SP, 3 * Interpreter::stackElementSize);
  1095 }
  1097 void TemplateTable::bastore() {
  1098   transition(itos, vtos);
  1099   __ pop_i(SSR);
  1100   if(UseBoundCheckInstruction) {
  1101     __ pop_ptr(T2);
  1102     __ dadd(SSR, T2, SSR);
  1103     __ addi(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE));  // base
  1105     __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes());
  1106     __ dadd(AT, T2, AT);
  1107     __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_BYTE));  //bound
  1109     __ gssble(FSR, SSR, AT);
  1110   } else {
  1111     index_check(T2, SSR);
  1112     __ dadd(SSR, T2, SSR);
  1113     __ sb(FSR, SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE));
  1114   }
  1115 }
  1117 void TemplateTable::castore() {
  1118   transition(itos, vtos);
  1119   __ pop_i(SSR);
  1120   if(UseBoundCheckInstruction) {
  1121     __ pop_ptr(T2);
  1122     __ dsll(SSR, SSR, Address::times_2);
  1123     __ dadd(SSR, T2, SSR);
  1124     __ addi(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR));  // base
  1126     __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes());
  1127     __ dsll(AT, AT, Address::times_2);
  1128     __ dadd(AT, T2, AT);
  1129     __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_CHAR));  //bound
  1131     __ gsshle(FSR, SSR, AT);
  1132   } else {
  1133     index_check(T2, SSR);
  1134     __ dsll(SSR, SSR, Address::times_2);
  1135     __ dadd(SSR, T2, SSR);
  1136     __ sh(FSR, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR));
  1137   }
  1138 }
  1140 void TemplateTable::sastore() {
  1141   castore();
  1142 }
  1144 void TemplateTable::istore(int n) {
  1145   transition(itos, vtos);
  1146   __ sw(FSR, iaddress(n));
  1147 }
  1149 void TemplateTable::lstore(int n) {
  1150   transition(ltos, vtos);
  1151   __ sd(FSR, laddress(n));
  1152 }
  1154 void TemplateTable::fstore(int n) {
  1155   transition(ftos, vtos);
  1156   __ swc1(FSF, faddress(n));
  1157 }
  1159 void TemplateTable::dstore(int n) {
  1160   transition(dtos, vtos);
  1161   __ sdc1(FSF, laddress(n));
  1162 }
  1164 void TemplateTable::astore(int n) {
  1165   transition(vtos, vtos);
  1166   __ pop_ptr(FSR);
  1167   __ sd(FSR, aaddress(n));
  1168 }
  1170 void TemplateTable::pop() {
  1171   transition(vtos, vtos);
  1172   __ daddi(SP, SP, Interpreter::stackElementSize);
  1173 }
  1175 void TemplateTable::pop2() {
  1176   transition(vtos, vtos);
  1177   __ daddi(SP, SP, 2 * Interpreter::stackElementSize);
  1178 }
  1180 void TemplateTable::dup() {
  1181   transition(vtos, vtos);
  1182   // stack: ..., a
  1183   __ load_ptr(0, FSR);
  1184   __ push_ptr(FSR);
  1185   // stack: ..., a, a
  1186 }
  1188 // blows FSR
  1189 void TemplateTable::dup_x1() {
  1190   transition(vtos, vtos);
  1191   // stack: ..., a, b
  1192   __ load_ptr(0, FSR);  // load b
  1193   __ load_ptr(1, A5);  // load a
  1194   __ store_ptr(1, FSR); // store b
  1195   __ store_ptr(0, A5); // store a
  1196   __ push_ptr(FSR);             // push b
  1197   // stack: ..., b, a, b
  1198 }
  1200 // blows FSR
  1201 void TemplateTable::dup_x2() {
  1202   transition(vtos, vtos);
  1203   // stack: ..., a, b, c
  1204   __ load_ptr(0, FSR);  // load c
  1205   __ load_ptr(2, A5);  // load a
  1206   __ store_ptr(2, FSR); // store c in a
  1207   __ push_ptr(FSR);             // push c
  1208   // stack: ..., c, b, c, c
  1209   __ load_ptr(2, FSR);  // load b
  1210   __ store_ptr(2, A5); // store a in b
  1211   // stack: ..., c, a, c, c
  1212   __ store_ptr(1, FSR); // store b in c
  1213   // stack: ..., c, a, b, c
  1214 }
  1216 // blows FSR
  1217 void TemplateTable::dup2() {
  1218   transition(vtos, vtos);
  1219   // stack: ..., a, b
  1220   __ load_ptr(1, FSR);  // load a
  1221   __ push_ptr(FSR);             // push a
  1222   __ load_ptr(1, FSR);  // load b
  1223   __ push_ptr(FSR);             // push b
  1224   // stack: ..., a, b, a, b
  1225 }
  1227 // blows FSR
  1228 void TemplateTable::dup2_x1() {
  1229   transition(vtos, vtos);
  1230   // stack: ..., a, b, c
  1231   __ load_ptr(0, T2);  // load c
  1232   __ load_ptr(1, FSR);  // load b
  1233   __ push_ptr(FSR);             // push b
  1234   __ push_ptr(T2);             // push c
  1235   // stack: ..., a, b, c, b, c
  1236   __ store_ptr(3, T2); // store c in b
  1237   // stack: ..., a, c, c, b, c
  1238   __ load_ptr(4, T2);  // load a
  1239   __ store_ptr(2, T2); // store a in 2nd c
  1240   // stack: ..., a, c, a, b, c
  1241   __ store_ptr(4, FSR); // store b in a
  1242   // stack: ..., b, c, a, b, c
  1244   // stack: ..., b, c, a, b, c
  1245 }
  1247 // blows FSR, SSR
  1248 void TemplateTable::dup2_x2() {
  1249   transition(vtos, vtos);
  1250   // stack: ..., a, b, c, d
  1251   // stack: ..., a, b, c, d
  1252   __ load_ptr(0, T2);  // load d
  1253   __ load_ptr(1, FSR);  // load c
  1254   __ push_ptr(FSR);             // push c
  1255   __ push_ptr(T2);             // push d
  1256   // stack: ..., a, b, c, d, c, d
  1257   __ load_ptr(4, FSR);  // load b
  1258   __ store_ptr(2, FSR); // store b in d
  1259   __ store_ptr(4, T2); // store d in b
  1260   // stack: ..., a, d, c, b, c, d
  1261   __ load_ptr(5, T2);  // load a
  1262   __ load_ptr(3, FSR);  // load c
  1263   __ store_ptr(3, T2); // store a in c
  1264   __ store_ptr(5, FSR); // store c in a
  1265   // stack: ..., c, d, a, b, c, d
  1267   // stack: ..., c, d, a, b, c, d
  1268 }
  1270 // blows FSR
  1271 void TemplateTable::swap() {
  1272   transition(vtos, vtos);
  1273   // stack: ..., a, b
  1275   __ load_ptr(1, A5);  // load a
  1276   __ load_ptr(0, FSR);  // load b
  1277   __ store_ptr(0, A5); // store a in b
  1278   __ store_ptr(1, FSR); // store b in a
  1280   // stack: ..., b, a
  1281 }
  1283 void TemplateTable::iop2(Operation op) {
  1284   transition(itos, itos);
  1286   __ pop_i(SSR);
  1287   switch (op) {
  1288     case add  : __ addu32(FSR, SSR, FSR); break;
  1289     case sub  : __ subu32(FSR, SSR, FSR); break;
  1290     case mul  : __ mul(FSR, SSR, FSR);    break;
  1291     case _and : __ andr(FSR, SSR, FSR);   break;
  1292     case _or  : __ orr(FSR, SSR, FSR);    break;
  1293     case _xor : __ xorr(FSR, SSR, FSR);   break;
  1294     case shl  : __ sllv(FSR, SSR, FSR);   break; // shift count implicitly masked to 5 bits, as on x86
  1295     case shr  : __ srav(FSR, SSR, FSR);   break; // shift count implicitly masked to 5 bits, as on x86
  1296     case ushr : __ srlv(FSR, SSR, FSR);   break; // shift count implicitly masked to 5 bits, as on x86
  1297     default   : ShouldNotReachHere();
  1298   }
  1299 }
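//
// The masking note above matches Java shift semantics for 32-bit ints:
//
//     int y = x << 33;   // same as x << (33 & 31), i.e. x << 1
//
// so sllv/srav/srlv need no explicit (count & 0x1f) instruction.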
  1301 // the result stored in FSR, SSR,
  1302 // used registers : T2, T3
  1303 void TemplateTable::lop2(Operation op) {
  1304   transition(ltos, ltos);
  1305   __ pop_l(T2, T3);
  1306 #ifdef ASSERT
  1307   {
  1308     Label  L;
  1309     __ beq(T3, R0, L);
  1310     __ delayed()->nop();
  1311     __ bind(L);
  1312   }
  1313 #endif
  1314   switch (op) {
  1315     case add : __ daddu(FSR, T2, FSR); break;
  1316     case sub : __ dsubu(FSR, T2, FSR); break;
  1317     case _and: __ andr(FSR, T2, FSR);  break;
  1318     case _or : __ orr(FSR, T2, FSR);   break;
  1319     case _xor: __ xorr(FSR, T2, FSR);  break;
  1320     default : ShouldNotReachHere();
  1321   }
  1322 }
  1324 // Java requires that 0x80000000 / -1 not raise an overflow exception;
  1325 // the result must be 0x80000000.  The Godson-2 CPU already behaves this
  1326 // way, so unlike x86 we need no special-case handling here.
  1327 void TemplateTable::idiv() {
  1328   transition(itos, itos);
  1329   Label not_zero;
  1331   __ bne(FSR, R0, not_zero);
  1332   __ delayed()->nop();
  1333   __ jmp(Interpreter::_throw_ArithmeticException_entry);
  1334   __ delayed()->nop();
  1335   __ bind(not_zero);
  1337   __ pop_i(SSR);
  1338   if (UseLoongsonISA) {
  1339     __ gsdiv(FSR, SSR, FSR);
  1340   } else {
  1341     __ div(SSR, FSR);
  1342     __ mflo(FSR);
  1343   }
  1344 }
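//
// The overflow corner case, in Java terms:
//
//     Integer.MIN_VALUE / -1 == Integer.MIN_VALUE   // must not trap
//
// MIPS div/gsdiv already produce this wrapped result, so only the
// divide-by-zero check above is emitted.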
  1346 void TemplateTable::irem() {
  1347   transition(itos, itos);
  1348   Label not_zero;
  1349   __ pop_i(SSR);
  1350   __ div(SSR, FSR);
  1352   __ bne(FSR, R0, not_zero);
  1353   __ delayed()->nop();
  1354   //__ brk(7);
  1355   __ jmp(Interpreter::_throw_ArithmeticException_entry);
  1356   __ delayed()->nop();
  1358   __ bind(not_zero);
  1359   __ mfhi(FSR);
  1360 }
  1362 void TemplateTable::lmul() {
  1363   transition(ltos, ltos);
  1364   __ pop_l(T2);
  1365   if(UseLoongsonISA){
  1366     __ gsdmult(FSR, T2, FSR);
  1367   } else {
  1368     __ dmult(T2, FSR);
  1369     __ mflo(FSR);
  1370   }
  1371 }
  1373 // Division by zero throws via Interpreter::_throw_ArithmeticException_entry.
  1374 void TemplateTable::ldiv() {
  1375   transition(ltos, ltos);
  1376   Label normal;
  1378   __ bne(FSR, R0, normal);
  1379   __ delayed()->nop();
  1381   //__ brk(7);    //generate FPE
  1382   __ jmp(Interpreter::_throw_ArithmeticException_entry);
  1383   __ delayed()->nop();
  1385   __ bind(normal);
  1386   __ pop_l(A2, A3);
  1387   if (UseLoongsonISA) {
  1388     __ gsddiv(FSR, A2, FSR);
  1389   } else {
  1390     __ ddiv(A2, FSR);
  1391     __ mflo(FSR);
  1392   }
  1393 }
  1395 // Division by zero throws via Interpreter::_throw_ArithmeticException_entry.
  1396 void TemplateTable::lrem() {
  1397   transition(ltos, ltos);
  1398   Label normal;
  1400   __ bne(FSR, R0, normal);
  1401   __ delayed()->nop();
  1403   __ jmp(Interpreter::_throw_ArithmeticException_entry);
  1404   __ delayed()->nop();
  1406   __ bind(normal);
  1407   __ pop_l (A2, A3);
  1409   if(UseLoongsonISA){
  1410     __ gsdmod(FSR, A2, FSR);
  1411   } else {
  1412     __ ddiv(A2, FSR);
  1413     __ mfhi(FSR);
  1414   }
  1415 }
  1417 // result in FSR
  1418 // used registers : T0
  1419 void TemplateTable::lshl() {
  1420   transition(itos, ltos);
  1421   __ pop_l(T0, T1);
  1422 #ifdef ASSERT
  1423   {
  1424     Label  L;
  1425     __ beq(T1, R0, L);
  1426     __ delayed()->nop();
  1427     //__ stop("lshl, wrong stack");  // <-- Fu 20130930
  1428     __ bind(L);
  1429   }
  1430 #endif
  1431   __ dsllv(FSR, T0, FSR);
  1432 }
  1434 // used registers : T0
  1435 void TemplateTable::lshr() {
  1436   transition(itos, ltos);
  1437   __ pop_l(T0, T1);
  1438 #ifdef ASSERT
  1439   {
  1440     Label  L;
  1441     __ beq(T1, R0, L);
  1442     __ delayed()->nop();
  1443     __ stop("lshr, wrong stack");
  1444     __ bind(L);
  1445   }
  1446 #endif
  1447   __ dsrav(FSR, T0, FSR);
  1448 }
  1450 // used registers : T0
  1451 void TemplateTable::lushr() {
  1452   transition(itos, ltos);
  1453   __ pop_l(T0, T1);
  1454 #ifdef ASSERT
  1455   {
  1456     Label  L;
  1457     __ beq(T1, R0, L);
  1458     __ delayed()->nop();
  1459     __ stop("lushr, wrong stack");
  1460     __ bind(L);
  1461   }
  1462 #endif
  1463   __ dsrlv(FSR, T0, FSR);
  1464 }
  1466 // result in FSF
  1467 void TemplateTable::fop2(Operation op) {
  1468   transition(ftos, ftos);
  1469   switch (op) {
  1470     case add:
  1471       __ lwc1(FTF, at_sp());
  1472       __ add_s(FSF, FTF, FSF);
  1473       break;
  1474     case sub:
  1475       __ lwc1(FTF, at_sp());
  1476       __ sub_s(FSF, FTF, FSF);
  1477       break;
  1478     case mul:
  1479       __ lwc1(FTF, at_sp());
  1480       __ mul_s(FSF, FTF, FSF);
  1481       break;
  1482     case div:
  1483       __ lwc1(FTF, at_sp());
  1484       __ div_s(FSF, FTF, FSF);
  1485       break;
  1486     case rem:
  1487       __ mov_s(F13, FSF);
  1488       __ lwc1(F12, at_sp());
  1489       __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2);
  1490       break;
  1491     default : ShouldNotReachHere();
  1492   }
  1494   __ daddi(SP, SP, 1 * wordSize);
  1495 }
  1497 // result in SSF||FSF
  1498 // strictfp semantics are not handled here
  1499 void TemplateTable::dop2(Operation op) {
  1500   transition(dtos, dtos);
  1501   switch (op) {
  1502     case add:
  1503       __ ldc1(FTF, at_sp());
  1504       __ add_d(FSF, FTF, FSF);
  1505       break;
  1506     case sub:
  1507       __ ldc1(FTF, at_sp());
  1508       __ sub_d(FSF, FTF, FSF);
  1509       break;
  1510     case mul:
  1511       __ ldc1(FTF, at_sp());
  1512       __ mul_d(FSF, FTF, FSF);
  1513       break;
  1514     case div:
  1515       __ ldc1(FTF, at_sp());
  1516       __ div_d(FSF, FTF, FSF);
  1517       break;
  1518     case rem:
  1519       __ mov_d(F13, FSF);
  1520       __ ldc1(F12, at_sp());
  1521       __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2);
  1522       break;
  1523     default : ShouldNotReachHere();
  1524   }
  1526   __ daddi(SP, SP, 2 * wordSize);
  1527 }
  1529 void TemplateTable::ineg() {
  1530   transition(itos, itos);
  1531   __ neg(FSR);
  1532 }
  1534 void TemplateTable::lneg() {
  1535   transition(ltos, ltos);
  1536   __ dsubu(FSR, R0, FSR);
  1537 }
  1539 void TemplateTable::fneg() {
  1540   transition(ftos, ftos);
  1541   __ neg_s(FSF, FSF);
  1542 }
  1544 void TemplateTable::dneg() {
  1545   transition(dtos, dtos);
  1546   __ neg_d(FSF, FSF);
  1547 }
  1549 // used registers : T2
  1550 void TemplateTable::iinc() {
  1551   transition(vtos, vtos);
  1552   locals_index(T2);
  1553   __ lw(FSR, T2, 0);
  1554   __ lb(AT, at_bcp(2));           // get constant
  1555   __ daddu(FSR, FSR, AT);
  1556   __ sw(FSR, T2, 0);
  1557 }
  1559 // used register : T2
  1560 void TemplateTable::wide_iinc() {
  1561   transition(vtos, vtos);
  1562   locals_index_wide(T2);
  1563   __ get_2_byte_integer_at_bcp(FSR, AT, 4);
  1564   __ hswap(FSR);
  1565   __ lw(AT, T2, 0);
  1566   __ daddu(FSR, AT, FSR);
  1567   __ sw(FSR, T2, 0);
  1568 }
  1570 void TemplateTable::convert() {
  1571   // Checking
  1572 #ifdef ASSERT
  1573   {
  1574     TosState tos_in  = ilgl;
  1575     TosState tos_out = ilgl;
  1576     switch (bytecode()) {
  1577       case Bytecodes::_i2l: // fall through
  1578       case Bytecodes::_i2f: // fall through
  1579       case Bytecodes::_i2d: // fall through
  1580       case Bytecodes::_i2b: // fall through
  1581       case Bytecodes::_i2c: // fall through
  1582       case Bytecodes::_i2s: tos_in = itos; break;
  1583       case Bytecodes::_l2i: // fall through
  1584       case Bytecodes::_l2f: // fall through
  1585       case Bytecodes::_l2d: tos_in = ltos; break;
  1586       case Bytecodes::_f2i: // fall through
  1587       case Bytecodes::_f2l: // fall through
  1588       case Bytecodes::_f2d: tos_in = ftos; break;
  1589       case Bytecodes::_d2i: // fall through
  1590       case Bytecodes::_d2l: // fall through
  1591       case Bytecodes::_d2f: tos_in = dtos; break;
  1592       default             : ShouldNotReachHere();
  1593     }
  1594     switch (bytecode()) {
  1595       case Bytecodes::_l2i: // fall through
  1596       case Bytecodes::_f2i: // fall through
  1597       case Bytecodes::_d2i: // fall through
  1598       case Bytecodes::_i2b: // fall through
  1599       case Bytecodes::_i2c: // fall through
  1600       case Bytecodes::_i2s: tos_out = itos; break;
  1601       case Bytecodes::_i2l: // fall through
  1602       case Bytecodes::_f2l: // fall through
  1603       case Bytecodes::_d2l: tos_out = ltos; break;
  1604       case Bytecodes::_i2f: // fall through
  1605       case Bytecodes::_l2f: // fall through
  1606       case Bytecodes::_d2f: tos_out = ftos; break;
  1607       case Bytecodes::_i2d: // fall through
  1608       case Bytecodes::_l2d: // fall through
  1609       case Bytecodes::_f2d: tos_out = dtos; break;
  1610       default             : ShouldNotReachHere();
  1611     }
  1612     transition(tos_in, tos_out);
  1613   }
  1614 #endif // ASSERT
  1616   // Conversion
  1618   switch (bytecode()) {
  1619     case Bytecodes::_i2l:
  1620       __ sll(FSR, FSR, 0);
  1621       break;
  1622     case Bytecodes::_i2f:
  1623       __ mtc1(FSR, FSF);
  1624       __ cvt_s_w(FSF, FSF);
  1625       break;
  1626     case Bytecodes::_i2d:
  1627       __ mtc1(FSR, FSF);
  1628       __ cvt_d_w(FSF, FSF);
  1629       break;
  1630     case Bytecodes::_i2b:
  1631       __ seb(FSR, FSR);
  1632       break;
  1633     case Bytecodes::_i2c:
  1634       __ andi(FSR, FSR, 0xFFFF);  // zero-extend to 16 bits (clears the upper 48 bits)
  1635       break;
  1636     case Bytecodes::_i2s:
  1637       __ seh(FSR, FSR);
  1638       break;
  1639     case Bytecodes::_l2i:
  1640       __ sll(FSR, FSR, 0);
  1641       break;
  1642     case Bytecodes::_l2f:
  1643       __ dmtc1(FSR, FSF);
  1644       __ cvt_s_l(FSF, FSF);
  1645       break;
  1646     case Bytecodes::_l2d:
  1647       __ dmtc1(FSR, FSF);
  1648       __ cvt_d_l(FSF, FSF);
  1649       break;
  1650     case Bytecodes::_f2i:
  1651     {
  1652       Label L;
  1654       __ trunc_w_s(F12, FSF);
  1655       __ move(AT, 0x7fffffff);
  1656       __ mfc1(FSR, F12);
  1657       __ c_un_s(FSF, FSF);    //NaN?
  1658       __ movt(FSR, R0);
  1660       __ bne(AT, FSR, L);
  1661       __ delayed()->lui(T9, 0x8000);
  1663       __ mfc1(AT, FSF);
  1664       __ andr(AT, AT, T9);
  1666       __ movn(FSR, T9, AT);
  1668       __ bind(L);
  1669     }
  1670       break;
  1671     case Bytecodes::_f2l:
  1672     {
  1673       Label L;
  1675       __ trunc_l_s(F12, FSF);
  1676       __ daddiu(AT, R0, -1);
  1677       __ dsrl(AT, AT, 1);
  1678       __ dmfc1(FSR, F12);
  1679       __ c_un_s(FSF, FSF);    //NaN?
  1680       __ movt(FSR, R0);
  1682       __ bne(AT, FSR, L);
  1683       __ delayed()->lui(T9, 0x8000);
  1685       __ mfc1(AT, FSF);
  1686       __ andr(AT, AT, T9);
  1688       __ dsll32(T9, T9, 0);
  1689       __ movn(FSR, T9, AT);
  1691       __ bind(L);
  1692     }
  1693       break;
  1694     case Bytecodes::_f2d:
  1695       __ cvt_d_s(FSF, FSF);
  1696       break;
  1697     case Bytecodes::_d2i:
  1698     {
  1699       Label L;
  1701       __ trunc_w_d(F12, FSF);
  1702       __ move(AT, 0x7fffffff);
  1703       __ mfc1(FSR, F12);
  1705       __ bne(FSR, AT, L);
  1706       __ delayed()->mtc1(R0, F12);
  1708       __ cvt_d_w(F12, F12);
  1709       __ c_ult_d(FSF, F12);
  1710       __ bc1f(L);
  1711       __ delayed()->addiu(T9, R0, -1);
  1713       __ c_un_d(FSF, FSF);    //NaN?
  1714       __ subu32(FSR, T9, AT);
  1715       __ movt(FSR, R0);
  1717       __ bind(L);
  1718     }
  1719       break;
  1720     case Bytecodes::_d2l:
  1721     {
  1722       Label L;
  1724       __ trunc_l_d(F12, FSF);
  1725       __ daddiu(AT, R0, -1);
  1726       __ dsrl(AT, AT, 1);
  1727       __ dmfc1(FSR, F12);
  1729       __ bne(FSR, AT, L);
  1730       __ delayed()->mtc1(R0, F12);
  1732       __ cvt_d_w(F12, F12);
  1733       __ c_ult_d(FSF, F12);
  1734       __ bc1f(L);
  1735       __ delayed()->daddiu(T9, R0, -1);
  1737       __ c_un_d(FSF, FSF);    //NaN?
  1738       __ subu(FSR, T9, AT);
  1739       __ movt(FSR, R0);
  1741       __ bind(L);
  1742     }
  1743       break;
  1744     case Bytecodes::_d2f:
  1745       __ cvt_s_d(FSF, FSF);
  1746       break;
  1747     default             :
  1748       ShouldNotReachHere();
  1749   }
  1750 }
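//
// The _f2i/_d2i/_f2l/_d2l sequences above implement Java's saturating
// conversions, e.g. (required results, per JLS 5.1.3):
//
//     (int)Float.NaN               == 0
//     (int)Float.POSITIVE_INFINITY == Integer.MAX_VALUE
//     (int)-1e30f                  == Integer.MIN_VALUE
//
// which is why the raw trunc result is post-processed for NaN and overflow.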
  1752 void TemplateTable::lcmp() {
  1753   transition(ltos, itos);
  1755   Label low, high, done;
  1756   __ pop(T0);
  1757   __ pop(R0);   // discard the second (unused) stack slot of the long; writes to R0 are ignored
  1758   __ slt(AT, T0, FSR);
  1759   __ bne(AT, R0, low);
  1760   __ delayed()->nop();
  1762   __ bne(T0, FSR, high);
  1763   __ delayed()->nop();
  1765   __ li(FSR, (long)0);
  1766   __ b(done);
  1767   __ delayed()->nop();
  1769   __ bind(low);
  1770   __ li(FSR, (long)-1);
  1771   __ b(done);
  1772   __ delayed()->nop();
  1774   __ bind(high);
  1775   __ li(FSR, (long)1);
  1776   __ b(done);
  1777   __ delayed()->nop();
  1779   __ bind(done);
  1780 }
  1782 void TemplateTable::float_cmp(bool is_float, int unordered_result) {
  1783   Label less, done;
  1785   __ move(FSR, R0);
  1787   if (is_float) {
  1788     __ lwc1(FTF, at_sp());
  1789     __ c_eq_s(FTF, FSF);
  1790     __ bc1t(done);
  1791     __ delayed()->daddi(SP, SP, 1 * wordSize);
  1793     if (unordered_result<0)
  1794       __ c_ult_s(FTF, FSF);
  1795     else
  1796       __ c_olt_s(FTF, FSF);
  1797   } else {
  1798     __ ldc1(FTF, at_sp());
  1799     __ c_eq_d(FTF, FSF);
  1800     __ bc1t(done);
  1801     __ delayed()->daddi(SP, SP, 2 * wordSize);
  1803     if (unordered_result<0)
  1804       __ c_ult_d(FTF, FSF);
  1805     else
  1806       __ c_olt_d(FTF, FSF);
  1807   }
  1808   __ bc1t(less);
  1809   __ delayed()->nop();
  1810   __ move(FSR, 1);
  1811   __ b(done);
  1812   __ delayed()->nop();
  1813   __ bind(less);
  1814   __ move(FSR, -1);
  1815   __ bind(done);
  1816 }
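//
// unordered_result encodes which bytecode this is: fcmpl/dcmpl pass -1
// (NaN compares as "less"), fcmpg/dcmpg pass +1 (NaN compares as
// "greater").  In Java terms:
//
//     Float.NaN < 1.0f   // false: javac emits fcmpg, NaN yields +1
//
// hence the c_ult (unordered-or-less) vs. c_olt (ordered-less) choice above.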
  1819 // used registers : T3, A7, Rnext
  1820 // FSR : return bci, this is defined by the vm specification
  1821 // T2 : MDO taken count
  1822 // T3 : method
  1823 // A7 : offset
  1824 // Rnext : next bytecode, this is required by dispatch_base
  1825 void TemplateTable::branch(bool is_jsr, bool is_wide) {
  1826   __ get_method(T3);
  1827   __ profile_taken_branch(A7, T2);    // only C2 meaningful
  1829 #ifndef CORE
  1830   const ByteSize be_offset = MethodCounters::backedge_counter_offset() +
  1831                              InvocationCounter::counter_offset();
  1832   const ByteSize inv_offset = MethodCounters::invocation_counter_offset() +
  1833                               InvocationCounter::counter_offset();
  1834 #endif // CORE
  1836   // Load up A7 with the branch displacement
  1837   if (!is_wide) {
  1838     __ get_2_byte_integer_at_bcp(A7, AT, 1);
  1839     __ hswap(A7);
  1840   } else {
  1841     __ get_4_byte_integer_at_bcp(A7, AT, 1);
  1842     __ swap(A7);
  1843   }
  1845   // Handle all the JSR stuff here, then exit.
  1846   // It's much shorter and cleaner than intermingling with the non-JSR
  1847   // normal-branch stuff occurring below.
  1848   if (is_jsr) {
  1849     // Pre-load the next target bytecode into Rnext
  1850     __ dadd(AT, BCP, A7);
  1851     __ lbu(Rnext, AT, 0);
  1853     // compute return address as bci in FSR
  1854     __ daddi(FSR, BCP, (is_wide?5:3) - in_bytes(ConstMethod::codes_offset()));
  1855     __ ld(AT, T3, in_bytes(Method::const_offset()));
  1856     __ dsub(FSR, FSR, AT);
  1857     // Adjust the bcp in BCP by the displacement in A7
  1858     __ dadd(BCP, BCP, A7);
  1859     // jsr returns atos that is not an oop
  1860     // Push return address
  1861     __ push_i(FSR);
  1862     // jsr returns vtos
  1863     __ dispatch_only_noverify(vtos);
  1865     return;
  1866   }
  1868   // Normal (non-jsr) branch handling
  1870   // Adjust BCP by the displacement in A7
  1871   __ dadd(BCP, BCP, A7);
  1873 #ifdef CORE
  1874   // Pre-load the next target bytecode into Rnext
  1875   __ lbu(Rnext, BCP, 0);
  1876   // continue with the bytecode @ target
  1877   __ dispatch_only(vtos);
  1878 #else
  1879   assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters");
  1880   Label backedge_counter_overflow;
  1881   Label profile_method;
  1882   Label dispatch;
  1883   if (UseLoopCounter) {
  1884     // increment backedge counter for backward branches
  1885     // T2: MDO bumped taken-count
  1886     // T3: method
  1887     // A7: target offset
  1889     // BCP: target bcp
  1890     // LVP: locals pointer
  1891     __ bgtz(A7, dispatch);  // check if forward or backward branch
  1892     __ delayed()->nop();
  1894     // check if MethodCounters exists
  1895     Label has_counters;
  1896     __ ld(AT, T3, in_bytes(Method::method_counters_offset()));  // use AT as MDO, TEMP
  1897     __ bne(AT, R0, has_counters);
  1898     __ nop();
  1899     __ push(T3);
  1900     //__ push(A7);
  1901     __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters),
  1902                T3);
  1903     //__ pop(A7);
  1904     __ pop(T3);
  1905     __ ld(AT, T3, in_bytes(Method::method_counters_offset()));  // use AT as MDO, TEMP
  1906     __ beq(AT, R0, dispatch);
  1907     __ nop();
  1908     __ bind(has_counters);
  1910     // increment back edge counter
  1911     __ ld(T1, T3, in_bytes(Method::method_counters_offset()));
  1912     __ lw(T0, T1, in_bytes(be_offset));
  1913     __ increment(T0, InvocationCounter::count_increment);
  1914     __ sw(T0, T1, in_bytes(be_offset));
  1916     // load invocation counter
  1917     __ lw(T1, T1, in_bytes(inv_offset));
  1918     // the overflow-buffer bit is included, so no masking is needed
  1920     // add backedge counter and invocation counter
  1921     __ dadd(T1, T1, T0);
  1923     if (ProfileInterpreter) {
  1924       // Test to see if we should create a method data oop
  1925       //__ lui(AT, Assembler::split_high(int(&InvocationCounter::InterpreterProfileLimit)));
  1926       //__ lw(AT, AT, Assembler::split_low(int(&InvocationCounter::InterpreterProfileLimit)));
  1927       // T1 : backedge counter & invocation counter
  1928       __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit);
  1929       __ lw(AT, AT, 0);
  1930       __ slt(AT, T1, AT);
  1931       __ bne(AT, R0, dispatch);
  1932       __ delayed()->nop();
  1934       // if no method data exists, go to profile method
  1935       __ test_method_data_pointer(T1, profile_method);
  1937       if (UseOnStackReplacement) {
  1938         // check for overflow against T2, which is the MDO taken count
  1939         __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit);
  1940         __ lw(AT, AT, 0);
  1941         // T2 was loaded by profile_taken_branch at the beginning
  1942         __ slt(AT, T2, AT);
  1943         __ bne(AT, R0, dispatch);
  1944         __ delayed()->nop();
  1946         // When ProfileInterpreter is on, the backedge_count comes
  1947         // from the methodDataOop, whose value does not get reset on
  1948         // the call to  frequency_counter_overflow().
  1949         // To avoid excessive calls to the overflow routine while
  1950         // the method is being compiled, add a second test to make
  1951         // sure the overflow function is called only once every
  1952         // overflow_frequency.
  1953         const int overflow_frequency = 1024;
  1954         __ andi(AT, T2, overflow_frequency-1);
  1955         __ beq(AT, R0, backedge_counter_overflow);
  1956         __ delayed()->nop();
  1957       }
  1958     } else {
  1959       if (UseOnStackReplacement) {
  1960         // check for overflow against T1, which is the sum of the counters
  1961         __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit);
  1962         __ lw(AT, AT, 0);
  1963         __ slt(AT, T1, AT);
  1964         __ beq(AT, R0, backedge_counter_overflow);
  1965         __ delayed()->nop();
  1966       }
  1967     }
  1968     __ bind(dispatch);
  1969   }
  1971   // Pre-load the next target bytecode into Rnext
  1972   __ lbu(Rnext, BCP, 0);
  1974   // continue with the bytecode @ target
  1975   // FSR: return bci for jsr's, unused otherwise
  1976   // Rnext: target bytecode
  1977   // BCP: target bcp
  1978   __ dispatch_only(vtos);
  1980   if (UseLoopCounter) {
  1981     if (ProfileInterpreter) {
  1982       // Out-of-line code to allocate method data oop.
  1983       __ bind(profile_method);
  1984       __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method));
  1985       __ lbu(Rnext, BCP, 0);
  1986       __ set_method_data_pointer_for_bcp();
  1987       __ b(dispatch);
  1988       __ delayed()->nop();
  1989     }
  1991     if (UseOnStackReplacement) {
  1992       // invocation counter overflow
  1993       __ bind(backedge_counter_overflow);
  1994       __ sub(A7, BCP, A7);  // branch bcp
  1995       call_VM(NOREG, CAST_FROM_FN_PTR(address,
  1996       InterpreterRuntime::frequency_counter_overflow), A7);
  1997       __ lbu(Rnext, BCP, 0);
  1999       // V0: osr nmethod (osr ok) or NULL (osr not possible)
  2000       // V1: osr adapter frame return address
  2001       // Rnext: target bytecode
  2002       // LVP: locals pointer
  2003       // BCP: bcp
  2004       __ beq(V0, R0, dispatch);
  2005       __ delayed()->nop();
  2006       // nmethod may have been invalidated (VM may block upon call_VM return)
  2007       __ lw(T3, V0, nmethod::entry_bci_offset());
  2008       __ move(AT, InvalidOSREntryBci);
  2009       __ beq(AT, T3, dispatch);
  2010       __ delayed()->nop();
  2011       // We need to prepare to execute the OSR method. First we must
  2012       // migrate the locals and monitors off of the stack.
  2013       // V0: osr nmethod (osr ok) or NULL (osr not possible)
  2014       // V1: osr adapter frame return address
  2015       // Rnext: target bytecode
  2016       // LVP: locals pointer
  2017       // BCP: bcp
  2018       __ move(BCP, V0);
  2020       const Register thread = TREG;
  2021 #ifndef OPT_THREAD
  2022       __ get_thread(thread);
  2023 #endif
  2024       call_VM(noreg, CAST_FROM_FN_PTR(address,
  2025       SharedRuntime::OSR_migration_begin));
  2026       // V0 is the OSR buffer; move it to the expected parameter location
  2027       // (refer to osrBufferPointer in c1_LIRAssembler_mips.cpp)
  2028       __ move(T0, V0);
  2030       // pop the interpreter frame
  2031       __ ld(A7, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
  2032       //FIXME, shall we keep the return address on the stack?
  2033       __ leave();                                // remove frame anchor
  2034       __ move(LVP, RA);
  2035       __ move(SP, A7);
  2037       __ move(AT, -(StackAlignmentInBytes));
  2038       __ andr(SP , SP , AT);
  2040       // jump to the OSR entry point
  2041       // (refer to osr_entry in c1_LIRAssembler_mips.cpp)
  2042       __ ld(AT, BCP, nmethod::osr_entry_point_offset());
  2043       __ jr(AT);
  2044       __ delayed()->nop();
  2045     }
  2046   }
  2047 #endif // not CORE
  2048 }
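// The displacement decoded at the top of branch() is a signed big-endian
// 16-bit (or 32-bit for wide) offset relative to the branch opcode itself.
// Illustrative C++ sketch of the target computation (helper name is ours):
//
//   #include <cstdint>
//
//   unsigned char* branch_target(unsigned char* bcp, bool is_wide) {
//     int32_t off;
//     if (is_wide) {
//       off = (int32_t)(((uint32_t)bcp[1] << 24) | ((uint32_t)bcp[2] << 16) |
//                       ((uint32_t)bcp[3] << 8)  |  (uint32_t)bcp[4]);
//     } else {
//       off = (int16_t)(((uint16_t)bcp[1] << 8) | bcp[2]);
//     }
//     return bcp + off;   // BCP still points at the branch opcode here
//   }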
  2051 void TemplateTable::if_0cmp(Condition cc) {
  2052   transition(itos, vtos);
  2053   // assume branch is more often taken than not (loops use backward branches)
  2054   Label not_taken;
  2055   switch(cc) {
  2056     case not_equal:
  2057       __ beq(FSR, R0, not_taken);
  2058       break;
  2059     case equal:
  2060       __ bne(FSR, R0, not_taken);
  2061       break;
  2062     case less:
  2063       __ bgez(FSR, not_taken);
  2064       break;
  2065     case less_equal:
  2066       __ bgtz(FSR, not_taken);
  2067       break;
  2068     case greater:
  2069       __ blez(FSR, not_taken);
  2070       break;
  2071     case greater_equal:
  2072       __ bltz(FSR, not_taken);
  2073       break;
  2074   }
  2075   __ delayed()->nop();
  2077   branch(false, false);
  2079   __ bind(not_taken);
  2080   __ profile_not_taken_branch(FSR);
  2081 }
  2083 void TemplateTable::if_icmp(Condition cc) {
  2084   transition(itos, vtos);
  2085   // assume branch is more often taken than not (loops use backward branches)
  2086   Label not_taken;
  2088   __ pop_i(SSR);
  2089   switch(cc) {
  2090     case not_equal:
  2091       __ beq(SSR, FSR, not_taken);
  2092       break;
  2093     case equal:
  2094       __ bne(SSR, FSR, not_taken);
  2095       break;
  2096     case less:
  2097       __ slt(AT, SSR, FSR);
  2098       __ beq(AT, R0, not_taken);
  2099       break;
  2100     case less_equal:
  2101       __ slt(AT, FSR, SSR);
  2102       __ bne(AT, R0, not_taken);
  2103       break;
  2104     case greater:
  2105       __ slt(AT, FSR, SSR);
  2106       __ beq(AT, R0, not_taken);
  2107       break;
  2108     case greater_equal:
  2109       __ slt(AT, SSR, FSR);
  2110       __ bne(AT, R0, not_taken);
  2111       break;
  2113   __ delayed()->nop();
  2115   branch(false, false);
  2116   __ bind(not_taken);
  2117   __ profile_not_taken_branch(FSR);
  2118 }
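// MIPS has no condition codes, so all six comparisons above are derived from
// the single slt (set-on-less-than) primitive plus beq/bne against R0.
// Illustrative sketch of the taken-conditions (v1 popped, v2 on tos):
//
//   bool taken_lt(int32_t v1, int32_t v2) { return   v1 < v2;  }  // slt AT,v1,v2; taken if AT != 0
//   bool taken_le(int32_t v1, int32_t v2) { return !(v2 < v1); }  // slt AT,v2,v1; taken if AT == 0
//   bool taken_gt(int32_t v1, int32_t v2) { return   v2 < v1;  }  // slt AT,v2,v1; taken if AT != 0
//   bool taken_ge(int32_t v1, int32_t v2) { return !(v1 < v2); }  // slt AT,v1,v2; taken if AT == 0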
  2120 void TemplateTable::if_nullcmp(Condition cc) {
  2121   transition(atos, vtos);
  2122   // assume branch is more often taken than not (loops use backward branches)
  2123   Label not_taken;
  2124   switch(cc) {
  2125     case not_equal:
  2126       __ beq(FSR, R0, not_taken);
  2127       break;
  2128     case equal:
  2129       __ bne(FSR, R0, not_taken);
  2130       break;
  2131     default:
  2132       ShouldNotReachHere();
  2133   }
  2134   __ delayed()->nop();
  2136   branch(false, false);
  2137   __ bind(not_taken);
  2138   __ profile_not_taken_branch(FSR);
  2139 }
  2142 void TemplateTable::if_acmp(Condition cc) {
  2143   transition(atos, vtos);
  2144   // assume branch is more often taken than not (loops use backward branches)
  2145   Label not_taken;
  2147   __ pop_ptr(SSR);
  2148   switch(cc) {
  2149     case not_equal:
  2150       __ beq(SSR, FSR, not_taken);
  2151       break;
  2152     case equal:
  2153       __ bne(SSR, FSR, not_taken);
  2154       break;
  2155     default:
  2156       ShouldNotReachHere();
  2157   }
  2158   __ delayed()->nop();
  2160   branch(false, false);
  2162   __ bind(not_taken);
  2163   __ profile_not_taken_branch(FSR);
  2164 }
  2166 // used registers : T1, T2, T3
  2167 // T1 : method
  2168 // T2 : return bci
  2169 void TemplateTable::ret() {
  2170   transition(vtos, vtos);
  2172   locals_index(T2);
  2173   __ ld(T2, T2, 0);
  2174   __ profile_ret(T2, T3);
  2176   __ get_method(T1);
  2177   __ ld(BCP, T1, in_bytes(Method::const_offset()));
  2178   __ dadd(BCP, BCP, T2);
  2179   __ daddi(BCP, BCP, in_bytes(ConstMethod::codes_offset()));
  2181   __ dispatch_next(vtos);
  2182 }
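// ret semantics as an illustrative sketch: the local variable holds the bci
// saved by jsr, and the new BCP is rebuilt from the method's code base:
//
//   BCP = (address)method->constMethod()
//         + in_bytes(ConstMethod::codes_offset())
//         + return_bci;   // return_bci: the bci value jsr stored in the local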
  2184 // used registers : T1, T2, T3
  2185 // T1 : method
  2186 // T2 : return bci
  2187 void TemplateTable::wide_ret() {
  2188   transition(vtos, vtos);
  2190   locals_index_wide(T2);
  2191   __ ld(T2, T2, 0);                   // get return bci, compute return bcp
  2192   __ profile_ret(T2, T3);
  2194   __ get_method(T1);
  2195   __ ld(BCP, T1, in_bytes(Method::const_offset()));
  2196   __ dadd(BCP, BCP, T2);
  2197   __ daddi(BCP, BCP, in_bytes(ConstMethod::codes_offset()));
  2199   __ dispatch_next(vtos);
  2200 }
  2202 // used register T2, T3, A7, Rnext
  2203 // T2 : bytecode pointer
  2204 // T3 : low
  2205 // A7 : high
  2206 // Rnext : dest bytecode, required by dispatch_base
  2207 void TemplateTable::tableswitch() {
  2208   Label default_case, continue_execution;
  2209   transition(itos, vtos);
  2211   // align BCP
  2212   __ daddi(T2, BCP, BytesPerInt);
  2213   __ li(AT, -BytesPerInt);
  2214   __ andr(T2, T2, AT);
  2216   // load lo & hi
  2217   __ lw(T3, T2, 1 * BytesPerInt);
  2218   __ swap(T3);
  2219   __ lw(A7, T2, 2 * BytesPerInt);
  2220   __ swap(A7);
  2222   // check against lo & hi
  2223   __ slt(AT, FSR, T3);
  2224   __ bne(AT, R0, default_case);
  2225   __ delayed()->nop();
  2227   __ slt(AT, A7, FSR);
  2228   __ bne(AT, R0, default_case);
  2229   __ delayed()->nop();
  2231   // look up the dispatch offset into A7 (big-endian)
  2232   __ dsub(FSR, FSR, T3);
  2233   __ dsll(AT, FSR, Address::times_4);
  2234   __ dadd(AT, T2, AT);
  2235   __ lw(A7, AT, 3 * BytesPerInt);
  2236   __ profile_switch_case(FSR, T9, T3);
  2238   __ bind(continue_execution);
  2239   __ swap(A7);
  2240   __ dadd(BCP, BCP, A7);
  2241   __ lbu(Rnext, BCP, 0);
  2242   __ dispatch_only(vtos);
  2244   // handle default
  2245   __ bind(default_case);
  2246   __ profile_switch_default(FSR);
  2247   __ lw(A7, T2, 0);
  2248   __ b(continue_execution);
  2249   __ delayed()->nop();
  2250 }
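// Operand layout decoded above (after 4-byte alignment of BCP + 1, all fields
// big-endian s4), with the dispatch rule as an illustrative C++ sketch:
//
//   struct TableSwitch {        // at T2 = align4(BCP + 1)
//     int32_t default_offset;   // T2 + 0
//     int32_t low;              // T2 + 4
//     int32_t high;             // T2 + 8
//     int32_t offsets[1];       // T2 + 12, (high - low + 1) entries
//   };
//   int32_t dispatch_offset(const TableSwitch* t, int32_t key) {
//     if (key < t->low || t->high < key) return t->default_offset;
//     return t->offsets[key - t->low];
//   }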
  2252 void TemplateTable::lookupswitch() {
  2253   transition(itos, itos);
  2254   __ stop("lookupswitch bytecode should have been rewritten");
  2255 }
  2257 // used registers : T2, T3, A7, Rnext
  2258 // T2 : bytecode pointer
  2259 // T3 : pair index
  2260 // A7 : offset
  2261 // Rnext : dest bytecode
  2262 // the data after the opcode is the same as lookupswitch
  2263 // see Rewriter::rewrite_method for more information
  2264 void TemplateTable::fast_linearswitch() {
  2265   transition(itos, vtos);
  2266   Label loop_entry, loop, found, continue_execution;
  2268   // byte-swap the key in FSR so we can avoid swapping the table entries
  2269   __ swap(FSR);
  2271   // align BCP
  2272   __ daddi(T2, BCP, BytesPerInt);
  2273   __ li(AT, -BytesPerInt);
  2274   __ andr(T2, T2, AT);
  2276   // set counter
  2277   __ lw(T3, T2, BytesPerInt);
  2278   __ swap(T3);
  2279   __ b(loop_entry);
  2280   __ delayed()->nop();
  2282   // table search
  2283   __ bind(loop);
  2284   // get the entry value
  2285   __ dsll(AT, T3, Address::times_8);
  2286   __ dadd(AT, T2, AT);
  2287   __ lw(AT, AT, 2 * BytesPerInt);
  2289   // found?
  2290   __ beq(FSR, AT, found);
  2291   __ delayed()->nop();
  2293   __ bind(loop_entry);
  2294   __ bgtz(T3, loop);
  2295   __ delayed()->daddiu(T3, T3, -1);
  2297   // default case
  2298   __ profile_switch_default(FSR);
  2299   __ lw(A7, T2, 0);
  2300   __ b(continue_execution);
  2301   __ delayed()->nop();
  2303   // entry found -> get offset
  2304   __ bind(found);
  2305   __ dsll(AT, T3, Address::times_8);
  2306   __ dadd(AT, T2, AT);
  2307   __ lw(A7, AT, 3 * BytesPerInt);
  2308   __ profile_switch_case(T3, FSR, T2);
  2310   // continue execution
  2311   __ bind(continue_execution);
  2312   __ swap(A7);
  2313   __ dadd(BCP, BCP, A7);
  2314   __ lbu(Rnext, BCP, 0);
  2315   __ dispatch_only(vtos);
  2316 }
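// The lookupswitch operands are: default offset, pair count, then npairs of
// (match, offset) pairs, all big-endian s4. Byte-swapping the key once above
// avoids swapping every table entry. Illustrative sketch of the scan, with
// both sides assumed to be in the same byte order:
//
//   int32_t linear_search(const int32_t* tbl, int32_t key) {
//     int32_t npairs = tbl[1];
//     for (int32_t i = npairs - 1; i >= 0; i--) {          // scans downwards, like the stub
//       if (tbl[2 + 2 * i] == key) return tbl[3 + 2 * i];  // match -> offset
//     }
//     return tbl[0];                                       // default offset
//   }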
  2318 // used registers : T0, T1, T2, T3, A7, Rnext
  2319 // T2 : pairs address(array)
  2320 // Rnext : dest bytecode
  2321 // the data after the opcode is the same as lookupswitch
  2322 // see Rewriter::rewrite_method for more information
  2323 void TemplateTable::fast_binaryswitch() {
  2324   transition(itos, vtos);
  2325   // Implementation using the following core algorithm:
  2326   //
  2327   // int binary_search(int key, LookupswitchPair* array, int n) {
  2328   //   // Binary search according to "Methodik des Programmierens" by
  2329   //   // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985.
  2330   //   int i = 0;
  2331   //   int j = n;
  2332   //   while (i+1 < j) {
  2333   //     // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q)
  2334   //     // with      Q: for all i: 0 <= i < n: key < a[i]
  2335   //     // where a stands for the array and assuming that the (inexisting)
  2336   //     // element a[n] is infinitely big.
  2337   //     int h = (i + j) >> 1;
  2338   //     // i < h < j
  2339   //     if (key < array[h].fast_match()) {
  2340   //       j = h;
  2341   //     } else {
  2342   //       i = h;
  2343   //     }
  2344   //   }
  2345   //   // R: a[i] <= key < a[i+1] or Q
  2346   //   // (i.e., if key is within array, i is the correct index)
  2347   //   return i;
  2348   // }
  2350   // register allocation
  2351   const Register array = T2;
  2352   const Register i = T3, j = A7;
  2353   const Register h = T1;
  2354   const Register temp = T0;
  2355   const Register key = FSR;
  2357   // setup array
  2358   __ daddi(array, BCP, 3*BytesPerInt);
  2359   __ li(AT, -BytesPerInt);
  2360   __ andr(array, array, AT);
  2362   // initialize i & j
  2363   __ move(i, R0);
  2364   __ lw(j, array, - 1 * BytesPerInt);
  2365   // Convert j into native byteordering
  2366   __ swap(j);
  2368   // and start
  2369   Label entry;
  2370   __ b(entry);
  2371   __ delayed()->nop();
  2373   // binary search loop
  2374   {
  2375     Label loop;
  2376     __ bind(loop);
  2377     // int h = (i + j) >> 1;
  2378     __ dadd(h, i, j);
  2379     __ dsrl(h, h, 1);
  2380     // if (key < array[h].fast_match()) {
  2381     //   j = h;
  2382     // } else {
  2383     //   i = h;
  2384     // }
  2385     // Convert array[h].match to native byte-ordering before compare
  2386     __ dsll(AT, h, Address::times_8);
  2387     __ dadd(AT, array, AT);
  2388     __ lw(temp, AT, 0 * BytesPerInt);
  2389     __ swap(temp);
  2391     {
  2392       Label set_i, end_of_if;
  2393       __ slt(AT, key, temp);
  2394       __ beq(AT, R0, set_i);
  2395       __ delayed()->nop();
  2397       __ b(end_of_if);
  2398       __ delayed(); __ move(j, h);
  2400       __ bind(set_i);
  2401       __ move(i, h);
  2403       __ bind(end_of_if);
  2404     }
  2405     // while (i+1 < j)
  2406     __ bind(entry);
  2407     __ daddi(h, i, 1);
  2408     __ slt(AT, h, j);
  2409     __ bne(AT, R0, loop);
  2410     __ delayed()->nop();
  2411   }
  2413   // end of binary search, result index is i (must check again!)
  2414   Label default_case;
  2415   // Convert array[i].match to native byte-ordering before compare
  2416   __ dsll(AT, i, Address::times_8);
  2417   __ dadd(AT, array, AT);
  2418   __ lw(temp, AT, 0 * BytesPerInt);
  2419   __ swap(temp);
  2420   __ bne(key, temp, default_case);
  2421   __ delayed()->nop();
  2423   // entry found -> j = offset
  2424   __ dsll(AT, i, Address::times_8);
  2425   __ dadd(AT, array, AT);
  2426   __ lw(j, AT, 1 * BytesPerInt);
  2427   __ profile_switch_case(i, key, array);
  2428   __ swap(j);
  2430   __ dadd(BCP, BCP, j);
  2431   __ lbu(Rnext, BCP, 0);
  2432   __ dispatch_only(vtos);
  2434   // default case -> j = default offset
  2435   __ bind(default_case);
  2436   __ profile_switch_default(i);
  2437   __ lw(j, array, - 2 * BytesPerInt);
  2438   __ swap(j);
  2439   __ dadd(BCP, BCP, j);
  2440   __ lbu(Rnext, BCP, 0);
  2441   __ dispatch_only(vtos);
  2442 }
  2444 void TemplateTable::_return(TosState state) {
  2445   transition(state, state);
  2446   assert(_desc->calls_vm(),
  2447       "inconsistent calls_vm information"); // call in remove_activation
  2449   if (_desc->bytecode() == Bytecodes::_return_register_finalizer) {
  2450     assert(state == vtos, "only valid state");
  2451     __ ld(T1, aaddress(0));
  2452     __ load_klass(LVP, T1);
  2453     __ lw(LVP, LVP, in_bytes(Klass::access_flags_offset()));
  2454     __ move(AT, JVM_ACC_HAS_FINALIZER);
  2455     __ andr(AT, AT, LVP);//by_css
  2456     Label skip_register_finalizer;
  2457     __ beq(AT, R0, skip_register_finalizer);
  2458     __ delayed()->nop();
  2459     __ call_VM(noreg, CAST_FROM_FN_PTR(address,
  2460     InterpreterRuntime::register_finalizer), T1);
  2461     __ bind(skip_register_finalizer);
  2462   }
  2463   __ remove_activation(state, T9);
  2464   __ sync();
  2466   __ jr(T9);
  2467   __ delayed()->nop();
  2468 }
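// The finalizer path above amounts to (illustrative sketch; local0 is the
// receiver in local slot 0):
//
//   if (klass_of(local0)->access_flags() & JVM_ACC_HAS_FINALIZER) {
//     InterpreterRuntime::register_finalizer(thread, local0);  // via call_VM
//   }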
  2470 // ----------------------------------------------------------------------------
  2471 // Volatile variables demand their effects be made known to all CPUs
  2472 // in order.  Store buffers on most chips allow reads & writes to
  2473 // reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode
  2474 // without some kind of memory barrier (i.e., it's not sufficient that
  2475 // the interpreter does not reorder volatile references, the hardware
  2476 // also must not reorder them).
  2477 //
  2478 // According to the new Java Memory Model (JMM):
  2479 // (1) All volatiles are serialized wrt to each other.  ALSO reads &
  2480 //     writes act as acquire & release, so:
  2481 // (2) A read cannot let unrelated NON-volatile memory refs that
  2482 //     happen after the read float up to before the read.  It's OK for
  2483 //     non-volatile memory refs that happen before the volatile read to
  2484 //     float down below it.
  2485 // (3) Similarly, a volatile write cannot let unrelated NON-volatile
  2486 //     memory refs that happen BEFORE the write float down to after the
  2487 //     write.  It's OK for non-volatile memory refs that happen after the
  2488 //     volatile write to float up before it.
  2489 //
  2490 // We only put in barriers around volatile refs (they are expensive),
  2491 // not _between_ memory refs (that would require us to track the
  2492 // flavor of the previous memory refs).  Requirements (2) and (3)
  2493 // require some barriers before volatile stores and after volatile
  2494 // loads.  These nearly cover requirement (1) but miss the
  2495 // volatile-store-volatile-load case.  This final case is placed after
  2496 // volatile-stores although it could just as well go before
  2497 // volatile-loads.
  2498 //void TemplateTable::volatile_barrier(Assembler::Membar_mask_bits
  2499 //                                     order_constraint) {
  2500 void TemplateTable::volatile_barrier( ) {
  2501   // Helper function to insert a is-volatile test and memory barrier
  2502   //if (os::is_MP()) { // Not needed on single CPU
  2503   //  __ membar(order_constraint);
  2504   //}
  2505   if( !os::is_MP() ) return;  // Not needed on single CPU
  2506   __ sync();
  2507 }
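// Barrier placement implied by the rules above, as an illustrative sketch
// (on MP systems each volatile_barrier() is a MIPS sync):
//
//   // volatile load:
//   value = *field;
//   volatile_barrier();   // keep later memory refs below the load
//
//   // volatile store:
//   volatile_barrier();   // keep earlier memory refs above the store
//   *field = value;
//   volatile_barrier();   // covers the volatile-store-volatile-load case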
  2509 // we don't shift the index left by 2 bits in get_cache_and_index_at_bcp,
  2510 // because we always need to shift the index when we use it. A
  2511 // ConstantPoolCacheEntry is 16 bytes long and index is the index into the
  2512 // ConstantPoolCache, so cache + base_offset() + index * 16 is
  2513 // the address of the corresponding ConstantPoolCacheEntry
  2514 // used registers : T2
  2515 // NOTE : the returned index must also be shifted left by 4 to get the address!
  2516 void TemplateTable::resolve_cache_and_index(int byte_no,
  2517                                             Register Rcache,
  2518                                             Register index,
  2519                                             size_t index_size) {
  2520   assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
  2521   const Register temp = A1;
  2522   assert_different_registers(Rcache, index);
  2524   Label resolved;
  2525   __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size);
  2526   // is resolved?
  2527   int i = (int)bytecode();
  2528   __ addi(temp, temp, -i);
  2529   __ beq(temp, R0, resolved);
  2530   __ delayed()->nop();
  2531   // resolve first time through
  2532   address entry;
  2533   switch (bytecode()) {
  2534     case Bytecodes::_getstatic      : // fall through
  2535     case Bytecodes::_putstatic      : // fall through
  2536     case Bytecodes::_getfield       : // fall through
  2537     case Bytecodes::_putfield       :
  2538       entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put);
  2539       break;
  2540     case Bytecodes::_invokevirtual  : // fall through
  2541     case Bytecodes::_invokespecial  : // fall through
  2542     case Bytecodes::_invokestatic   : // fall through
  2543     case Bytecodes::_invokeinterface:
  2544       entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke);
  2545       break;
  2546     case Bytecodes::_invokehandle:
  2547       entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokehandle);
  2548       break;
  2549     case Bytecodes::_invokedynamic:
  2550       entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic);
  2551       break;
  2552     default                          :
  2553       fatal(err_msg("unexpected bytecode: %s", Bytecodes::name(bytecode())));
  2554       break;
  2555   }
  2557   __ move(temp, i);
  2558   __ call_VM(NOREG, entry, temp);
  2560   // Update registers with resolved info
  2561   __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size);
  2562   __ bind(resolved);
  2563 }
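// Shape of the logic above as pseudo-C++ (illustrative sketch; helper names
// are ours): the entry is resolved at most once, then the registers are
// reloaded from the now-resolved entry.
//
//   if (cached_bytecode_at(bcp, byte_no) != bytecode()) {       // not resolved yet
//     call_VM(resolve_entry_for(bytecode()), (int)bytecode());  // fills the entry
//     reload_cache_and_index(Rcache, index);
//   }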
  2565 // The Rcache and index registers must be set before the call
  2566 void TemplateTable::load_field_cp_cache_entry(Register obj,
  2567                                               Register cache,
  2568                                               Register index,
  2569                                               Register off,
  2570                                               Register flags,
  2571                                               bool is_static = false) {
  2572   assert_different_registers(cache, index, flags, off);
  2574   ByteSize cp_base_offset = ConstantPoolCache::base_offset();
  2575   // Field offset
  2576   __ dsll(AT, index, Address::times_ptr);
  2577   __ dadd(AT, cache, AT);
  2578   __ ld(off, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f2_offset()));
  2579   // Flags
  2580   __ ld(flags, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset()));
  2582   // klass overwrite register
  2583   if (is_static) {
  2584     __ ld(obj, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f1_offset()));
  2585     const int mirror_offset = in_bytes(Klass::java_mirror_offset());
  2586     __ ld(obj, Address(obj, mirror_offset));
  2588     __ verify_oop(obj);
  2589   }
  2590 }
  2592 // get the method, itable_index and flags of the current invoke
  2593 void TemplateTable::load_invoke_cp_cache_entry(int byte_no,
  2594                                                Register method,
  2595                                                Register itable_index,
  2596                                                Register flags,
  2597                                                bool is_invokevirtual,
  2598                                                bool is_invokevfinal, /*unused*/
  2599                                                bool is_invokedynamic) {
  2600   // setup registers
  2601   const Register cache = T3;
  2602   const Register index = T1;
  2603   assert_different_registers(method, flags);
  2604   assert_different_registers(method, cache, index);
  2605   assert_different_registers(itable_index, flags);
  2606   assert_different_registers(itable_index, cache, index);
  2607   assert(is_invokevirtual == (byte_no == f2_byte), "is invokevirtual flag redundant");
  2608   // determine constant pool cache field offsets
  2609   const int method_offset = in_bytes(
  2610     ConstantPoolCache::base_offset() +
  2611       ((byte_no == f2_byte)
  2612        ? ConstantPoolCacheEntry::f2_offset()
  2613        : ConstantPoolCacheEntry::f1_offset()));
  2614   const int flags_offset = in_bytes(ConstantPoolCache::base_offset() +
  2615                                     ConstantPoolCacheEntry::flags_offset());
  2616   // access constant pool cache fields
  2617   const int index_offset = in_bytes(ConstantPoolCache::base_offset() +
  2618                                     ConstantPoolCacheEntry::f2_offset());
  2620   size_t index_size = (is_invokedynamic ? sizeof(u4): sizeof(u2));
  2621   resolve_cache_and_index(byte_no, cache, index, index_size);
  2623   //assert(wordSize == 8, "adjust code below");
  2624   // note we shift differently from x86: what we have here is the true index
  2625   // of the ConstantPoolCacheEntry, not the 2-bit-shifted index the x86 version uses
  2626   __ dsll(AT, index, Address::times_ptr);
  2627   __ dadd(AT, cache, AT);
  2628   __ ld(method, AT, method_offset);
  2630   if (itable_index != NOREG) {
  2631     __ ld(itable_index, AT, index_offset);
  2632   }
  2633   __ ld(flags, AT, flags_offset);
  2634 }
  2636 // The cache and index registers are expected to be set before the call.
  2637 // Correct values of the cache and index registers are preserved.
  2638 void TemplateTable::jvmti_post_field_access(Register cache, Register index,
  2639                                             bool is_static, bool has_tos) {
  2640   // do the JVMTI work here to avoid disturbing the register state below
  2641   // We use c_rarg registers here because we want to use the register used in
  2642   // the call to the VM
  2643   if (JvmtiExport::can_post_field_access()) {
  2644     // Check to see if a field access watch has been set before we
  2645     // take the time to call into the VM.
  2646     Label L1;
  2647     // kill FSR
  2648     Register tmp1 = T2;
  2649     Register tmp2 = T1;
  2650     Register tmp3 = T3;
  2651     assert_different_registers(cache, index, AT);
  2652     __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr());
  2653     __ lw(AT, AT, 0);
  2654     __ beq(AT, R0, L1);
  2655     __ delayed()->nop();
  2657     __ get_cache_and_index_at_bcp(tmp2, tmp3, 1);
  2659     // cache entry pointer
  2660     __ daddi(tmp2, tmp2, in_bytes(ConstantPoolCache::base_offset()));
  2661     __ shl(tmp3, LogBytesPerWord);
  2662     __ dadd(tmp2, tmp2, tmp3);
  2663     if (is_static) {
  2664       __ move(tmp1, R0);
  2665     } else {
  2666       __ ld(tmp1, SP, 0);
  2667       __ verify_oop(tmp1);
  2668     }
  2669     // tmp1: object pointer or NULL
  2670     // tmp2: cache entry pointer
  2671     // tmp3: jvalue object on the stack
  2672     __ call_VM(NOREG, CAST_FROM_FN_PTR(address,
  2673                                        InterpreterRuntime::post_field_access),
  2674                tmp1, tmp2, tmp3);
  2675     __ get_cache_and_index_at_bcp(cache, index, 1);
  2676     __ bind(L1);
  2677   }
  2678 }
  2680 void TemplateTable::pop_and_check_object(Register r) {
  2681   __ pop_ptr(r);
  2682   __ null_check(r);  // for field access must check obj.
  2683   __ verify_oop(r);
  2684 }
  2686 // used registers : T1, T2, T3
  2687 // T1 : flags
  2688 // T2 : off
  2689 // T3 : obj
  2690 // T1 : field address
  2691 // Bits 31..28 of the flags together form a 4-bit number, 0 to 8, with the
  2692 // following mapping to the TosState states:
  2693 // btos: 0
  2694 // ctos: 1
  2695 // stos: 2
  2696 // itos: 3
  2697 // ltos: 4
  2698 // ftos: 5
  2699 // dtos: 6
  2700 // atos: 7
  2701 // vtos: 8
  2702 // see ConstantPoolCacheEntry::set_field for more info
  2703 void TemplateTable::getfield_or_static(int byte_no, bool is_static) {
  2704   transition(vtos, vtos);
  2706   const Register cache = T3;
  2707   const Register index = T0;
  2709   const Register obj   = T3;
  2710   const Register off   = T2;
  2711   const Register flags = T1;
  2712   resolve_cache_and_index(byte_no, cache, index, sizeof(u2));
  2713   jvmti_post_field_access(cache, index, is_static, false);
  2714   load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
  2716   if (!is_static) pop_and_check_object(obj);
  2717   __ dadd(index, obj, off);
  2720   Label Done, notByte, notInt, notShort, notChar,
  2721               notLong, notFloat, notObj, notDouble;
  2723   assert(btos == 0, "change code, btos != 0");
  2724   __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift);
  2725   __ andi(flags, flags, 0xf);
  2726   __ bne(flags, R0, notByte);
  2727   __ delayed()->nop();
  2729   // btos
  2730   __ lb(FSR, index, 0);
  2731   __ sd(FSR, SP, - wordSize);
  2733   // Rewrite bytecode to be faster
  2734   if (!is_static) {
  2735     patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2);
  2736   }
  2737   __ b(Done);
  2738   __ delayed()->daddi(SP, SP, - wordSize);
  2740   __ bind(notByte);
  2741   __ move(AT, itos);
  2742   __ bne(flags, AT, notInt);
  2743   __ delayed()->nop();
  2745   // itos
  2746   __ lw(FSR, index, 0);
  2747   __ sd(FSR, SP, - wordSize);
  2749   // Rewrite bytecode to be faster
  2750   if (!is_static) {
  2752     patch_bytecode(Bytecodes::_fast_igetfield, T3, T2);
  2753   }
  2754   __ b(Done);
  2755   __ delayed()->daddi(SP, SP, - wordSize);
  2757   __ bind(notInt);
  2758   __ move(AT, atos);
  2759   __ bne(flags, AT, notObj);
  2760   __ delayed()->nop();
  2762   // atos
  2763   //add for compressedoops
  2764   __ load_heap_oop(FSR, Address(index, 0));
  2765   __ sd(FSR, SP, - wordSize);
  2767   if (!is_static) {
  2769     patch_bytecode(Bytecodes::_fast_agetfield, T3, T2);
  2770   }
  2771   __ b(Done);
  2772   __ delayed()->daddi(SP, SP, - wordSize);
  2774   __ bind(notObj);
  2775   __ move(AT, ctos);
  2776   __ bne(flags, AT, notChar);
  2777   __ delayed()->nop();
  2779   // ctos
  2780   __ lhu(FSR, index, 0);
  2781   __ sd(FSR, SP, - wordSize);
  2783   if (!is_static) {
  2784     patch_bytecode(Bytecodes::_fast_cgetfield, T3, T2);
  2785   }
  2786   __ b(Done);
  2787   __ delayed()->daddi(SP, SP, - wordSize);
  2789   __ bind(notChar);
  2790   __ move(AT, stos);
  2791   __ bne(flags, AT, notShort);
  2792   __ delayed()->nop();
  2794   // stos
  2795   __ lh(FSR, index, 0);
  2796   __ sd(FSR, SP, - wordSize);
  2798   if (!is_static) {
  2799     patch_bytecode(Bytecodes::_fast_sgetfield, T3, T2);
  2800   }
  2801   __ b(Done);
  2802   __ delayed()->daddi(SP, SP, - wordSize);
  2804   __ bind(notShort);
  2805   __ move(AT, ltos);
  2806   __ bne(flags, AT, notLong);
  2807   __ delayed()->nop();
  2809   // FIXME : the load/store should be atomic, but we have no simple way to do this on mips32
  2810   // ltos
  2811   __ ld(FSR, index, 0 * wordSize);
  2812   __ sd(FSR, SP, -2 * wordSize);
  2813   __ sd(R0, SP, -1 * wordSize);
  2815   // Don't rewrite to _fast_lgetfield for potential volatile case.
  2816   __ b(Done);
  2817   __ delayed()->daddi(SP, SP, - 2 * wordSize);
  2819   __ bind(notLong);
  2820   __ move(AT, ftos);
  2821   __ bne(flags, AT, notFloat);
  2822   __ delayed()->nop();
  2824   // ftos
  2825   __ lwc1(FSF, index, 0);
  2826   __ sdc1(FSF, SP, - wordSize);
  2828   if (!is_static) {
  2829     patch_bytecode(Bytecodes::_fast_fgetfield, T3, T2);
  2830   }
  2831   __ b(Done);
  2832   __ delayed()->daddi(SP, SP, - wordSize);
  2834   __ bind(notFloat);
  2835   __ move(AT, dtos);
  2836   __ bne(flags, AT, notDouble);
  2837   __ delayed()->nop();
  2839   // dtos
  2840   __ ldc1(FSF, index, 0 * wordSize);
  2841   __ sdc1(FSF, SP, - 2 * wordSize);
  2842   __ sd(R0, SP, - 1 * wordSize);
  2844   if (!is_static) {
  2845     patch_bytecode(Bytecodes::_fast_dgetfield, T3, T2);
  2846   }
  2847   __ b(Done);
  2848   __ delayed()->daddi(SP, SP, - 2 * wordSize);
  2850   __ bind(notDouble);
  2852   __ stop("Bad state");
  2854   __ bind(Done);
  2855 }
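// The type dispatch above decodes a 4-bit TosState code out of the cache
// entry flags; illustrative sketch (0xf is ConstantPoolCacheEntry::tos_state_mask):
//
//   int tos_state(uintptr_t flags) {
//     return (int)((flags >> ConstantPoolCacheEntry::tos_state_shift) & 0xf);
//   }
//   // 0=btos 1=ctos 2=stos 3=itos 4=ltos 5=ftos 6=dtos 7=atos 8=vtos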
  2858 void TemplateTable::getfield(int byte_no) {
  2859   getfield_or_static(byte_no, false);
  2860 }
  2862 void TemplateTable::getstatic(int byte_no) {
  2863   getfield_or_static(byte_no, true);
  2864 }
  2866 // The cache and index registers are expected to be set before the call.
  2867 // The function may destroy various registers, just not the cache and index registers.
  2868 void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) {
  2869   transition(vtos, vtos);
  2871   ByteSize cp_base_offset = ConstantPoolCache::base_offset();
  2873   if (JvmtiExport::can_post_field_modification()) {
  2874     // Check to see if a field modification watch has been set before
  2875     // we take the time to call into the VM.
  2876     Label L1;
  2877     //kill AT, T1, T2, T3, T9
  2878     Register tmp1 = T2;
  2879     Register tmp2 = T1;
  2880     Register tmp3 = T3;
  2881     Register tmp4 = T9;
  2882     assert_different_registers(cache, index, tmp4);
  2884     __ li(AT, JvmtiExport::get_field_modification_count_addr());
  2885     __ lw(AT, AT, 0);
  2886     __ beq(AT, R0, L1);
  2887     __ delayed()->nop();
  2889     __ get_cache_and_index_at_bcp(tmp2, tmp4, 1);
  2891     if (is_static) {
  2892       __ move(tmp1, R0);
  2893     } else {
  2894       // Life is harder. The stack holds the value on top, followed by
  2895       // the object.  We don't know the size of the value, though; it
  2896       // could be one or two words depending on its type. As a result,
  2897       // we must find the type to determine where the object is.
  2898       Label two_word, valsize_known;
  2899       __ dsll(AT, tmp4, Address::times_8);
  2900       __ dadd(AT, tmp2, AT);
  2901       __ ld(tmp3, AT, in_bytes(cp_base_offset +
  2902                                ConstantPoolCacheEntry::flags_offset()));
  2903       __ shr(tmp3, ConstantPoolCacheEntry::tos_state_shift);
  2905       // Make sure we don't need to mask tmp3 for tos_state_shift
  2906       // after the above shift
  2907       ConstantPoolCacheEntry::verify_tos_state_shift();
  2908       __ move(tmp1, SP);
  2909       __ move(AT, ltos);
  2910       __ beq(tmp3, AT, two_word);
  2911       __ delayed()->nop();
  2912       __ move(AT, dtos);
  2913       __ beq(tmp3, AT, two_word);
  2914       __ delayed()->nop();
  2915       __ b(valsize_known);
  2916       __ delayed()->daddi(tmp1, tmp1, Interpreter::expr_offset_in_bytes(1) );
  2918       __ bind(two_word);
  2919       __ daddi(tmp1, tmp1, Interpreter::expr_offset_in_bytes(2));
  2921       __ bind(valsize_known);
  2922       // setup object pointer
  2923       __ ld(tmp1, tmp1, 0*wordSize);
  2924     }
  2925     // cache entry pointer
  2926     __ daddi(tmp2, tmp2, in_bytes(cp_base_offset));
  2927     __ shl(tmp4, LogBytesPerWord);
  2928     __ daddu(tmp2, tmp2, tmp4);
  2929     // object (tos)
  2930     __ move(tmp3, SP);
  2931     // tmp1: object pointer set up above (NULL if static)
  2932     // tmp2: cache entry pointer
  2933     // tmp3: jvalue object on the stack
  2934     __ call_VM(NOREG,
  2935                CAST_FROM_FN_PTR(address,
  2936                                 InterpreterRuntime::post_field_modification),
  2937                tmp1, tmp2, tmp3);
  2938     __ get_cache_and_index_at_bcp(cache, index, 1);
  2939     __ bind(L1);
  2940   }
  2941 }
  2943 // used registers : T0, T1, T2, T3, T8
  2944 // T1 : flags
  2945 // T2 : off
  2946 // T3 : obj
  2947 // T8 : volatile bit
  2948 // see ConstantPoolCacheEntry::set_field for more info
  2949 void TemplateTable::putfield_or_static(int byte_no, bool is_static) {
  2950   transition(vtos, vtos);
  2952   const Register cache = T3;
  2953   const Register index = T0;
  2954   const Register obj   = T3;
  2955   const Register off   = T2;
  2956   const Register flags = T1;
  2957   const Register bc    = T3;
  2959   resolve_cache_and_index(byte_no, cache, index, sizeof(u2));
  2960   jvmti_post_field_mod(cache, index, is_static);
  2961   load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
  2963   Label notVolatile, Done;
  2964   __ move(AT, 1<<ConstantPoolCacheEntry::is_volatile_shift);
  2965   __ andr(T8, flags, AT);
  2967   Label notByte, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble;
  2969   assert(btos == 0, "change code, btos != 0");
  2970   // btos
  2971   __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift);
  2972   __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask);
  2973   __ bne(flags, R0, notByte);
  2974   __ delayed()->nop();
  2976   __ pop(btos);
  2977   if (!is_static) {
  2978     pop_and_check_object(obj);
  2979   }
  2980   __ dadd(AT, obj, off);
  2981   __ sb(FSR, AT, 0);
  2983   if (!is_static) {
  2984     patch_bytecode(Bytecodes::_fast_bputfield, bc, off, true, byte_no);
  2985   }
  2986   __ b(Done);
  2987   __ delayed()->nop();
  2989   __ bind(notByte);
  2990   // itos
  2991   __ move(AT, itos);
  2992   __ bne(flags, AT, notInt);
  2993   __ delayed()->nop();
  2995   __ pop(itos);
  2996   if (!is_static) {
  2997     pop_and_check_object(obj);
  2998   }
  2999   __ dadd(AT, obj, off);
  3000   __ sw(FSR, AT, 0);
  3002   if (!is_static) {
  3003     patch_bytecode(Bytecodes::_fast_iputfield, bc, off, true, byte_no);
  3004   }
  3005   __ b(Done);
  3006   __ delayed()->nop();
  3007   __ bind(notInt);
  3008   // atos
  3009   __ move(AT, atos);
  3010   __ bne(flags, AT, notObj);
  3011   __ delayed()->nop();
  3013   __ pop(atos);
  3014   if (!is_static) {
  3015     pop_and_check_object(obj);
  3016   }
  3018   __ dadd(AT, obj, off);
  3019   __ store_heap_oop(Address(AT, 0), FSR);
  3020   __ store_check(obj);
  3022   if (!is_static) {
  3023     patch_bytecode(Bytecodes::_fast_aputfield, bc, off, true, byte_no);
  3024   }
  3025   __ b(Done);
  3026   __ delayed()->nop();
  3027   __ bind(notObj);
  3028   // ctos
  3029   __ move(AT, ctos);
  3030   __ bne(flags, AT, notChar);
  3031   __ delayed()->nop();
  3033   __ pop(ctos);
  3034   if (!is_static) {
  3035     pop_and_check_object(obj);
  3036   }
  3037   __ dadd(AT, obj, off);
  3038   __ sh(FSR, AT, 0);
  3039   if (!is_static) {
  3040     patch_bytecode(Bytecodes::_fast_cputfield, bc, off, true, byte_no);
  3041   }
  3042   __ b(Done);
  3043   __ delayed()->nop();
  3044   __ bind(notChar);
  3045   // stos
  3046   __ move(AT, stos);
  3047   __ bne(flags, AT, notShort);
  3048   __ delayed()->nop();
  3050   __ pop(stos);
  3051   if (!is_static) {
  3052     pop_and_check_object(obj);
  3053   }
  3054   __ dadd(AT, obj, off);
  3055   __ sh(FSR, AT, 0);
  3056   if (!is_static) {
  3057     patch_bytecode(Bytecodes::_fast_sputfield, bc, off, true, byte_no);
  3058   }
  3059   __ b(Done);
  3060   __ delayed()->nop();
  3061   __ bind(notShort);
  3062   // ltos
  3063   __ move(AT, ltos);
  3064   __ bne(flags, AT, notLong);
  3065   __ delayed()->nop();
  3067   // FIXME: there is no simple way to load/store 64-bit data in an atomic operation,
  3068   // so we just ignore the volatile flag.
  3069   //Label notVolatileLong;
  3070   //__ beq(T1, R0, notVolatileLong);
  3071   //__ delayed()->nop();
  3073   //addent = 2 * wordSize;
  3074   // no need
  3075   //__ lw(FSR, SP, 0);
  3076   //__ lw(SSR, SP, 1 * wordSize);
  3077   //if (!is_static) {
  3078   //  __ lw(T3, SP, addent);
  3079   //  addent += 1 * wordSize;
  3080   //  __ verify_oop(T3);
  3081   //}
  3083   //__ daddu(AT, T3, T2);
  3085   // Replace with real volatile test
  3086   // NOTE : we assume that sdc1&ldc1 operate in 32-bit, this is true for Godson2 even in 64-bit kernel
  3087   // last modified by yjl 7/12/2005
  3088   //__ ldc1(FSF, SP, 0);
  3089   //__ sdc1(FSF, AT, 0);
  3090   //volatile_barrier();
  3092   // Don't rewrite volatile version
  3093   //__ b(notVolatile);
  3094   //__ delayed()->addiu(SP, SP, addent);
  3096   //__ bind(notVolatileLong);
  3098   //__ pop(ltos);  // overwrites edx
  3099   //  __ lw(FSR, SP, 0 * wordSize);
  3100   //  __ lw(SSR, SP, 1 * wordSize);
  3101   //  __ daddi(SP, SP, 2*wordSize);
  3102   __ pop(ltos);
  3103   if (!is_static) {
  3104     pop_and_check_object(obj);
  3105   }
  3106   __ dadd(AT, obj, off);
  3107   __ sd(FSR, AT, 0);
  3108   if (!is_static) {
  3109     patch_bytecode(Bytecodes::_fast_lputfield, bc, off, true, byte_no);
  3110   }
  3111   __ b(notVolatile);
  3112   __ delayed()->nop();
  3114   __ bind(notLong);
  3115   // ftos
  3116   __ move(AT, ftos);
  3117   __ bne(flags, AT, notFloat);
  3118   __ delayed()->nop();
  3120   __ pop(ftos);
  3121   if (!is_static) {
  3122     pop_and_check_object(obj);
  3123   }
  3124   __ dadd(AT, obj, off);
  3125   __ swc1(FSF, AT, 0);
  3126   if (!is_static) {
  3127     patch_bytecode(Bytecodes::_fast_fputfield, bc, off, true, byte_no);
  3128   }
  3129   __ b(Done);
  3130   __ delayed()->nop();
  3131   __ bind(notFloat);
  3132   // dtos
  3133   __ move(AT, dtos);
  3134   __ bne(flags, AT, notDouble);
  3135   __ delayed()->nop();
  3137   __ pop(dtos);
  3138   if (!is_static) {
  3139     pop_and_check_object(obj);
  3140   }
  3141   __ dadd(AT, obj, off);
  3142   __ sdc1(FSF, AT, 0);
  3143   if (!is_static) {
  3144     patch_bytecode(Bytecodes::_fast_dputfield, bc, off, true, byte_no);
  3145   }
  3147 #ifdef ASSERT
  3148   __ b(Done);
  3149   __ delayed()->nop();
  3151   __ bind(notDouble);
  3152   __ stop("Bad state");
  3153 #endif
  3155   __ bind(Done);
  3157   // Check for volatile store
  3158   __ beq(T8, R0, notVolatile);
  3159   __ delayed()->nop();
  3160   volatile_barrier( );
  3161   __ bind(notVolatile);
  3162 }
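// Volatile handling above, in essence (illustrative sketch): the volatile bit
// is latched into T8 before flags is clobbered by the type dispatch, and the
// barrier is issued only on the volatile path.
//
//   bool is_volatile = (flags >> ConstantPoolCacheEntry::is_volatile_shift) & 1;
//   /* ... pop value, store it into the field ... */
//   if (is_volatile) volatile_barrier();  // order the store for other CPUs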
  3164 void TemplateTable::putfield(int byte_no) {
  3165   putfield_or_static(byte_no, false);
  3166 }
  3168 void TemplateTable::putstatic(int byte_no) {
  3169   putfield_or_static(byte_no, true);
  3170 }
  3172 // used registers : T1, T2, T3
  3173 // T1 : cp_entry
  3174 // T2 : obj
  3175 // T3 : value pointer
  3176 void TemplateTable::jvmti_post_fast_field_mod() {
  3177   if (JvmtiExport::can_post_field_modification()) {
  3178     // Check to see if a field modification watch has been set before
  3179     // we take the time to call into the VM.
  3180     Label L2;
  3181     //kill AT, T1, T2, T3, T9
  3182     Register tmp1 = T2;
  3183     Register tmp2 = T1;
  3184     Register tmp3 = T3;
  3185     Register tmp4 = T9;
  3186     __ li(AT, JvmtiExport::get_field_modification_count_addr());
  3187     __ lw(tmp3, AT, 0);
  3188     __ beq(tmp3, R0, L2);
  3189     __ delayed()->nop();
  3190     __ pop_ptr(tmp1);
  3191     __ verify_oop(tmp1);
  3192     __ push_ptr(tmp1);
  3193     switch (bytecode()) {          // load values into the jvalue object
  3194     case Bytecodes::_fast_aputfield: __ push_ptr(FSR); break;
  3195     case Bytecodes::_fast_bputfield: // fall through
  3196     case Bytecodes::_fast_sputfield: // fall through
  3197     case Bytecodes::_fast_cputfield: // fall through
  3198     case Bytecodes::_fast_iputfield: __ push_i(FSR); break;
  3199     case Bytecodes::_fast_dputfield: __ push_d(FSF); break;
  3200     case Bytecodes::_fast_fputfield: __ push_f(); break;
  3201     case Bytecodes::_fast_lputfield: __ push_l(FSR); break;
  3202       default:  ShouldNotReachHere();
  3203     }
  3204     __ move(tmp3, SP);
  3205     // access constant pool cache entry
  3206     __ get_cache_entry_pointer_at_bcp(tmp2, FSR, 1);
  3207     __ verify_oop(tmp1);
  3208     // tmp1: object pointer copied above
  3209     // tmp2: cache entry pointer
  3210     // tmp3: jvalue object on the stack
  3211     __ call_VM(NOREG,
  3212                CAST_FROM_FN_PTR(address,
  3213                                 InterpreterRuntime::post_field_modification),
  3214                tmp1, tmp2, tmp3);
  3216     switch (bytecode()) {             // restore tos values
  3217     case Bytecodes::_fast_aputfield: __ pop_ptr(FSR); break;
  3218     case Bytecodes::_fast_bputfield: // fall through
  3219     case Bytecodes::_fast_sputfield: // fall through
  3220     case Bytecodes::_fast_cputfield: // fall through
  3221     case Bytecodes::_fast_iputfield: __ pop_i(FSR); break;
  3222     case Bytecodes::_fast_dputfield: __ pop_d(); break;
  3223     case Bytecodes::_fast_fputfield: __ pop_f(); break;
  3224     case Bytecodes::_fast_lputfield: __ pop_l(FSR); break;
  3225     }
  3226     __ bind(L2);
  3227   }
  3228 }
  3230 // used registers : T2, T3, T1
  3231 // T2 : index & off & field address
  3232 // T3 : cache & obj
  3233 // T1 : flags
  3234 void TemplateTable::fast_storefield(TosState state) {
  3235   transition(state, vtos);
  3237   ByteSize base = ConstantPoolCache::base_offset();
  3239   jvmti_post_fast_field_mod();
  3241   // access constant pool cache
  3242   __ get_cache_and_index_at_bcp(T3, T2, 1);
  3244   // test for volatile with T1; note T1 is the tos register for lputfield.
  3245   __ dsll(AT, T2, Address::times_8);
  3246   __ dadd(AT, T3, AT);
  3247   __ ld(T1, AT, in_bytes(base + ConstantPoolCacheEntry::flags_offset()));
  3249   // replace index with field offset from cache entry
  3250   __ ld(T2, AT, in_bytes(base + ConstantPoolCacheEntry::f2_offset()));
  3252   // Doug Lea believes this is not needed with current Sparcs (TSO) and Intel (PSO).
  3253   // volatile_barrier( );
  3255   Label notVolatile, Done;
  3256   // Check for volatile store
  3257   __ move(AT, 1<<ConstantPoolCacheEntry::is_volatile_shift);
  3258   __ andr(AT, T1, AT);
  3259   __ beq(AT, R0, notVolatile);
  3260   __ delayed()->nop();
  3263   // Get object from stack
  3264   pop_and_check_object(T3);
  3266   // field address
  3267   __ dadd(T2, T3, T2);
  3269   // access field
  3270   switch (bytecode()) {
  3271     case Bytecodes::_fast_bputfield:
  3272       __ sb(FSR, T2, 0);
  3273       break;
  3274     case Bytecodes::_fast_sputfield: // fall through
  3275     case Bytecodes::_fast_cputfield:
  3276       __ sh(FSR, T2, 0);
  3277       break;
  3278     case Bytecodes::_fast_iputfield:
  3279       __ sw(FSR, T2, 0);
  3280       break;
  3281     case Bytecodes::_fast_lputfield:
  3282       __ sd(FSR, T2, 0 * wordSize);
  3283       break;
  3284     case Bytecodes::_fast_fputfield:
  3285       __ swc1(FSF, T2, 0);
  3286       break;
  3287     case Bytecodes::_fast_dputfield:
  3288       __ sdc1(FSF, T2, 0 * wordSize);
  3289       break;
  3290     case Bytecodes::_fast_aputfield:
  3291       __ store_heap_oop(Address(T2, 0), FSR);
  3292       __ store_check(T3);
  3293       break;
  3294     default:
  3295       ShouldNotReachHere();
  3296   }
  3298   Label done;
  3299   volatile_barrier( );
  3300   __ b(done);
  3301   __ delayed()->nop();
  3303   // Same code as above, but no need to test T1 for volatile.
  3304   __ bind(notVolatile);
  3305   pop_and_check_object(T3);
  3306   //get the field address
  3307   __ dadd(T2, T3, T2);
  3309   // access field
  3310   switch (bytecode()) {
  3311     case Bytecodes::_fast_bputfield:
  3312       __ sb(FSR, T2, 0);
  3313       break;
  3314     case Bytecodes::_fast_sputfield: // fall through
  3315     case Bytecodes::_fast_cputfield:
  3316       __ sh(FSR, T2, 0);
  3317       break;
  3318     case Bytecodes::_fast_iputfield:
  3319       __ sw(FSR, T2, 0);
  3320       break;
  3321     case Bytecodes::_fast_lputfield:
  3322       __ sd(FSR, T2, 0 * wordSize);
  3323       break;
  3324     case Bytecodes::_fast_fputfield:
  3325       __ swc1(FSF, T2, 0);
  3326       break;
  3327     case Bytecodes::_fast_dputfield:
  3328       __ sdc1(FSF, T2, 0 * wordSize);
  3329       break;
  3330     case Bytecodes::_fast_aputfield:
  3331       //add for compressedoops
  3332       __ store_heap_oop(Address(T2, 0), FSR);
  3333       __ store_check(T3);
  3334       break;
  3335     default:
  3336       ShouldNotReachHere();
  3337   }
  3338   __ bind(done);
  3339 }
  3341 // used registers : T2, T3, T1
  3342 // T3 : cp_entry & cache
  3343 // T2 : index & offset
  3344 void TemplateTable::fast_accessfield(TosState state) {
  3345   transition(atos, state);
  3347   // do the JVMTI work here to avoid disturbing the register state below
  3348   if (JvmtiExport::can_post_field_access()) {
  3349     // Check to see if a field access watch has been set before we take
  3350     // the time to call into the VM.
  3351     Label L1;
  3352     __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr());
  3353     __ lw(T3, AT, 0);
  3354     __ beq(T3, R0, L1);
  3355     __ delayed()->nop();
  3356     // access constant pool cache entry
  3357     __ get_cache_entry_pointer_at_bcp(T3, T1, 1);
  3358     __ move(TSR, FSR);
  3359     __ verify_oop(FSR);
  3360     // FSR: object pointer copied above
  3361     // T3: cache entry pointer
  3362     __ call_VM(NOREG,
  3363                CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access),
  3364                FSR, T3);
  3365     __ move(FSR, TSR);
  3366     __ bind(L1);
  3367   }
  3369   // access constant pool cache
  3370   __ get_cache_and_index_at_bcp(T3, T2, 1);
  3371   // replace index with field offset from cache entry
  3372   __ dsll(AT, T2, Address::times_8);
  3373   __ dadd(AT, T3, AT);
  3374   __ ld(T2, AT, in_bytes(ConstantPoolCache::base_offset()
  3375                          + ConstantPoolCacheEntry::f2_offset()));
  3377   // FSR: object
  3378   __ verify_oop(FSR);
  3379   __ null_check(FSR);
  3380   // field addresses
  3381   __ dadd(FSR, FSR, T2);
  3383   // access field
  3384   switch (bytecode()) {
  3385     case Bytecodes::_fast_bgetfield:
  3386       __ lb(FSR, FSR, 0);
  3387       break;
  3388     case Bytecodes::_fast_sgetfield:
  3389       __ lh(FSR, FSR, 0);
  3390       break;
  3391     case Bytecodes::_fast_cgetfield:
  3392       __ lhu(FSR, FSR, 0);
  3393       break;
  3394     case Bytecodes::_fast_igetfield:
  3395       __ lw(FSR, FSR, 0);
  3396       break;
  3397     case Bytecodes::_fast_lgetfield:
  3398       __ stop("should not be rewritten");
  3399       break;
  3400     case Bytecodes::_fast_fgetfield:
  3401       __ lwc1(FSF, FSR, 0);
  3402       break;
  3403     case Bytecodes::_fast_dgetfield:
  3404       __ ldc1(FSF, FSR, 0);
  3405       break;
  3406     case Bytecodes::_fast_agetfield:
  3407       //add for compressedoops
  3408       __ load_heap_oop(FSR, Address(FSR, 0));
  3409       __ verify_oop(FSR);
  3410       break;
  3411     default:
  3412       ShouldNotReachHere();
  3413   }
  3415   // Doug Lea believes this is not needed with current Sparcs(TSO) and Intel(PSO)
  3416   // volatile_barrier( );
  3417 }
  3419 // generator for _fast_iaccess_0, _fast_aaccess_0, _fast_faccess_0
  3420 // used registers : T1, T2, T3
  3421 // T1 : obj & field address
  3422 // T2 : off
  3423 // T3 : cache
  3424 // T1 : index
  3425 void TemplateTable::fast_xaccess(TosState state) {
  3426   transition(vtos, state);
  3428   // get receiver
  3429   __ ld(T1, aaddress(0));
  3430   // access constant pool cache
  3431   __ get_cache_and_index_at_bcp(T3, T2, 2);
  3432   __ dsll(AT, T2, Address::times_8);
  3433   __ dadd(AT, T3, AT);
  3434   __ ld(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset()));
  3436   // make sure exception is reported in correct bcp range (getfield is
  3437   // next instruction)
  3438   __ daddi(BCP, BCP, 1);
  3439   __ null_check(T1);
  3440   __ dadd(T1, T1, T2);
  3442   if (state == itos) {
  3443     __ lw(FSR, T1, 0);
  3444   } else if (state == atos) {
  3445     __ load_heap_oop(FSR, Address(T1, 0));
  3446     __ verify_oop(FSR);
  3447   } else if (state == ftos) {
  3448     __ lwc1(FSF, T1, 0);
  3449   } else {
  3450     ShouldNotReachHere();
  3451   }
  3452   __ daddi(BCP, BCP, -1);
  3453 }
  3457 //-----------------------------------------------------------------------------
  3458 // Calls
  3460 void TemplateTable::count_calls(Register method, Register temp) {
  3461   // implemented elsewhere
  3462   ShouldNotReachHere();
  3463 }
  3465 // method, index, recv, flags: T1, T2, T3, T1
  3466 // byte_no = 2 for _invokevirtual, 1 otherwise
  3467 // T0 : return address
  3468 // get the method & index of the invoke, and push the return address of
  3469 // the invoke (first word in the frame);
  3470 // this address is where the return code jumps to.
  3471 // NOTE : this method will set T3 & T1 as recv & flags
  3472 void TemplateTable::prepare_invoke(int byte_no,
  3473                                    Register method,  // linked method (or i-klass)
  3474                                    Register index,   // itable index, MethodType, etc.
  3475                                    Register recv,    // if caller wants to see it
  3476                                    Register flags    // if caller wants to test it
  3477                                    ) {
  3478   // determine flags
  3479   const Bytecodes::Code code = bytecode();
  3480   const bool is_invokeinterface  = code == Bytecodes::_invokeinterface;
  3481   const bool is_invokedynamic    = code == Bytecodes::_invokedynamic;
  3482   const bool is_invokehandle     = code == Bytecodes::_invokehandle;
  3483   const bool is_invokevirtual    = code == Bytecodes::_invokevirtual;
  3484   const bool is_invokespecial    = code == Bytecodes::_invokespecial;
  3485   const bool load_receiver       = (recv  != noreg);
  3486   const bool save_flags          = (flags != noreg);
  3487   assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic),"");
  3488   assert(save_flags    == (is_invokeinterface || is_invokevirtual), "need flags for vfinal");
  3489   assert(flags == noreg || flags == T1, "error flags reg.");
  3490   assert(recv  == noreg || recv  == T3, "error recv reg.");
  3492   // setup registers & access constant pool cache
  3493   if(recv == noreg) recv  = T3;
  3494   if(flags == noreg) flags  = T1;
  3495   assert_different_registers(method, index, recv, flags);
  3497   // save 'interpreter return address'
  3498   __ save_bcp();
  3500   load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic);
  3502   if (is_invokedynamic || is_invokehandle) {
  3503    Label L_no_push;
  3504      __ move(AT, (1 << ConstantPoolCacheEntry::has_appendix_shift));
  3505      __ andr(AT, AT, flags);
  3506      __ beq(AT, R0, L_no_push);
  3507      __ delayed()->nop();
  3508      // Push the appendix as a trailing parameter.
  3509      // This must be done before we get the receiver,
  3510      // since the parameter_size includes it.
  3511      Register tmp = SSR;
  3512      __ push(tmp);
  3513      __ move(tmp, index);
  3514      assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0");
  3515      __ load_resolved_reference_at_index(index, tmp);
  3516      __ pop(tmp);
  3517      __ push(index);  // push appendix (MethodType, CallSite, etc.)
  3518      __ bind(L_no_push);
  3519   }
  3521   // load receiver if needed (after appendix is pushed so parameter size is correct)
  3522   // Note: no return address pushed yet
  3523   if (load_receiver) {
  3524     __ move(AT, ConstantPoolCacheEntry::parameter_size_mask);
  3525     __ andr(recv, flags, AT);
  3526     // 2014/07/31 Fu: Since we won't push RA on stack, no_return_pc_pushed_yet should be 0.
  3527     const int no_return_pc_pushed_yet = 0;  // argument slot correction before we push return address
  3528     const int receiver_is_at_end      = -1;  // back off one slot to get receiver
  3529     Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end);
  3530     __ ld(recv, recv_addr);
  3531     __ verify_oop(recv);
  3532   }
  3533   if(save_flags) {
  3534     __ move(BCP, flags);
  3535   }
  3537   // compute return type
  3538   __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift);
  3539   __ andi(flags, flags, 0xf);
  3541   // Make sure we don't need to mask flags for tos_state_shift after the above shift
  3542   ConstantPoolCacheEntry::verify_tos_state_shift();
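         // flags now holds the result TosState (0..15); it is used below to
         // index the per-state invoke return entry table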
  3543   // load return address
  3544   {
  3545     const address table = (address) Interpreter::invoke_return_entry_table_for(code);
  3546     __ li(AT, (long)table);
  3547     __ dsll(flags, flags, LogBytesPerWord);
  3548     __ dadd(AT, AT, flags);
  3549     __ ld(RA, AT, 0);
  3550   }
  3552   if (save_flags) {
  3553     __ move(flags, BCP);
  3554     __ restore_bcp();
  3555   }
  3556 }
  3558 // used registers : T0, T3, T1, T2
  3559 // T3 : recv; the T3/T1 register convention is established by prepare_invoke
  3560 // T1 : flags, klass
  3561 // Rmethod : method & index; the index must be in Rmethod
  3562 void TemplateTable::invokevirtual_helper(Register index,
  3563                                          Register recv,
  3564                                          Register flags) {
  3566   assert_different_registers(index, recv, flags, T2);
  3568   // Test for an invoke of a final method
  3569   Label notFinal;
  3570   __ move(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift));
  3571   __ andr(AT, flags, AT);
  3572   __ beq(AT, R0, notFinal);
  3573   __ delayed()->nop();
  3575   Register method = index;  // method must be Rmethod
  3576   assert(method == Rmethod, "methodOop must be Rmethod for interpreter calling convention");
  3578   // do the call - the index is actually the method to call
  3579   // the index is indeed a methodOop, since this is a vfinal call;
  3580   // see ConstantPoolCacheEntry::set_method for more info
  3582   __ verify_oop(method);
  3584   // It's final, need a null check here!
  3585   __ null_check(recv);
  3587   // profile this call
  3588   __ profile_final_call(T2);
  3590   // 2014/11/24 Fu
  3591   // T2: tmp, used for mdp
  3592   // method: callee
  3593   // T9: tmp
  3594   // is_virtual: true
  3595   __ profile_arguments_type(T2, method, T9, true);
  3597   __ jump_from_interpreted(method, T2);
  3599   __ bind(notFinal);
  3601   // get receiver klass
  3602   __ null_check(recv, oopDesc::klass_offset_in_bytes());
  3603   __ load_klass(T2, recv);
  3604   __ verify_oop(T2);
  3606   // profile this call
  3607   __ profile_virtual_call(T2, T0, T1);
  3609   // get target methodOop & entry point
  3610   const int base = InstanceKlass::vtable_start_offset() * wordSize;
  3611   assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
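         // vtable entries are one word each (the Method*), starting at
         // vtable_start_offset from the klass, so the target is loaded from
         // klass + base + index * wordSize + method_offset_in_bytes()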
  3612   __ dsll(AT, index, Address::times_ptr);
  3613   // T2: receiver
  3614   __ dadd(AT, T2, AT);
  3615   // this is an unaligned read
  3616   __ ld(method, AT, base + vtableEntry::method_offset_in_bytes());
  3617   __ profile_arguments_type(T2, method, T9, true);
  3618   __ jump_from_interpreted(method, T2);
  3619 }
  3622 void TemplateTable::invokevirtual(int byte_no) {
  3623   transition(vtos, vtos);
  3624   assert(byte_no == f2_byte, "use this argument");
  3625   prepare_invoke(byte_no, Rmethod, NOREG, T3, T1);
  3626   // now recv & flags in T3, T1
  3627   invokevirtual_helper(Rmethod, T3, T1);
  3628 }
  3630 // T9 : entry
  3631 // Rmethod : method
  3632 void TemplateTable::invokespecial(int byte_no) {
  3633   transition(vtos, vtos);
  3634   assert(byte_no == f1_byte, "use this argument");
  3635   prepare_invoke(byte_no, Rmethod, NOREG, T3);
  3636   // now recv & flags in T3, T1
  3637   __ verify_oop(T3);
  3638   __ null_check(T3);
  3639   __ profile_call(T9);
  3641   // 2014/11/24 Fu
  3642   // T8: tmp, used for mdp
  3643   // Rmethod: callee
  3644   // T9: tmp
  3645   // is_virtual: false
  3646   __ profile_arguments_type(T8, Rmethod, T9, false);
  3648   __ jump_from_interpreted(Rmethod, T9);
  3649   __ move(T0, T3); // aoqi: FIXME - is this move needed after the jump?
  3650 }
  3652 void TemplateTable::invokestatic(int byte_no) {
  3653   transition(vtos, vtos);
  3654   assert(byte_no == f1_byte, "use this argument");
  3655   prepare_invoke(byte_no, Rmethod, NOREG);
  3656   __ verify_oop(Rmethod);
  3658   __ profile_call(T9);
  3660   // 2014/11/24 Fu
  3661   // T8: tmp, used for mdp
  3662   // Rmethod: callee
  3663   // T9: tmp
  3664   // is_virtual: false
  3665   __ profile_arguments_type(T8, Rmethod, T9, false);
  3667   __ jump_from_interpreted(Rmethod, T9);
  3668 }
  3670 // FIXME: nothing to do here for now; fast_invokevfinal is not generated on mips64
  3671 void TemplateTable::fast_invokevfinal(int byte_no) {
  3672   transition(vtos, vtos);
  3673   assert(byte_no == f2_byte, "use this argument");
  3674   __ stop("fast_invokevfinal not used on mips64");
  3675 }
  3677 // used registers : T0, T1, T2, T3, A7
  3678 // T0 : itable, vtable, entry
  3679 // T2 : interface
  3680 // T3 : receiver
  3681 // T1 : flags, klass
  3682 // Rmethod : index, then method; this is required by interpreter_entry
  3683 void TemplateTable::invokeinterface(int byte_no) {
  3684   transition(vtos, vtos);
  3685   // this method will use T0-T3 and A7
  3686   assert(byte_no == f1_byte, "use this argument");
  3687   prepare_invoke(byte_no, T2, Rmethod, T3, T1);
  3688   // T2: Interface
  3689   // Rmethod: index
  3690   // T3: receiver
  3691   // T1: flags
  3693   // Special case of invokeinterface called for virtual method of
  3694   // java.lang.Object.  See cpCacheOop.cpp for details.
  3695   // This code isn't produced by javac, but could be produced by
  3696   // another compliant java compiler.
  3697   Label notMethod;
  3698   __ move(AT, (1 << ConstantPoolCacheEntry::is_forced_virtual_shift));
  3699   __ andr(AT, T1, AT);
  3700   __ beq(AT, R0, notMethod);
  3701   __ delayed()->nop();
  3703   invokevirtual_helper(Rmethod, T3, T1);
  3704   __ bind(notMethod);
  3705   // Get receiver klass into T1 - also a null check
  3706   // added for compressed oops
  3707   __ load_klass(T1, T3);
  3708   __ verify_oop(T1);
  3710   // profile this call
  3711   __ profile_virtual_call(T1, T0, FSR);
  3713   // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  3714   // TODO: x86 add a new method lookup_interface_method  // LEE
  3715   const int base = InstanceKlass::vtable_start_offset() * wordSize;
  3716   assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below");
  3717   __ lw(AT, T1, InstanceKlass::vtable_length_offset() * wordSize);
  3718   __ dsll(AT, AT, Address::times_8);
  3719   __ dadd(T0, T1, AT);
  3720   __ daddi(T0, T0, base);
  3721   if (HeapWordsPerLong > 1) {
  3722     // Round up to align_object_offset boundary
  3723     __ round_to(T0, BytesPerLong);
  3724   }
  3725   // now T0 is the begin of the itable
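         // the itable begins with a table of itableOffsetEntry pairs
         // (interface klass, offset); the loop below scans it for the
         // interface in T2, and the matching entry's offset locates that
         // interface's method table within the receiver's klass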
  3727   Label entry, search, interface_ok;
  3730   __ b(entry);
  3731   __ delayed()->nop();
  3733   __ bind(search);
  3734   __ increment(T0, itableOffsetEntry::size() * wordSize);
  3736   __ bind(entry);
  3738   // Check that the entry is non-null.  A null entry means that the receiver
  3739   // class doesn't implement the interface, and wasn't the same as the
  3740   // receiver class checked when the interface was resolved.
  3741   __ ld(AT, T0, itableOffsetEntry::interface_offset_in_bytes());
  3742   __ bne(AT, R0, interface_ok);
  3743   __ delayed()->nop();
  3744   // throw exception
  3745   // the call_VM checks for exception, so we should never return here.
  3748   // Note: unlike x86 there is no return address on the stack to pop here;
  3749   // prepare_invoke saved it in RA instead of pushing it
  3751   __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_IncompatibleClassChangeError));
  3752   __ should_not_reach_here();
  3754   __ bind(interface_ok);
  3755   // NOTE: unlike x86, there is nothing to pop here
  3756   __ bne(AT, T2, search);
  3757   __ delayed()->nop();
  3759   // now we get vtable of the interface
  3760   __ ld(T0, T0, itableOffsetEntry::offset_offset_in_bytes());
  3761   __ daddu(T0, T1, T0);
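         // T0 now points at the interface's method table inside the receiver's
         // klass; Rmethod (the itable index) selects the entry within it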
  3762   assert(itableMethodEntry::size() * wordSize == 8, "adjust the scaling in the code below");
  3763   __ dsll(AT, Rmethod, Address::times_8);
  3764   __ daddu(AT, T0, AT);
  3765   // now we get the method
  3766   __ ld(Rmethod, AT, 0);
  3767   // Rmethod: methodOop to call
  3768   // T3: receiver
  3769   // Check for abstract method error
  3770   // Note: This should be done more efficiently via a throw_abstract_method_error
  3771   //       interpreter entry point and a conditional jump to it in case of a null
  3772   //       method.
  3773   {
  3774     Label L;
  3775     __ bne(Rmethod, R0, L);
  3776     __ delayed()->nop();
  3778     // throw exception
  3779     // note: must restore interpreter registers to canonical
  3780     //       state for exception handling to work correctly!
  3788     __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError));
  3789     // the call_VM checks for exception, so we should never return here.
  3790     __ should_not_reach_here();
  3791     __ bind(L);
  3792   }
  3794   // 2014/11/24 Fu
  3795   // T8: tmp, used for mdp
  3796   // Rmethod: callee
  3797   // T9: tmp
  3798   // is_virtual: true
  3799   __ profile_arguments_type(T8, Rmethod, T9, true);
  3801   __ jump_from_interpreted(Rmethod, T9);
  3802 }
  3805 void TemplateTable::invokehandle(int byte_no) {
  3806   transition(vtos, vtos);
  3807   assert(byte_no == f1_byte, "use this argument");
  3808   const Register T2_method = Rmethod;
  3809   const Register FSR_mtype  = FSR;
  3810   const Register T3_recv   = T3;
  3812   if (!EnableInvokeDynamic) {
  3813      // rewriter does not generate this bytecode
  3814      __ should_not_reach_here();
  3815      return;
  3816   }
  3818    prepare_invoke(byte_no, T2_method, FSR_mtype, T3_recv);
  3819    //??__ verify_method_ptr(T2_method);
  3820    __ verify_oop(T3_recv);
  3821    __ null_check(T3_recv);
  3823    // FSR_mtype: MethodType object (from cpool->resolved_references[f1], if necessary)
  3824    // T2_method: MH.invokeExact_MT method (from f2)
  3826    // Note:  FSR_mtype is already pushed (if necessary) by prepare_invoke
  3828    // FIXME: profile the LambdaForm also
  3829    __ profile_final_call(T9);
  3831    // 2014/11/24 Fu
  3832    // T8: tmp, used for mdp
  3833    // T2_method: callee
  3834    // T9: tmp
  3835    // is_virtual: true
  3836    __ profile_arguments_type(T8, T2_method, T9, true);
  3838   __ jump_from_interpreted(T2_method, T9);
  3839 }
  3841  void TemplateTable::invokedynamic(int byte_no) {
  3842    transition(vtos, vtos);
  3843    assert(byte_no == f1_byte, "use this argument");
  3845    if (!EnableInvokeDynamic) {
  3846      // We should not encounter this bytecode if !EnableInvokeDynamic.
  3847      // The verifier will stop it.  However, if we get past the verifier,
  3848      // this will stop the thread in a reasonable way, without crashing the JVM.
  3849      __ call_VM(noreg, CAST_FROM_FN_PTR(address,
  3850                       InterpreterRuntime::throw_IncompatibleClassChangeError));
  3851      // the call_VM checks for exception, so we should never return here.
  3852      __ should_not_reach_here();
  3853      return;
  3854    }
  3856    //const Register Rmethod   = T2;
  3857    const Register T2_callsite = T2;
  3859    prepare_invoke(byte_no, Rmethod, T2_callsite);
  3861    // T2_callsite: CallSite object (from cpool->resolved_references[f1])
  3862    // Rmethod: MH.linkToCallSite method (from f2)
  3864    // Note:  T2_callsite is already pushed by prepare_invoke
  3865    // %%% should make a type profile for any invokedynamic that takes a ref argument
  3866    // profile this call
  3867    __ profile_call(T9);
  3869    // 2014/11/24 Fu
  3870    // T8: tmp, used for mdp
  3871    // Rmethod: callee
  3872    // T9: tmp
  3873    // is_virtual: false
  3874    __ profile_arguments_type(T8, Rmethod, T9, false);
  3876    __ verify_oop(T2_callsite);
  3878    __ jump_from_interpreted(Rmethod, T9);
  3879  }
  3881 //-----------------------------------------------------------------------------
  3882 // Allocation
  3883 // T1 : tags & buffer end & thread
  3884 // T2 : object end
  3885 // T3 : klass
  3886 // T0 : object size (in bytes)
  3887 // A1 : cpool
  3888 // A2 : cp index
  3889 // return object in FSR
  3890 void TemplateTable::_new() {
  3891   transition(vtos, atos);
  3892   __ get_unsigned_2_byte_index_at_bcp(A2, 1);
  3894   Label slow_case;
  3895   Label done;
  3896   Label initialize_header;
  3897   Label initialize_object; // including clearing the fields
  3898   Label allocate_shared;
  3900   // get InstanceKlass in T3
  3901   __ get_cpool_and_tags(A1, T1);
  3903   __ dsll(AT, A2, Address::times_8);
  3904   if (UseLoongsonISA && Assembler::is_simm(sizeof(ConstantPool), 8)) {
  3905     __ gsldx(T3, A1, AT, sizeof(ConstantPool));
  3906   } else {
  3907     __ dadd(AT, A1, AT);
  3908     __ ld(T3, AT, sizeof(ConstantPool));
  3909   }
  3911   // make sure the class we're about to instantiate has been resolved.
  3912   // Note: slow_case does a pop of stack, which is why we loaded class/pushed above
  3913   const int tags_offset = Array<u1>::base_offset_in_bytes();
  3914   if (UseLoongsonISA && Assembler::is_simm(tags_offset, 8)) {
  3915     __ gslbx(AT, T1, A2, tags_offset);
  3916   } else {
  3917     __ dadd(T1, T1, A2);
  3918     __ lb(AT, T1, tags_offset);
  3919   }
  3920   __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class);
  3921   __ bne(AT, R0, slow_case);
  3922   //__ delayed()->nop();
  3925   // make sure klass is initialized & doesn't have finalizer
  3926   // make sure klass is fully initialized
  3927   __ lhu(T1, T3, in_bytes(InstanceKlass::init_state_offset()));
  3928   __ daddiu(AT, T1, - (int)InstanceKlass::fully_initialized);
  3929   __ bne(AT, R0, slow_case);
  3930   //__ delayed()->nop();
  3932   // has_finalizer
  3933   __ lw(T0, T3, in_bytes(Klass::layout_helper_offset()) );
  3934   __ andi(AT, T0, Klass::_lh_instance_slow_path_bit);
  3935   __ bne(AT, R0, slow_case);
  3936   //__ delayed()->nop();
  3938   // Allocate the instance
  3939   // 1) Try to allocate in the TLAB
  3940   // 2) if fail and the object is large allocate in the shared Eden
  3941   // 3) if the above fails (or is not applicable), go to a slow case
  3942   // (creates a new TLAB, etc.)
  3944   const bool allow_shared_alloc =
  3945     Universe::heap()->supports_inline_contig_alloc() && !CMSIncrementalMode;
  3947   if (UseTLAB) {
  3948 #ifndef OPT_THREAD
  3949     const Register thread = T8;
  3950     __ get_thread(thread);
  3951 #else
  3952     const Register thread = TREG;
  3953 #endif
  3954     // get tlab_top
  3955     __ ld(FSR, thread, in_bytes(JavaThread::tlab_top_offset()));
  3956     // get tlab_end
  3957     __ ld(AT, thread, in_bytes(JavaThread::tlab_end_offset()));
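           // bump-pointer allocation: new_top (T2) = tlab_top (FSR) + size (T0);
           // if new_top passes tlab_end, try the shared Eden or the slow path,
           // otherwise publish new_top back to the thread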
  3958     __ dadd(T2, FSR, T0);
  3959     __ slt(AT, AT, T2);
  3960     __ bne(AT, R0, allow_shared_alloc ? allocate_shared : slow_case);
  3961     __ delayed()->nop();
  3962     __ sd(T2, thread, in_bytes(JavaThread::tlab_top_offset()));
  3964     if (ZeroTLAB) {
  3965       // the fields have been already cleared
  3966       __ beq(R0, R0, initialize_header);
  3967     } else {
  3968       // initialize both the header and fields
  3969       __ beq(R0, R0, initialize_object);
  3970     }
  3971     __ delayed()->nop();
  3972   }
  3974   // Allocation in the shared Eden, if allowed
  3975   // T0 : instance size in bytes
  3976   if(allow_shared_alloc){
  3977     __ bind(allocate_shared);
  3979     Label retry;
  3980     Address heap_top(T1);
  3981     __ set64(T1, (long)Universe::heap()->top_addr());
  3982     __ ld(FSR, heap_top);
  3984     __ bind(retry);
  3985     __ set64(AT, (long)Universe::heap()->end_addr());
  3986     __ ld(AT, AT, 0);
  3987     __ dadd(T2, FSR, T0);
  3988     __ slt(AT, AT, T2);
  3989     __ bne(AT, R0, slow_case);
  3990     __ delayed()->nop();
  3992     // Compare FSR with the current heap top; if they are still equal,
  3993     // store the new top (T2) through the heap top pointer.
  3994     // cmpxchg leaves a flag in AT: zero means the CAS failed, so retry.
  3995     //
  3996     // FSR: object begin
  3997     // T2: object end
  3998     // T0: instance size in bytes
  4000     // if someone beat us on the allocation, try again, otherwise continue
  4001     __ cmpxchg(T2, heap_top, FSR);
  4002     __ beq(AT, R0, retry);
  4003     __ delayed()->nop();
  4004   }
  4006   if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) {
  4007     // The object is initialized before the header.  If the object size is
  4008     // zero, go directly to the header initialization.
  4009     __ bind(initialize_object);
  4010     __ set64(AT, - sizeof(oopDesc));
  4011     __ daddu(T0, T0, AT);
  4012     __ beq(T0, R0, initialize_header);
  4013     __ delayed()->nop();
  4015     // initialize remaining object fields: T0 is a multiple of 2
  4016     {
  4017       Label loop;
  4018       __ dadd(T1, FSR, T0);
  4019       __ daddi(T1, T1, -oopSize);
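             // T1 walks backward from the last field word; each store clears
             // the word at T1 + sizeof(oopDesc), so the header is never touched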
  4021       __ bind(loop);
  4022       __ sd(R0, T1, sizeof(oopDesc) + 0 * oopSize);
  4023       __ bne(T1, FSR, loop); // don't clear the header
  4024       __ delayed()->daddi(T1, T1, -oopSize);
  4025     }
  4027     //klass in T3,
  4028     // initialize object header only.
  4029     __ bind(initialize_header);
  4030     if (UseBiasedLocking) {
  4031       __ ld(AT, T3, in_bytes(Klass::prototype_header_offset()));
  4032       __ sd(AT, FSR, oopDesc::mark_offset_in_bytes ());
  4033     } else {
  4034       __ set64(AT, (long)markOopDesc::prototype());
  4035       __ sd(AT, FSR, oopDesc::mark_offset_in_bytes());
  4036     }
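           // (store_klass_gap clears the 32-bit gap next to a narrow klass
           // field; presumably a no-op unless compressed class pointers are used)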
  4038     __ store_klass_gap(FSR, R0);
  4039     __ store_klass(FSR, T3);
  4041     {
  4042       SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0);
  4043       // Trigger dtrace event for fastpath
  4044       __ push(atos);
  4045       __ call_VM_leaf(
  4046            CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), FSR);
  4047       __ pop(atos);
  4048     }
  4050     __ b(done);
  4051     __ delayed()->nop();
  4052   }
  4054   // slow case
  4055   __ bind(slow_case);
  4056   call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), A1, A2);
  4058   // continue
  4059   __ bind(done);
  4060   __ sync();
  4061 }
  4063 void TemplateTable::newarray() {
  4064   transition(itos, atos);
  4065   __ lbu(A1, at_bcp(1));
  4066   //type, count
  4067   call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), A1, FSR);
  4068   __ sync();
  4069 }
  4071 void TemplateTable::anewarray() {
  4072   transition(itos, atos);
  4073   __ get_2_byte_integer_at_bcp(A2, AT, 1);
  4074   __ huswap(A2);
  4075   __ get_constant_pool(A1);
  4076   // cp, index, count
  4077   call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), A1, A2, FSR);
  4078   __ sync();
  4079 }
  4081 void TemplateTable::arraylength() {
  4082   transition(atos, itos);
  4083   __ null_check(FSR, arrayOopDesc::length_offset_in_bytes());
  4084   __ lw(FSR, FSR, arrayOopDesc::length_offset_in_bytes());
  4085 }
  4087 // register mapping from the x86 version: T2 ~ ebx, T3 ~ ecx, T1 ~ edx
  4088 // when gen_subtype_check is invoked: super klass in T3, sub klass in T2, object (always) in FSR
  4089 // T2 : sub klass
  4090 // T3 : cpool, then super klass
  4092 void TemplateTable::checkcast() {
  4093   transition(atos, atos);
  4094   Label done, is_null, ok_is_subtype, quicked, resolved;
  4095   __ beq(FSR, R0, is_null);
  4096   __ delayed()->nop();
  4098   // Get cpool & tags index
  4099   __ get_cpool_and_tags(T3, T1);
  4100   __ get_2_byte_integer_at_bcp(T2, AT, 1);
  4101   __ huswap(T2);
  4103   // See if bytecode has already been quicked
  4104   __ dadd(AT, T1, T2);
  4105   __ lb(AT, AT, Array<u1>::base_offset_in_bytes());
  4106   __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class);
  4107   __ beq(AT, R0, quicked);
  4108   __ delayed()->nop();
  4110   /* 2012/6/2 Jin: In InterpreterRuntime::quicken_io_cc, lots of new classes may be loaded.
  4111    *  GC may then move the object in V0 to another place in the heap.
  4112    *  Therefore, we must never keep such an object in a register across the call;
  4113    *  we save it on the stack instead, where the GC thread can update it automatically.
  4114    *  After GC, the stack slot holds the object's new address.
  4115    */
  4116   __ push(atos);
  4117   const Register thread = TREG;
  4118 #ifndef OPT_THREAD
  4119   __ get_thread(thread);
  4120 #endif
  4121   call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
  4122   __ get_vm_result_2(T3, thread);
  4123   __ pop_ptr(FSR);
  4124   __ b(resolved);
  4125   __ delayed()->nop();
  4127   // klass already in cp, get superklass in T3
  4128   __ bind(quicked);
  4129   __ dsll(AT, T2, Address::times_8);
  4130   __ dadd(AT, T3, AT);
  4131   __ ld(T3, AT, sizeof(ConstantPool));
  4133   __ bind(resolved);
  4135   // get subklass in T2
  4136   // added for compressed oops
  4137   __ load_klass(T2, FSR);
  4138   // Superklass in T3.  Subklass in T2.
  4139   __ gen_subtype_check(T3, T2, ok_is_subtype);
  4141   // Come here on failure
  4142   // object is at FSR
  4143   __ jmp(Interpreter::_throw_ClassCastException_entry);
  4144   __ delayed()->nop();
  4146   // Come here on success
  4147   __ bind(ok_is_subtype);
  4149   // Collect counts on whether this check-cast sees NULLs a lot or not.
  4150   if (ProfileInterpreter) {
  4151     __ b(done);
  4152     __ delayed()->nop();
  4153     __ bind(is_null);
  4154     __ profile_null_seen(T3);
  4155   } else {
  4156     __ bind(is_null);
  4157   }
  4158   __ bind(done);
  4159 }
  4161 // T3 is used as cpool, T1 as tags, T2 as index
  4162 // object always in FSR, superklass in T3, subklass in T2
  4163 void TemplateTable::instanceof() {
  4164   transition(atos, itos);
  4165   Label done, is_null, ok_is_subtype, quicked, resolved;
  4167   __ beq(FSR, R0, is_null);
  4168   __ delayed()->nop();
  4170   // Get cpool & tags index
  4171   __ get_cpool_and_tags(T3, T1);
  4172   // get index
  4173   __ get_2_byte_integer_at_bcp(T2, AT, 1);
  4174   __ hswap(T2);
  4176   // See if bytecode has already been quicked
  4177   // quicked
  4178   __ daddu(AT, T1, T2);
  4179   __ lb(AT, AT, Array<u1>::base_offset_in_bytes());
  4180   __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class);
  4181   __ beq(AT, R0, quicked);
  4182   __ delayed()->nop();
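         // same GC hazard as noted in checkcast: keep the object on the stack,
         // not in a register, across quicken_io_cc so GC can update it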
  4184   __ push(atos);
  4185   const Register thread = TREG;
  4186 #ifndef OPT_THREAD
  4187   __ get_thread(thread);
  4188 #endif
  4189   call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
  4190   __ get_vm_result_2(T3, thread);
  4191   __ pop_ptr(FSR);
  4192   __ b(resolved);
  4193   __ delayed()->nop();
  4195   // get superklass in T3, subklass in T2
  4196   __ bind(quicked);
  4197   __ dsll(AT, T2, Address::times_8);
  4198   __ daddu(AT, T3, AT);
  4199   __ ld(T3, AT, sizeof(ConstantPool));
  4201   __ bind(resolved);
  4202   // get subklass in T2
  4203   // added for compressed oops
  4204   __ load_klass(T2, FSR);
  4206   // Superklass in T3.  Subklass in T2.
  4207   __ gen_subtype_check(T3, T2, ok_is_subtype);
  4208   // Come here on failure
  4209   __ b(done);
  4210   __ delayed(); __ move(FSR, R0);
  4212   // Come here on success
  4213   __ bind(ok_is_subtype);
  4214   __ move(FSR, 1);
  4216   // Collect counts on whether this test sees NULLs a lot or not.
  4217   if (ProfileInterpreter) {
  4218     __ beq(R0, R0, done);
  4219     __ nop();
  4220     __ bind(is_null);
  4221     __ profile_null_seen(T3);
  4222   } else {
  4223     __ bind(is_null);   // same as 'done'
  4224   }
  4225   __ bind(done);
  4226   // FSR = 0: obj == NULL or  obj is not an instanceof the specified klass
  4227   // FSR = 1: obj != NULL and obj is     an instanceof the specified klass
  4228 }
  4230 //--------------------------------------------------------
  4231 //--------------------------------------------
  4232 // Breakpoints
  4233 void TemplateTable::_breakpoint() {
  4234   // Note: We get here even if we are single stepping.
  4235   // jbug insists on setting breakpoints at every bytecode
  4236   // even if we are in single step mode.
  4238   transition(vtos, vtos);
  4240   // get the unpatched byte code
  4241   __ get_method(A1);
  4242   __ call_VM(NOREG,
  4243              CAST_FROM_FN_PTR(address,
  4244                               InterpreterRuntime::get_original_bytecode_at),
  4245              A1, BCP);
  4246   __ move(Rnext, V0); // Jin: Rnext will be used in dispatch_only_normal
  4248   // post the breakpoint event
  4249   __ get_method(A1);
  4250   __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), A1, BCP);
  4252   // complete the execution of original bytecode
  4253   __ dispatch_only_normal(vtos);
  4254 }
  4256 //-----------------------------------------------------------------------------
  4257 // Exceptions
  4259 void TemplateTable::athrow() {
  4260   transition(atos, vtos);
  4261   __ null_check(FSR);
  4262   __ jmp(Interpreter::throw_exception_entry());
  4263   __ delayed()->nop();
  4264 }
  4266 //-----------------------------------------------------------------------------
  4267 // Synchronization
  4268 //
  4269 // Note: monitorenter & exit are symmetric routines; which is reflected
  4270 //       in the assembly code structure as well
  4271 //
  4272 // Stack layout:
  4273 //
  4274 // [expressions  ] <--- SP               = expression stack top
  4275 // ..
  4276 // [expressions  ]
  4277 // [monitor entry] <--- monitor block top = expression stack bot
  4278 // ..
  4279 // [monitor entry]
  4280 // [frame data   ] <--- monitor block bot
  4281 // ...
  4282 // [return addr  ] <--- FP
  4284 // we use T2 as monitor entry pointer, T3 as monitor top pointer, c_rarg0 as free slot pointer
  4285 // object always in FSR
  4286 void TemplateTable::monitorenter() {
  4287   transition(atos, vtos);
  4289   // check for NULL object
  4290   __ null_check(FSR);
  4292   const Address monitor_block_top(FP, frame::interpreter_frame_monitor_block_top_offset
  4293       * wordSize);
  4294   const int entry_size = (frame::interpreter_frame_monitor_size()* wordSize);
  4295   Label allocated;
  4297   // initialize entry pointer
  4298   __ move(c_rarg0, R0);
  4300   // find a free slot in the monitor block (result in c_rarg0)
  4301   {
  4302     Label entry, loop, exit, next;
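           // scan from the monitor block top (T2) down to the block bottom
           // (T3 = initial SP): remember the first slot whose obj is NULL in
           // c_rarg0, and exit early if this object already has an entry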
  4303     __ ld(T2, monitor_block_top);
  4304     __ b(entry);
  4305     __ delayed()->daddi(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize);
  4307     // free slot?
  4308     __ bind(loop);
  4309     __ ld(AT, T2, BasicObjectLock::obj_offset_in_bytes());
  4310     __ bne(AT, R0, next);
  4311     __ delayed()->nop();
  4312     __ move(c_rarg0, T2);
  4314     __ bind(next);
  4315     __ beq(FSR, AT, exit);
  4316     __ delayed()->nop();
  4317     __ daddi(T2, T2, entry_size);
  4319     __ bind(entry);
  4320     __ bne(T3, T2, loop);
  4321     __ delayed()->nop();
  4322     __ bind(exit);
  4323   }
  4325   __ bne(c_rarg0, R0, allocated);
  4326   __ delayed()->nop();
  4328   // allocate one if there's no free slot
  4329   {
  4330     Label entry, loop;
  4331     // 1. compute new pointers                   // SP: old expression stack top
  4332     __ ld(c_rarg0, monitor_block_top);
  4333     __ daddi(SP, SP, - entry_size);
  4334     __ daddi(c_rarg0, c_rarg0, - entry_size);
  4335     __ sd(c_rarg0, monitor_block_top);
  4336     __ b(entry);
  4337     __ delayed(); __ move(T3, SP);
  4339     // 2. move expression stack contents
  4340     __ bind(loop);
  4341     __ ld(AT, T3, entry_size);
  4342     __ sd(AT, T3, 0);
  4343     __ daddi(T3, T3, wordSize);
  4344     __ bind(entry);
  4345     __ bne(T3, c_rarg0, loop);
  4346     __ delayed()->nop();
  4347   }
  4349   __ bind(allocated);
  4350   // Increment bcp to point to the next bytecode,
  4351   // so exception handling for async. exceptions works correctly.
  4352   // The object has already been popped from the stack, so the
  4353   // expression stack looks correct.
  4354   __ daddi(BCP, BCP, 1);
  4355   __ sd(FSR, c_rarg0, BasicObjectLock::obj_offset_in_bytes());
  4356   __ lock_object(c_rarg0);
  4357   // check to make sure this monitor doesn't cause stack overflow after locking
  4358   __ save_bcp();  // in case of exception
  4359   __ generate_stack_overflow_check(0);
  4360   // The bcp has already been incremented. Just need to dispatch to next instruction.
  4362   __ dispatch_next(vtos);
  4363 }
  4365 // T2 : top
  4366 // c_rarg0 : entry
  4367 void TemplateTable::monitorexit() {
  4368   transition(atos, vtos);
  4370   __ null_check(FSR);
  4372   const int entry_size = (frame::interpreter_frame_monitor_size() * wordSize);
  4373   Label found;
  4375   // find matching slot
  4376   {
  4377     Label entry, loop;
  4378     __ ld(c_rarg0, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize);
  4379     __ b(entry);
  4380     __ delayed()->daddiu(T2, FP, frame::interpreter_frame_initial_sp_offset * wordSize);
  4382     __ bind(loop);
  4383     __ ld(AT, c_rarg0, BasicObjectLock::obj_offset_in_bytes());
  4384     __ beq(FSR, AT, found);
  4385     __ delayed()->nop();
  4386     __ daddiu(c_rarg0, c_rarg0, entry_size);
  4387     __ bind(entry);
  4388     __ bne(T2, c_rarg0, loop);
  4389     __ delayed()->nop();
  4390   }
  4392   // error handling. Unlocking was not block-structured
  4393   Label end;
  4394   __ call_VM(NOREG, CAST_FROM_FN_PTR(address,
  4395              InterpreterRuntime::throw_illegal_monitor_state_exception));
  4396   __ should_not_reach_here();
  4398   // call run-time routine
  4399   // c_rarg0: points to monitor entry
  4400   __ bind(found);
  4401   __ move(TSR, FSR);
  4402   __ unlock_object(c_rarg0);
  4403   __ move(FSR, TSR);
  4404   __ bind(end);
  4405 }
  4408 // Wide instructions
  4409 void TemplateTable::wide() {
  4410   transition(vtos, vtos);
  4411   // Note: the BCP increment step is part of the individual wide bytecode implementations
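         // dispatch through Interpreter::_wentry_point, a table of wide-bytecode
         // entry points indexed by the bytecode following the wide prefix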
  4412   __ lbu(Rnext, at_bcp(1));
  4413   __ dsll(T9, Rnext, Address::times_8);
  4414   __ li(AT, (long)Interpreter::_wentry_point);
  4415   __ dadd(AT, T9, AT);
  4416   __ ld(T9, AT, 0);
  4417   __ jr(T9);
  4418   __ delayed()->nop();
  4419 }
  4422 void TemplateTable::multianewarray() {
  4423   transition(vtos, atos);
  4424   // last dim is on top of stack; we want address of first one:
  4425   // first_addr = last_addr + (ndims - 1) * wordSize
  4426   __ lbu(A1, at_bcp(3));  // dimension
  4427   __ daddi(A1, A1, -1);
  4428   __ dsll(A1, A1, Address::times_8);
  4429   __ dadd(A1, SP, A1);    // now A1 pointer to the count array on the stack
  4430   call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), A1);
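         // the dimension counts are still on the expression stack; pop all
         // ndims slots now that the array has been created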
  4431   __ lbu(AT, at_bcp(3));
  4432   __ dsll(AT, AT, Address::times_8);
  4433   __ dadd(SP, SP, AT);
  4434   __ sync();
  4435 }
  4436 #endif // !CC_INTERP
