src/cpu/mips/vm/templateTable_mips_64.cpp

author      fujie
date        Tue, 24 Oct 2017 14:04:09 +0800
changeset   8001:76b73e112cb7
parent      7998:367db633bbea
child       8003:8363e2ada4b5
permissions -rw-r--r--

[Assembler] Complex address modes support for Assembler::lea(Register rt, Address src), Assembler::sd(Register rt, Address dst) and Assembler::sw(Register rt, Address dst)
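The change lets these three helpers accept a full base + index*scale + displacement
Address rather than only base + displacement. A minimal sketch of the call shapes this
enables (registers and offsets here are illustrative assumptions, not code from this
changeset; the Address(base, index, scale, disp) form matches what the listing below
already queries via obj.index() and obj.disp()):

    // hypothetical call sites -- registers/offsets chosen for illustration only
    __ lea(T2, Address(T3, T2, Address::times_8, 16)); // T2 = T3 + (T2 << 3) + 16
    __ sd(V0, Address(T2, SSR, Address::times_8, 0));  // doubleword store through a scaled index
    __ sw(V0, Address(SP, A2, Address::times_4, 16));  // word store through a scaled index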

/*
 * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */
#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interpreterRuntime.hpp"
#include "interpreter/templateTable.hpp"
#include "memory/universe.inline.hpp"
#include "oops/methodData.hpp"
#include "oops/objArrayKlass.hpp"
#include "oops/oop.inline.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/synchronizer.hpp"

#ifndef CC_INTERP

#define __ _masm->

// Platform-dependent initialization
void TemplateTable::pd_initialize() {
  // No mips specific initialization
}
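
// Register naming used throughout this file (names come from the port's
// register definitions; the FSR/SSR roles are inferred from their use below):
//   BCP - bytecode pointer (S0), LVP - local variable pointer,
//   FSR - first state register caching the top-of-stack value,
//   SSR - second state register (next-to-top), FSF/FTF - FP equivalents,
//   AT  - assembler temporary, R0 - the hardwired-zero register.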

// Address computation: local variables
static inline Address iaddress(int n) {
  return Address(LVP, Interpreter::local_offset_in_bytes(n));
}

static inline Address laddress(int n) {
  return iaddress(n + 1);
}

static inline Address faddress(int n) {
  return iaddress(n);
}

static inline Address daddress(int n) {
  return laddress(n);
}

static inline Address aaddress(int n) {
  return iaddress(n);
}

static inline Address haddress(int n)            { return iaddress(n + 0); }

static inline Address at_sp()             { return Address(SP,   0); }
static inline Address at_sp_p1()          { return Address(SP,  1 * wordSize); }
static inline Address at_sp_p2()          { return Address(SP,  2 * wordSize); }

// At top of Java expression stack, which may be different from SP. It
// isn't for category 1 values.
static inline Address at_tos   () {
  Address tos = Address(SP,  Interpreter::expr_offset_in_bytes(0));
  return tos;
}

static inline Address at_tos_p1() {
  return Address(SP,  Interpreter::expr_offset_in_bytes(1));
}

static inline Address at_tos_p2() {
  return Address(SP,  Interpreter::expr_offset_in_bytes(2));
}

static inline Address at_tos_p3() {
  return Address(SP,  Interpreter::expr_offset_in_bytes(3));
}

// We use S0 as bcp; be sure bcp is in S0 before you call any of the template generators.
Address TemplateTable::at_bcp(int offset) {
  assert(_desc->uses_bcp(), "inconsistent uses_bcp information");
  return Address(BCP, offset);
}

// Miscellaneous helper routines
// Store an oop (or NULL) at the address described by obj.
// If val == noreg this means store a NULL.

static void do_oop_store(InterpreterMacroAssembler* _masm,
                         Address obj,
                         Register val,
                         BarrierSet::Name barrier,
                         bool precise) {
  assert(val == noreg || val == V0, "parameter is just for looks");
  switch (barrier) {
#if INCLUDE_ALL_GCS
//    case BarrierSet::G1SATBCT:
//    case BarrierSet::G1SATBCTLogging:
//      {
//        // flatten object address if needed
//        if (obj.index() == noreg && obj.disp() == 0) {
//          if (obj.base() != rdx) {
//            __ movq(rdx, obj.base());
//          }
//        } else {
//          __ leaq(rdx, obj);
//        }
//        __ g1_write_barrier_pre(rdx /* obj */,
//                                rbx /* pre_val */,
//                                r15_thread /* thread */,
//                                r8  /* tmp */,
//                                val != noreg /* tosca_live */,
//                                false /* expand_call */);
//        if (val == noreg) {
//          __ store_heap_oop_null(Address(rdx, 0));
//        } else {
//          // G1 barrier needs uncompressed oop for region cross check.
//          Register new_val = val;
//          if (UseCompressedOops) {
//            new_val = rbx;
//            __ movptr(new_val, val);
//          }
//          __ store_heap_oop(Address(rdx, 0), val);
//          __ g1_write_barrier_post(rdx /* store_adr */,
//                                   new_val /* new_val */,
//                                   r15_thread /* thread */,
//                                   r8 /* tmp */,
//                                   rbx /* tmp2 */);
//        }
//      }
      break;
#endif // INCLUDE_ALL_GCS
    case BarrierSet::CardTableModRef:
    case BarrierSet::CardTableExtension:
      {
        if (val == noreg) {
          __ store_heap_oop_null(obj);
        } else {
          __ store_heap_oop(obj, val);
          // flatten object address if needed
          if (!precise || (obj.index() == noreg && obj.disp() == 0)) {
            __ store_check(obj.base());
          } else {
            //__ leaq(rdx, obj);
            //__ store_check(rdx);
          }
        }
      }
      break;
    case BarrierSet::ModRef:
    case BarrierSet::Other:
      if (val == noreg) {
        __ store_heap_oop_null(obj);
      } else {
        __ store_heap_oop(obj, val);
      }
      break;
    default      :
      ShouldNotReachHere();

  }
}

// bytecode folding
void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg,
                                   Register tmp_reg, bool load_bc_into_bc_reg/*=true*/,
                                   int byte_no) {
  if (!RewriteBytecodes)  return;
  Label L_patch_done;

  switch (bc) {
  case Bytecodes::_fast_aputfield:
  case Bytecodes::_fast_bputfield:
  case Bytecodes::_fast_cputfield:
  case Bytecodes::_fast_dputfield:
  case Bytecodes::_fast_fputfield:
  case Bytecodes::_fast_iputfield:
  case Bytecodes::_fast_lputfield:
  case Bytecodes::_fast_sputfield:
    {
      // We skip bytecode quickening for putfield instructions when
      // the put_code written to the constant pool cache is zero.
      // This is required so that every execution of this instruction
      // calls out to InterpreterRuntime::resolve_get_put to do
      // additional, required work.
      assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
      assert(load_bc_into_bc_reg, "we use bc_reg as temp");
      __ get_cache_and_index_and_bytecode_at_bcp(tmp_reg, bc_reg, tmp_reg, byte_no, 1);
      __ daddi(bc_reg, R0, bc);
      __ beq(tmp_reg, R0, L_patch_done);
      __ delayed()->nop();
    }
    break;
  default:
    assert(byte_no == -1, "sanity");
    // the pair bytecodes have already done the load.
    if (load_bc_into_bc_reg) {
      __ move(bc_reg, bc);
    }
  }

  if (JvmtiExport::can_post_breakpoint()) {
    Label L_fast_patch;
    // if a breakpoint is present we can't rewrite the stream directly
    __ lbu(tmp_reg, at_bcp(0));
    __ move(AT, Bytecodes::_breakpoint);
    __ bne(tmp_reg, AT, L_fast_patch);
    __ delayed()->nop();

    __ get_method(tmp_reg);
    // Let breakpoint table handling rewrite to quicker bytecode
    __ call_VM(NOREG, CAST_FROM_FN_PTR(address,
    InterpreterRuntime::set_original_bytecode_at), tmp_reg, BCP, bc_reg);

    __ b(L_patch_done);
    __ delayed()->nop();
    __ bind(L_fast_patch);
  }

#ifdef ASSERT
  Label L_okay;
  __ lbu(tmp_reg, at_bcp(0));
  __ move(AT, (int)Bytecodes::java_code(bc));
  __ beq(tmp_reg, AT, L_okay);
  __ delayed()->nop();
  __ beq(tmp_reg, bc_reg, L_patch_done);
  __ delayed()->nop();
  __ stop("patching the wrong bytecode");
  __ bind(L_okay);
#endif

  // patch bytecode
  __ sb(bc_reg, at_bcp(0));
  __ bind(L_patch_done);
}

// Individual instructions

void TemplateTable::nop() {
  transition(vtos, vtos);
  // nothing to do
}

void TemplateTable::shouldnotreachhere() {
  transition(vtos, vtos);
  __ stop("shouldnotreachhere bytecode");
}

void TemplateTable::aconst_null() {
  transition(vtos, atos);
  __ move(FSR, R0);
}

void TemplateTable::iconst(int value) {
  transition(vtos, itos);
  if (value == 0) {
    __ move(FSR, R0);
  } else {
    __ move(FSR, value);
  }
}

void TemplateTable::lconst(int value) {
  transition(vtos, ltos);
  if (value == 0) {
    __ move(FSR, R0);
  } else {
    __ move(FSR, value);
  }
}

void TemplateTable::fconst(int value) {
  transition(vtos, ftos);
  switch( value ) {
    case 0:  __ mtc1(R0, FSF);    return;
    case 1:  __ addiu(AT, R0, 1); break;
    case 2:  __ addiu(AT, R0, 2); break;
    default: ShouldNotReachHere();
  }
  __ mtc1(AT, FSF);
  __ cvt_s_w(FSF, FSF);
}

void TemplateTable::dconst(int value) {
  transition(vtos, dtos);
  switch( value ) {
    case 0:  __ dmtc1(R0, FSF);
             return;
    case 1:  __ daddiu(AT, R0, 1);
             __ dmtc1(AT, FSF);
             __ cvt_d_w(FSF, FSF);
             break;
    default: ShouldNotReachHere();
  }
}

void TemplateTable::bipush() {
  transition(vtos, itos);
  __ lb(FSR, at_bcp(1));
}

void TemplateTable::sipush() {
  transition(vtos, itos);
  __ lb(FSR, BCP, 1);
  __ lbu(AT, BCP, 2);
  __ dsll(FSR, FSR, 8);
  __ orr(FSR, FSR, AT);
}
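
// sipush assembles the signed 16-bit operand from the bytecode stream's two
// big-endian bytes: sign-extend byte1 (lb), shift it left 8, then OR in the
// zero-extended byte2 (lbu).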

// T1 : tags
// T2 : index
// T3 : cpool
// T8 : tag
void TemplateTable::ldc(bool wide) {
  transition(vtos, vtos);
  Label call_ldc, notFloat, notClass, Done;
  // get index in cpool
  if (wide) {
    __ get_unsigned_2_byte_index_at_bcp(T2, 1);
  } else {
    __ lbu(T2, at_bcp(1));
  }

  __ get_cpool_and_tags(T3, T1);

  const int base_offset = ConstantPool::header_size() * wordSize;
  const int tags_offset = Array<u1>::base_offset_in_bytes();

  // get type
  if (UseLoongsonISA && Assembler::is_simm(tags_offset, 8)) {
    __ gslbx(T1, T1, T2, tags_offset);
  } else {
    __ dadd(AT, T1, T2);
    __ lb(T1, AT, tags_offset);
  }
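  // gslbx and friends (gslwx, gsldx, gslwxc1, ...) are Loongson-extension
  // loads/stores with base+index+offset addressing; the is_simm(offset, 8)
  // guard checks that the offset fits their signed 8-bit immediate field.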
  //now T1 is the tag

  // unresolved class - get the resolved class
  __ daddiu(AT, T1, - JVM_CONSTANT_UnresolvedClass);
  __ beq(AT, R0, call_ldc);
  __ delayed()->nop();

  // unresolved class in error (resolution failed) - call into runtime
  // so that the same error from first resolution attempt is thrown.
  __ daddiu(AT, T1, -JVM_CONSTANT_UnresolvedClassInError);
  __ beq(AT, R0, call_ldc);
  __ delayed()->nop();

  // resolved class - need to call vm to get java mirror of the class
  __ daddiu(AT, T1, - JVM_CONSTANT_Class);
  __ bne(AT, R0, notClass);
  __ delayed()->dsll(T2, T2, Address::times_8);
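  // Note: that dsll sits in the branch delay slot, so T2 is scaled from an
  // index into a byte offset on both the taken and fall-through paths.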

  __ bind(call_ldc);
  __ move(A1, wide);
  call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), A1);
  //__ push(atos);
  __ sd(FSR, SP, - Interpreter::stackElementSize);
  __ b(Done);
  __ delayed()->daddiu(SP, SP, - Interpreter::stackElementSize);
  __ nop(); // added for performance issue

  __ bind(notClass);
  __ daddiu(AT, T1, -JVM_CONSTANT_Float);
  __ bne(AT, R0, notFloat);
  __ delayed()->nop();
  // ftos
  if (UseLoongsonISA && Assembler::is_simm(base_offset, 8)) {
    __ gslwxc1(FSF, T3, T2, base_offset);
  } else {
    __ dadd(AT, T3, T2);
    __ lwc1(FSF, AT, base_offset);
  }
  //__ push_f();
  __ swc1(FSF, SP, - Interpreter::stackElementSize);
  __ b(Done);
  __ delayed()->daddiu(SP, SP, - Interpreter::stackElementSize);

  __ bind(notFloat);
#ifdef ASSERT
  {
    Label L;
    __ daddiu(AT, T1, -JVM_CONSTANT_Integer);
    __ beq(AT, R0, L);
    __ delayed()->nop();
    __ stop("unexpected tag type in ldc");
    __ bind(L);
  }
#endif
  // itos JVM_CONSTANT_Integer only
  if (UseLoongsonISA && Assembler::is_simm(base_offset, 8)) {
    __ gslwx(FSR, T3, T2, base_offset);
  } else {
    __ dadd(T0, T3, T2);
    __ lw(FSR, T0, base_offset);
  }
  __ push(itos);
  __ bind(Done);
}

// Fast path for caching oop constants.
void TemplateTable::fast_aldc(bool wide) {
  transition(vtos, atos);

  Register result = FSR;
  Register tmp = SSR;
  int index_size = wide ? sizeof(u2) : sizeof(u1);

  Label resolved;

  // We are resolved if the resolved reference cache entry contains a
  // non-null object (String, MethodType, etc.)
  assert_different_registers(result, tmp);
  __ get_cache_index_at_bcp(tmp, 1, index_size);
  __ load_resolved_reference_at_index(result, tmp);
  __ bne(result, R0, resolved);
  __ delayed()->nop();

  address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);
  // first time invocation - must resolve first
  int i = (int)bytecode();
  __ move(tmp, i);
  __ call_VM(result, entry, tmp);

  __ bind(resolved);

  if (VerifyOops) {
    __ verify_oop(result);
  }
}

// used register: T2, T3, T1
// T2 : index
// T3 : cpool
// T1 : tag
void TemplateTable::ldc2_w() {
  transition(vtos, vtos);
  Label Long, Done;

  // get index in cpool
  __ get_unsigned_2_byte_index_at_bcp(T2, 1);

  __ get_cpool_and_tags(T3, T1);

  const int base_offset = ConstantPool::header_size() * wordSize;
  const int tags_offset = Array<u1>::base_offset_in_bytes();

  // get type in T1
  if (UseLoongsonISA && Assembler::is_simm(tags_offset, 8)) {
    __ gslbx(T1, T1, T2, tags_offset);
  } else {
    __ dadd(AT, T1, T2);
    __ lb(T1, AT, tags_offset);
  }

  __ daddiu(AT, T1, - JVM_CONSTANT_Double);
  __ bne(AT, R0, Long);
  __ delayed()->dsll(T2, T2, Address::times_8);

  // dtos
  if (UseLoongsonISA && Assembler::is_simm(base_offset, 8)) {
    __ gsldxc1(FSF, T3, T2, base_offset);
  } else {
    __ daddu(AT, T3, T2);
    __ ldc1(FSF, AT, base_offset);
  }
  __ sdc1(FSF, SP, - 2 * wordSize);
  __ b(Done);
  __ delayed()->daddi(SP, SP, - 2 * wordSize);

  // ltos
  __ bind(Long);
  if (UseLoongsonISA && Assembler::is_simm(base_offset, 8)) {
    __ gsldx(FSR, T3, T2, base_offset);
  } else {
    __ dadd(AT, T3, T2);
    __ ld(FSR, AT, base_offset);
  }
  __ push(ltos);

  __ bind(Done);
}

// We compute the actual local variable address here.
// x86 doesn't need to, since it has scaled-index addressing modes; we don't, so we do it here.
void TemplateTable::locals_index(Register reg, int offset) {
  __ lbu(reg, at_bcp(offset));
  __ dsll(reg, reg, Address::times_8);
  __ dsub(reg, LVP, reg);
}
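
// Locals sit below LVP and grow toward lower addresses, hence the dsub:
// &local[i] = LVP - (i << 3).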

// this method will do bytecode folding of the two forms:
// iload iload      iload caload
// used register : T2, T3
// T2 : bytecode
// T3 : folded code
void TemplateTable::iload() {
  transition(vtos, itos);
  if (RewriteFrequentPairs) {
    Label rewrite, done;
    // get the next bytecode in T2
    __ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_iload)));
    // if _iload, wait to rewrite to iload2.  We only want to rewrite the
    // last two iloads in a pair.  Comparing against fast_iload means that
    // the next bytecode is neither an iload nor a caload, and therefore
    // an iload pair.
    __ move(AT, Bytecodes::_iload);
    __ beq(AT, T2, done);
    __ delayed()->nop();

    __ move(T3, Bytecodes::_fast_iload2);
    __ move(AT, Bytecodes::_fast_iload);
    __ beq(AT, T2, rewrite);
    __ delayed()->nop();

    // if _caload, rewrite to fast_icaload
    __ move(T3, Bytecodes::_fast_icaload);
    __ move(AT, Bytecodes::_caload);
    __ beq(AT, T2, rewrite);
    __ delayed()->nop();

    // rewrite so iload doesn't check again.
    __ move(T3, Bytecodes::_fast_iload);

    // rewrite
    // T3 : fast bytecode
    __ bind(rewrite);
    patch_bytecode(Bytecodes::_iload, T3, T2, false);
    __ bind(done);
  }

  // Get the local value into tos
  locals_index(T2);
  __ lw(FSR, T2, 0);
}

// used register T2
// T2 : index
void TemplateTable::fast_iload2() {
  transition(vtos, itos);
  locals_index(T2);
  __ lw(FSR, T2, 0);
  __ push(itos);
  locals_index(T2, 3);
  __ lw(FSR, T2, 0);
}

// used register T2
// T2 : index
void TemplateTable::fast_iload() {
  transition(vtos, itos);
  locals_index(T2);
  __ lw(FSR, T2, 0);
}

// used register T2
// T2 : index
void TemplateTable::lload() {
  transition(vtos, ltos);
  locals_index(T2);
  __ ld(FSR, T2, -wordSize);
}

// used register T2
// T2 : index
void TemplateTable::fload() {
  transition(vtos, ftos);
  locals_index(T2);
  __ lwc1(FSF, T2, 0);
}

// used register T2
// T2 : index
void TemplateTable::dload() {
  transition(vtos, dtos);
  locals_index(T2);
  __ ldc1(FSF, T2, -wordSize);
}

// used register T2
// T2 : index
void TemplateTable::aload() {
  transition(vtos, atos);
  locals_index(T2);
  __ ld(FSR, T2, 0);
}

void TemplateTable::locals_index_wide(Register reg) {
  __ get_unsigned_2_byte_index_at_bcp(reg, 2);
  __ dsll(reg, reg, Address::times_8);
  __ dsub(reg, LVP, reg);
}

// used register T2
// T2 : index
void TemplateTable::wide_iload() {
  transition(vtos, itos);
  locals_index_wide(T2);
  __ ld(FSR, T2, 0);
}

// used register T2
// T2 : index
void TemplateTable::wide_lload() {
  transition(vtos, ltos);
  locals_index_wide(T2);
  __ ld(FSR, T2, -wordSize);
}

// used register T2
// T2 : index
void TemplateTable::wide_fload() {
  transition(vtos, ftos);
  locals_index_wide(T2);
  __ lwc1(FSF, T2, 0);
}

// used register T2
// T2 : index
void TemplateTable::wide_dload() {
  transition(vtos, dtos);
  locals_index_wide(T2);
  __ ldc1(FSF, T2, -wordSize);
}

// used register T2
// T2 : index
void TemplateTable::wide_aload() {
  transition(vtos, atos);
  locals_index_wide(T2);
  __ ld(FSR, T2, 0);
}

// We use A2 as the register for the index -- BE CAREFUL!
// We don't use the tgeu trap (code 29) for now; that is left as a later
// optimization (see the OPT_RANGECHECK branch below).
void TemplateTable::index_check(Register array, Register index) {
  // Pop ptr into array
  __ pop_ptr(array);
  index_check_without_pop(array, index);
}

void TemplateTable::index_check_without_pop(Register array, Register index) {
  // clobbers AT
  // check array
  __ null_check(array, arrayOopDesc::length_offset_in_bytes());

#ifdef _LP64
  // sign extend since tos (index) might contain garbage in upper bits
  __ sll(index, index, 0);
#endif // _LP64
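  // (On MIPS64, sll with a shift amount of 0 sign-extends the low 32 bits
  // into the full 64-bit register -- it is not a no-op.)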

  // check index
  Label ok;
  __ lw(AT, array, arrayOopDesc::length_offset_in_bytes());
#ifndef OPT_RANGECHECK
  __ sltu(AT, index, AT);
  __ bne(AT, R0, ok);
  __ delayed()->nop();

  // throw_ArrayIndexOutOfBoundsException assumes the aberrant index is in A2
  if (A2 != index) __ move(A2, index);
  __ jmp(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry);
  __ delayed()->nop();
  __ bind(ok);
#else
  __ lw(AT, array, arrayOopDesc::length_offset_in_bytes());
  __ move(A2, index);
  __ tgeu(A2, AT, 29);
#endif
}
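
// Both range-check paths leave the failing index in A2, which is where the
// shared _throw_ArrayIndexOutOfBoundsException_entry expects to find it.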

void TemplateTable::iaload() {
  transition(itos, itos);
  if(UseBoundCheckInstruction) {
    __ pop(SSR); //SSR:array    FSR: index
    __ dsll(FSR, FSR, 2);
    __ dadd(FSR, SSR, FSR);
    __ addi(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_INT));

    __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes());  //bound
    __ dsll(AT, AT, 2);
    __ dadd(AT, SSR, AT);
    __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_INT));

    __ gslwle(FSR, FSR, AT);
  } else {
    index_check(SSR, FSR);
    __ dsll(FSR, FSR, 2);
    if (UseLoongsonISA && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_INT), 8)) {
      __ gslwx(FSR, FSR, SSR, arrayOopDesc::base_offset_in_bytes(T_INT));
    } else {
      __ dadd(FSR, SSR, FSR);
      __ lw(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_INT));
    }
  }
}
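
// The UseBoundCheckInstruction paths rely on the Loongson bounded
// load/store family (gslwle, gsldle, ...), which as used here performs the
// access only while the effective address stays within the bound register
// and traps otherwise -- folding the range check into the memory operation.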

void TemplateTable::laload() {
  transition(itos, ltos);
  if(UseBoundCheckInstruction) {
    __ pop(SSR); //SSR:array    FSR: index
    __ dsll(FSR, FSR, Address::times_8);
    __ dadd(FSR, SSR, FSR);
    __ addi(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize);

    __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes());  //bound
    __ dsll(AT, AT, Address::times_8);
    __ dadd(AT, SSR, AT);
    __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize);

    __ gsldle(FSR, FSR, AT);
  } else {
    index_check(SSR, FSR);
    __ dsll(AT, FSR, Address::times_8);
    if (UseLoongsonISA && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_LONG), 8)) {
      __ gsldx(FSR, SSR, AT, arrayOopDesc::base_offset_in_bytes(T_LONG));
    } else {
      __ dadd(AT, SSR, AT);
      __ ld(FSR, AT, arrayOopDesc::base_offset_in_bytes(T_LONG));
    }
  }
}

void TemplateTable::faload() {
  transition(itos, ftos);
  if(UseBoundCheckInstruction) {
    __ pop(SSR); //SSR:array    FSR: index
    __ shl(FSR, 2);
    __ dadd(FSR, SSR, FSR);
    __ addi(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT));

    __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes());  //bound
    __ shl(AT, 2);
    __ dadd(AT, SSR, AT);
    __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_FLOAT));

    __ gslwlec1(FSF, FSR, AT);
  } else {
    index_check(SSR, FSR);
    __ shl(FSR, 2);
    if (UseLoongsonISA && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_FLOAT), 8)) {
      __ gslwxc1(FSF, SSR, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT));
    } else {
      __ dadd(FSR, SSR, FSR);
      __ lwc1(FSF, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT));
    }
  }
}

void TemplateTable::daload() {
  transition(itos, dtos);
  if(UseBoundCheckInstruction) {
    __ pop(SSR); //SSR:array    FSR: index
    __ dsll(FSR, FSR, 3);
    __ dadd(FSR, SSR, FSR);
    __ addi(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize);

    __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes());  //bound
    __ dsll(AT, AT, 3);
    __ dadd(AT, SSR, AT);
    __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize);

    __ gsldlec1(FSF, FSR, AT);
  } else {
    index_check(SSR, FSR);
    __ dsll(AT, FSR, 3);
    if (UseLoongsonISA && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_DOUBLE), 8)) {
      __ gsldxc1(FSF, SSR, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE));
    } else {
      __ dadd(AT, SSR, AT);
      __ ldc1(FSF, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE));
    }
  }
}

void TemplateTable::aaload() {
  transition(itos, atos);
  index_check(SSR, FSR);
  __ dsll(FSR, FSR, UseCompressedOops ? Address::times_4 : Address::times_8);
  __ dadd(FSR, SSR, FSR);
  //add for compressedoops
  __ load_heap_oop(FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
}
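
// With compressed oops the elements are 4-byte narrow oops (hence times_4);
// load_heap_oop decompresses the narrow oop into FSR as needed.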

void TemplateTable::baload() {
  transition(itos, itos);
  if(UseBoundCheckInstruction) {
    __ pop(SSR); //SSR:array   FSR:index
    __ dadd(FSR, SSR, FSR);
    __ addi(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //base

    __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes());
    __ dadd(AT, SSR, AT);
    __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //bound

    __ gslble(FSR, FSR, AT);
  } else {
    index_check(SSR, FSR);
    if (UseLoongsonISA && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_BYTE), 8)) {
      __ gslbx(FSR, SSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE));
    } else {
      __ dadd(FSR, SSR, FSR);
      __ lb(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE));
    }
  }
}

void TemplateTable::caload() {
  transition(itos, itos);
  index_check(SSR, FSR);
  __ dsll(FSR, FSR, Address::times_2);
  __ dadd(FSR, SSR, FSR);
  __ lhu(FSR, FSR,  arrayOopDesc::base_offset_in_bytes(T_CHAR));
}

// iload followed by caload frequent pair
// used register : T2
// T2 : index
void TemplateTable::fast_icaload() {
  transition(vtos, itos);
  // load index out of locals
  locals_index(T2);
  __ lw(FSR, T2, 0);
  index_check(SSR, FSR);
  __ dsll(FSR, FSR, 1);
  __ dadd(FSR, SSR, FSR);
  __ lhu(FSR, FSR,  arrayOopDesc::base_offset_in_bytes(T_CHAR));
}

void TemplateTable::saload() {
  transition(itos, itos);
  if(UseBoundCheckInstruction) {
    __ pop(SSR); //SSR:array    FSR: index
    __ dsll(FSR, FSR, Address::times_2);
    __ dadd(FSR, SSR, FSR);
    __ addi(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT));

    __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes());  //bound
    __ dsll(AT, AT, Address::times_2);
    __ dadd(AT, SSR, AT);
    __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_SHORT));

    __ gslhle(FSR, FSR, AT);
  } else {
    index_check(SSR, FSR);
    __ dsll(FSR, FSR, Address::times_2);
    if (UseLoongsonISA && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_SHORT), 8)) {
      __ gslhx(FSR, SSR, FSR,  arrayOopDesc::base_offset_in_bytes(T_SHORT));
    } else {
      __ dadd(FSR, SSR, FSR);
      __ lh(FSR, FSR,  arrayOopDesc::base_offset_in_bytes(T_SHORT));
    }
  }
}

void TemplateTable::iload(int n) {
  transition(vtos, itos);
  __ lw(FSR, iaddress(n));
}

void TemplateTable::lload(int n) {
  transition(vtos, ltos);
  __ ld(FSR, laddress(n));
}

void TemplateTable::fload(int n) {
  transition(vtos, ftos);
  __ lwc1(FSF, faddress(n));
}

void TemplateTable::dload(int n) {
  transition(vtos, dtos);
  __ ldc1(FSF, laddress(n));
}

void TemplateTable::aload(int n) {
  transition(vtos, atos);
  __ ld(FSR, aaddress(n));
}

// used register : T2, T3
// T2 : bytecode
// T3 : folded code
void TemplateTable::aload_0() {
  transition(vtos, atos);
  // According to bytecode histograms, the pairs:
  //
  // _aload_0, _fast_igetfield
  // _aload_0, _fast_agetfield
  // _aload_0, _fast_fgetfield
  //
  // occur frequently. If RewriteFrequentPairs is set, the (slow)
  // _aload_0 bytecode checks if the next bytecode is either
  // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then
  // rewrites the current bytecode into a pair bytecode; otherwise it
  // rewrites the current bytecode into _fast_aload_0 that doesn't do
  // the pair check anymore.
  //
  // Note: If the next bytecode is _getfield, the rewrite must be
  //       delayed, otherwise we may miss an opportunity for a pair.
  //
  // Also rewrite frequent pairs
  //   aload_0, aload_1
  //   aload_0, iload_1
  // These bytecodes with a small amount of code are most profitable
  // to rewrite
  if (RewriteFrequentPairs) {
    Label rewrite, done;
    // get the next bytecode in T2
    __ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0)));

    // do actual aload_0
    aload(0);

    // if _getfield then wait with rewrite
    __ move(AT, Bytecodes::_getfield);
    __ beq(AT, T2, done);
    __ delayed()->nop();

    // if _igetfield then rewrite to _fast_iaccess_0
    assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) ==
        Bytecodes::_aload_0,
        "fix bytecode definition");
    __ move(T3, Bytecodes::_fast_iaccess_0);
    __ move(AT, Bytecodes::_fast_igetfield);
    __ beq(AT, T2, rewrite);
    __ delayed()->nop();

    // if _agetfield then rewrite to _fast_aaccess_0
    assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) ==
        Bytecodes::_aload_0,
        "fix bytecode definition");
    __ move(T3, Bytecodes::_fast_aaccess_0);
    __ move(AT, Bytecodes::_fast_agetfield);
    __ beq(AT, T2, rewrite);
    __ delayed()->nop();

    // if _fgetfield then rewrite to _fast_faccess_0
    assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) ==
        Bytecodes::_aload_0,
        "fix bytecode definition");
    __ move(T3, Bytecodes::_fast_faccess_0);
    __ move(AT, Bytecodes::_fast_fgetfield);
    __ beq(AT, T2, rewrite);
    __ delayed()->nop();

    // else rewrite to _fast_aload_0
    assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) ==
        Bytecodes::_aload_0,
        "fix bytecode definition");
    __ move(T3, Bytecodes::_fast_aload_0);

    // rewrite
    __ bind(rewrite);
    patch_bytecode(Bytecodes::_aload_0, T3, T2, false);

    __ bind(done);
  } else {
    aload(0);
  }
}

void TemplateTable::istore() {
  transition(itos, vtos);
  locals_index(T2);
  __ sw(FSR, T2, 0);
}

void TemplateTable::lstore() {
  transition(ltos, vtos);
  locals_index(T2);
  __ sd(FSR, T2, -wordSize);
}

void TemplateTable::fstore() {
  transition(ftos, vtos);
  locals_index(T2);
  __ swc1(FSF, T2, 0);
}

void TemplateTable::dstore() {
  transition(dtos, vtos);
  locals_index(T2);
  __ sdc1(FSF, T2, -wordSize);
}

void TemplateTable::astore() {
  transition(vtos, vtos);
  __ pop_ptr(FSR);
  locals_index(T2);
  __ sd(FSR, T2, 0);
}

void TemplateTable::wide_istore() {
  transition(vtos, vtos);
  __ pop_i(FSR);
  locals_index_wide(T2);
  __ sd(FSR, T2, 0);
}

void TemplateTable::wide_lstore() {
  transition(vtos, vtos);
  __ pop_l(FSR);
  locals_index_wide(T2);
  __ sd(FSR, T2, -wordSize);
}

void TemplateTable::wide_fstore() {
  wide_istore();
}

void TemplateTable::wide_dstore() {
  wide_lstore();
}

void TemplateTable::wide_astore() {
  transition(vtos, vtos);
  __ pop_ptr(FSR);
  locals_index_wide(T2);
  __ sd(FSR, T2, 0);
}

// used register : T2
void TemplateTable::iastore() {
  transition(itos, vtos);
  __ pop_i(SSR);   // T2: array  SSR: index
  if(UseBoundCheckInstruction) {
    __ pop_ptr(T2);
    __ dsll(SSR, SSR, Address::times_4);
    __ dadd(SSR, T2, SSR);
    __ addi(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_INT));  // base

    __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes());
    __ dsll(AT, AT, Address::times_4);
    __ dadd(AT, T2, AT);
    __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_INT));  //bound

    __ gsswle(FSR, SSR, AT);
  } else {
    index_check(T2, SSR);  // prefer index in SSR
    __ dsll(SSR, SSR, Address::times_4);
    if (UseLoongsonISA && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_INT), 8)) {
      __ gsswx(FSR, T2, SSR, arrayOopDesc::base_offset_in_bytes(T_INT));
    } else {
      __ dadd(T2, T2, SSR);
      __ sw(FSR, T2, arrayOopDesc::base_offset_in_bytes(T_INT));
    }
  }
}

// used register T2, T3
void TemplateTable::lastore() {
  transition(ltos, vtos);
  __ pop_i (T2);
  if(UseBoundCheckInstruction) {
    __ pop_ptr(T3);
    __ dsll(T2, T2, Address::times_8);
    __ dadd(T2, T3, T2);
    __ addi(T2, T2, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize);  // base

    __ lw(AT, T3, arrayOopDesc::length_offset_in_bytes());
    __ dsll(AT, AT, Address::times_8);
    __ dadd(AT, T3, AT);
    __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize);  //bound

    __ gssdle(FSR, T2, AT);
  } else {
    index_check(T3, T2);
    __ dsll(T2, T2, Address::times_8);
    if (UseLoongsonISA && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_LONG), 8)) {
      __ gssdx(FSR, T3, T2, arrayOopDesc::base_offset_in_bytes(T_LONG));
    } else {
      __ dadd(T3, T3, T2);
      __ sd(FSR, T3, arrayOopDesc::base_offset_in_bytes(T_LONG));
    }
  }
}

// used register T2
void TemplateTable::fastore() {
  transition(ftos, vtos);
  __ pop_i(SSR);
  if(UseBoundCheckInstruction) {
    __ pop_ptr(T2);
    __ dsll(SSR, SSR, Address::times_4);
    __ dadd(SSR, T2, SSR);
    __ addi(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT));  // base

    __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes());
    __ dsll(AT, AT, Address::times_4);
    __ dadd(AT, T2, AT);
    __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_FLOAT));  //bound

    __ gsswlec1(FSF, SSR, AT);
  } else {
    index_check(T2, SSR);
    __ dsll(SSR, SSR, Address::times_4);
    if (UseLoongsonISA && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_FLOAT), 8)) {
      __ gsswxc1(FSF, T2, SSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT));
    } else {
      __ dadd(T2, T2, SSR);
      __ swc1(FSF, T2, arrayOopDesc::base_offset_in_bytes(T_FLOAT));
    }
  }
}

// used register T2, T3
void TemplateTable::dastore() {
  transition(dtos, vtos);
  __ pop_i (T2);
  if(UseBoundCheckInstruction) {
    __ pop_ptr(T3);
    __ dsll(T2, T2, Address::times_8);
    __ dadd(T2, T3, T2);
    __ addi(T2, T2, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize);  // base

    __ lw(AT, T3, arrayOopDesc::length_offset_in_bytes());
    __ dsll(AT, AT, Address::times_8);
    __ dadd(AT, T3, AT);
    __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize);  //bound

    __ gssdlec1(FSF, T2, AT);
  } else {
    index_check(T3, T2);
    __ dsll(T2, T2, Address::times_8);
    if (UseLoongsonISA && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_DOUBLE), 8)) {
      __ gssdxc1(FSF, T3, T2, arrayOopDesc::base_offset_in_bytes(T_DOUBLE));
    } else {
      __ daddu(T3, T3, T2);
      __ sdc1(FSF, T3, arrayOopDesc::base_offset_in_bytes(T_DOUBLE));
    }
  }
}

// used register : T2, T3, T8
// T2 : array
// T3 : subklass
// T8 : supklass
void TemplateTable::aastore() {
  Label is_null, ok_is_subtype, done;
  transition(vtos, vtos);
  // stack: ..., array, index, value
  __ ld(FSR, at_tos());     // Value
  __ lw(SSR, at_tos_p1());  // Index
  __ ld(T2, at_tos_p2());   // Array

  // index_check(T2, SSR);
  index_check_without_pop(T2, SSR);
  // do array store check - check for NULL value first
  __ beq(FSR, R0, is_null);
  __ delayed()->nop();

  // Move subklass into T3
  //add for compressedoops
  __ load_klass(T3, FSR);
  // Move superklass into T8
  //add for compressedoops
  __ load_klass(T8, T2);
  __ ld(T8, Address(T8,  ObjArrayKlass::element_klass_offset()));
  // Compress array+index*4+12 into a single register. T2
  __ dsll(AT, SSR, UseCompressedOops? Address::times_4 : Address::times_8);
  __ dadd(T2, T2, AT);
  __ daddi(T2, T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT));

  // Generate subtype check.
  // Superklass in T8.  Subklass in T3.
  __ gen_subtype_check(T8, T3, ok_is_subtype);        // <-- Jin
  // Come here on failure
  // object is at FSR
  __ jmp(Interpreter::_throw_ArrayStoreException_entry);    // <-- Jin
  __ delayed()->nop();
  // Come here on success
  __ bind(ok_is_subtype);
  //replace with do_oop_store->store_heap_oop
  __ store_heap_oop(Address(T2, 0), FSR);          // <-- Jin
  __ store_check(T2);
  __ b(done);
  __ delayed()->nop();

  // Have a NULL in FSR, T2=array, SSR=index.  Store NULL at ary[idx]
  __ bind(is_null);
  __ profile_null_seen(T9);
  __ dsll(AT, SSR, UseCompressedOops? Address::times_4 : Address::times_8);
  __ dadd(T2, T2, AT);
  __ store_heap_oop(Address(T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), FSR);  /* FSR is null here */

  __ bind(done);
  __ daddi(SP, SP, 3 * Interpreter::stackElementSize);
}

void TemplateTable::bastore() {
  transition(itos, vtos);
  __ pop_i(SSR);
  if(UseBoundCheckInstruction) {
    __ pop_ptr(T2);
    __ dadd(SSR, T2, SSR);
    __ addi(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE));  // base

    __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes());
    __ dadd(AT, T2, AT);
    __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_BYTE));  //bound

    __ gssble(FSR, SSR, AT);
  } else {
    index_check(T2, SSR);
    if (UseLoongsonISA && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_BYTE), 8)) {
      __ gssbx(FSR, T2, SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE));
    } else {
      __ dadd(SSR, T2, SSR);
      __ sb(FSR, SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE));
    }
  }
}

void TemplateTable::castore() {
  transition(itos, vtos);
  __ pop_i(SSR);
  if(UseBoundCheckInstruction) {
    __ pop_ptr(T2);
    __ dsll(SSR, SSR, Address::times_2);
    __ dadd(SSR, T2, SSR);
    __ addi(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR));  // base

    __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes());
    __ dsll(AT, AT, Address::times_2);
    __ dadd(AT, T2, AT);
    __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_CHAR));  //bound

    __ gsshle(FSR, SSR, AT);
  } else {
    index_check(T2, SSR);
    __ dsll(SSR, SSR, Address::times_2);
    if (UseLoongsonISA && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_CHAR), 8)) {
      __ gsshx(FSR, T2, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR));
    } else {
      __ dadd(SSR, T2, SSR);
      __ sh(FSR, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR));
    }
  }
}

void TemplateTable::sastore() {
  castore();
}

void TemplateTable::istore(int n) {
  transition(itos, vtos);
  __ sw(FSR, iaddress(n));
}

void TemplateTable::lstore(int n) {
  transition(ltos, vtos);
  __ sd(FSR, laddress(n));
}

void TemplateTable::fstore(int n) {
  transition(ftos, vtos);
  __ swc1(FSF, faddress(n));
}

void TemplateTable::dstore(int n) {
  transition(dtos, vtos);
  __ sdc1(FSF, laddress(n));
}

void TemplateTable::astore(int n) {
  transition(vtos, vtos);
  __ pop_ptr(FSR);
  __ sd(FSR, aaddress(n));
}

void TemplateTable::pop() {
  transition(vtos, vtos);
  __ daddi(SP, SP, Interpreter::stackElementSize);
}

void TemplateTable::pop2() {
  transition(vtos, vtos);
  __ daddi(SP, SP, 2 * Interpreter::stackElementSize);
}

void TemplateTable::dup() {
  transition(vtos, vtos);
  // stack: ..., a
  __ load_ptr(0, FSR);
  __ push_ptr(FSR);
  // stack: ..., a, a
}

// blows FSR
void TemplateTable::dup_x1() {
  transition(vtos, vtos);
  // stack: ..., a, b
  __ load_ptr(0, FSR);  // load b
  __ load_ptr(1, A5);   // load a
  __ store_ptr(1, FSR); // store b
  __ store_ptr(0, A5);  // store a
  __ push_ptr(FSR);     // push b
  // stack: ..., b, a, b
}

// blows FSR
void TemplateTable::dup_x2() {
  transition(vtos, vtos);
  // stack: ..., a, b, c
  __ load_ptr(0, FSR);  // load c
  __ load_ptr(2, A5);   // load a
  __ store_ptr(2, FSR); // store c in a
  __ push_ptr(FSR);     // push c
  // stack: ..., c, b, c, c
  __ load_ptr(2, FSR);  // load b
  __ store_ptr(2, A5);  // store a in b
  // stack: ..., c, a, c, c
  __ store_ptr(1, FSR); // store b in c
  // stack: ..., c, a, b, c
}

// blows FSR
void TemplateTable::dup2() {
  transition(vtos, vtos);
  // stack: ..., a, b
  __ load_ptr(1, FSR);  // load a
  __ push_ptr(FSR);     // push a
  __ load_ptr(1, FSR);  // load b
  __ push_ptr(FSR);     // push b
  // stack: ..., a, b, a, b
}

// blows FSR
void TemplateTable::dup2_x1() {
  transition(vtos, vtos);
  // stack: ..., a, b, c
  __ load_ptr(0, T2);   // load c
  __ load_ptr(1, FSR);  // load b
  __ push_ptr(FSR);     // push b
  __ push_ptr(T2);      // push c
  // stack: ..., a, b, c, b, c
  __ store_ptr(3, T2);  // store c in b
  // stack: ..., a, c, c, b, c
  __ load_ptr(4, T2);   // load a
  __ store_ptr(2, T2);  // store a in 2nd c
  // stack: ..., a, c, a, b, c
  __ store_ptr(4, FSR); // store b in a
  // stack: ..., b, c, a, b, c
}

// blows FSR, SSR
void TemplateTable::dup2_x2() {
  transition(vtos, vtos);
  // stack: ..., a, b, c, d
  __ load_ptr(0, T2);   // load d
  __ load_ptr(1, FSR);  // load c
  __ push_ptr(FSR);     // push c
  __ push_ptr(T2);      // push d
  // stack: ..., a, b, c, d, c, d
  __ load_ptr(4, FSR);  // load b
  __ store_ptr(2, FSR); // store b in d
  __ store_ptr(4, T2);  // store d in b
  // stack: ..., a, d, c, b, c, d
  __ load_ptr(5, T2);   // load a
  __ load_ptr(3, FSR);  // load c
  __ store_ptr(3, T2);  // store a in c
  __ store_ptr(5, FSR); // store c in a
  // stack: ..., c, d, a, b, c, d
}

// blows FSR
void TemplateTable::swap() {
  transition(vtos, vtos);
  // stack: ..., a, b

  __ load_ptr(1, A5);   // load a
  __ load_ptr(0, FSR);  // load b
  __ store_ptr(0, A5);  // store a in b
  __ store_ptr(1, FSR); // store b in a

  // stack: ..., b, a
}

void TemplateTable::iop2(Operation op) {
  transition(itos, itos);

  __ pop_i(SSR);
  switch (op) {
    case add  : __ addu32(FSR, SSR, FSR); break;
    case sub  : __ subu32(FSR, SSR, FSR); break;
    case mul  : __ mul(FSR, SSR, FSR);    break;
    case _and : __ andr(FSR, SSR, FSR);   break;
    case _or  : __ orr(FSR, SSR, FSR);    break;
    case _xor : __ xorr(FSR, SSR, FSR);   break;
    case shl  : __ sllv(FSR, SSR, FSR);   break; // shift count is implicitly masked to 5 bits, on MIPS as on Intel
    case shr  : __ srav(FSR, SSR, FSR);   break; // shift count is implicitly masked to 5 bits, on MIPS as on Intel
    case ushr : __ srlv(FSR, SSR, FSR);   break; // shift count is implicitly masked to 5 bits, on MIPS as on Intel
    default   : ShouldNotReachHere();
  }
}

// the result is stored in FSR
// used registers : T2, T3
void TemplateTable::lop2(Operation op) {
  transition(ltos, ltos);
  __ pop_l(T2);

  switch (op) {
    case add : __ daddu(FSR, T2, FSR); break;
    case sub : __ dsubu(FSR, T2, FSR); break;
    case _and: __ andr(FSR, T2, FSR);  break;
    case _or : __ orr(FSR, T2, FSR);   break;
    case _xor: __ xorr(FSR, T2, FSR);  break;
    default : ShouldNotReachHere();
  }
}

// Java requires that this bytecode handle 0x80000000/-1 without raising an
// overflow exception; the result must be 0x80000000. The Godson2 CPU behaves
// the same way, so we need no special-case handling as x86 does.
void TemplateTable::idiv() {
  transition(itos, itos);
  Label not_zero;

  __ bne(FSR, R0, not_zero);
  __ delayed()->nop();
  __ jmp(Interpreter::_throw_ArithmeticException_entry);
  __ delayed()->nop();
  __ bind(not_zero);

  __ pop_i(SSR);
  if (UseLoongsonISA) {
    __ gsdiv(FSR, SSR, FSR);
  } else {
    __ div(SSR, FSR);
    __ mflo(FSR);
  }
}

void TemplateTable::irem() {
  transition(itos, itos);
  Label not_zero;
  __ pop_i(SSR);
  __ div(SSR, FSR);

  __ bne(FSR, R0, not_zero);
  __ delayed()->nop();
  //__ brk(7);
  __ jmp(Interpreter::_throw_ArithmeticException_entry);
  __ delayed()->nop();

  __ bind(not_zero);
  __ mfhi(FSR);
}
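
// Note: MIPS integer division never traps; a zero divisor merely leaves
// HI/LO unpredictable. The explicit zero checks in idiv/irem above are what
// raise ArithmeticException, and mfhi is only reached with a nonzero divisor.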

void TemplateTable::lmul() {
  transition(ltos, ltos);
  __ pop_l(T2);
  if(UseLoongsonISA){
    __ gsdmult(FSR, T2, FSR);
  } else {
    __ dmult(T2, FSR);
    __ mflo(FSR);
  }
}

// NOTE: a zero divisor throws via the shared Interpreter::_throw_ArithmeticException_entry
void TemplateTable::ldiv() {
  transition(ltos, ltos);
  Label normal;

  __ bne(FSR, R0, normal);
  __ delayed()->nop();

  //__ brk(7);    //generate FPE
  __ jmp(Interpreter::_throw_ArithmeticException_entry);
  __ delayed()->nop();

  __ bind(normal);
  __ pop_l(A2);
  if (UseLoongsonISA) {
    __ gsddiv(FSR, A2, FSR);
  } else {
    __ ddiv(A2, FSR);
    __ mflo(FSR);
  }
}

// NOTE: a zero divisor throws via the shared Interpreter::_throw_ArithmeticException_entry
void TemplateTable::lrem() {
  transition(ltos, ltos);
  Label normal;

  __ bne(FSR, R0, normal);
  __ delayed()->nop();

  __ jmp(Interpreter::_throw_ArithmeticException_entry);
  __ delayed()->nop();

  __ bind(normal);
  __ pop_l (A2);

  if(UseLoongsonISA){
    __ gsdmod(FSR, A2, FSR);
  } else {
    __ ddiv(A2, FSR);
    __ mfhi(FSR);
  }
}

// result in FSR
// used registers : T0
void TemplateTable::lshl() {
  transition(itos, ltos);
  __ pop_l(T0);
  __ dsllv(FSR, T0, FSR);
}

// used registers : T0
void TemplateTable::lshr() {
  transition(itos, ltos);
  __ pop_l(T0);
  __ dsrav(FSR, T0, FSR);
}

// used registers : T0
void TemplateTable::lushr() {
  transition(itos, ltos);
  __ pop_l(T0);
  __ dsrlv(FSR, T0, FSR);
}

// result in FSF
void TemplateTable::fop2(Operation op) {
  transition(ftos, ftos);
  switch (op) {
    case add:
      __ lwc1(FTF, at_sp());
      __ add_s(FSF, FTF, FSF);
      break;
    case sub:
      __ lwc1(FTF, at_sp());
      __ sub_s(FSF, FTF, FSF);
      break;
    case mul:
      __ lwc1(FTF, at_sp());
      __ mul_s(FSF, FTF, FSF);
      break;
    case div:
      __ lwc1(FTF, at_sp());
      __ div_s(FSF, FTF, FSF);
      break;
    case rem:
      __ mov_s(F13, FSF);
      __ lwc1(F12, at_sp());
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2);
      break;
    default : ShouldNotReachHere();
  }

  __ daddi(SP, SP, 1 * wordSize);
}
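
// frem/drem have no MIPS hardware instruction, so the rem cases pass the
// operands in F12/F13 and delegate to the SharedRuntime::frem/drem leaf calls.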

// result in FSF
// we don't handle the strict fp flags here
void TemplateTable::dop2(Operation op) {
  transition(dtos, dtos);
  switch (op) {
    case add:
      __ ldc1(FTF, at_sp());
      __ add_d(FSF, FTF, FSF);
      break;
    case sub:
      __ ldc1(FTF, at_sp());
      __ sub_d(FSF, FTF, FSF);
      break;
    case mul:
      __ ldc1(FTF, at_sp());
      __ mul_d(FSF, FTF, FSF);
      break;
    case div:
      __ ldc1(FTF, at_sp());
      __ div_d(FSF, FTF, FSF);
      break;
    case rem:
      __ mov_d(F13, FSF);
      __ ldc1(F12, at_sp());
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2);
      break;
    default : ShouldNotReachHere();
  }

  __ daddi(SP, SP, 2 * wordSize);
}

void TemplateTable::ineg() {
  transition(itos, itos);
  __ subu32(FSR, R0, FSR);
}

void TemplateTable::lneg() {
  transition(ltos, ltos);
  __ dsubu(FSR, R0, FSR);
}

void TemplateTable::fneg() {
  transition(ftos, ftos);
  __ neg_s(FSF, FSF);
}

void TemplateTable::dneg() {
  transition(dtos, dtos);
  __ neg_d(FSF, FSF);
}

// used registers : T2
void TemplateTable::iinc() {
  transition(vtos, vtos);
  locals_index(T2);
  __ lw(FSR, T2, 0);
  __ lb(AT, at_bcp(2));           // get constant
  __ daddu(FSR, FSR, AT);
  __ sw(FSR, T2, 0);
}

// used register : T2
void TemplateTable::wide_iinc() {
  transition(vtos, vtos);
  locals_index_wide(T2);
  __ get_2_byte_integer_at_bcp(FSR, AT, 4);
  __ hswap(FSR);
  __ lw(AT, T2, 0);
  __ daddu(FSR, AT, FSR);
  __ sw(FSR, T2, 0);
}

void TemplateTable::convert() {
  // Checking
#ifdef ASSERT
  {
    TosState tos_in  = ilgl;
    TosState tos_out = ilgl;
    switch (bytecode()) {
      case Bytecodes::_i2l: // fall through
      case Bytecodes::_i2f: // fall through
      case Bytecodes::_i2d: // fall through
      case Bytecodes::_i2b: // fall through
      case Bytecodes::_i2c: // fall through
      case Bytecodes::_i2s: tos_in = itos; break;
      case Bytecodes::_l2i: // fall through
      case Bytecodes::_l2f: // fall through
      case Bytecodes::_l2d: tos_in = ltos; break;
      case Bytecodes::_f2i: // fall through
      case Bytecodes::_f2l: // fall through
      case Bytecodes::_f2d: tos_in = ftos; break;
      case Bytecodes::_d2i: // fall through
      case Bytecodes::_d2l: // fall through
      case Bytecodes::_d2f: tos_in = dtos; break;
      default             : ShouldNotReachHere();
    }
    switch (bytecode()) {
      case Bytecodes::_l2i: // fall through
      case Bytecodes::_f2i: // fall through
      case Bytecodes::_d2i: // fall through
      case Bytecodes::_i2b: // fall through
      case Bytecodes::_i2c: // fall through
      case Bytecodes::_i2s: tos_out = itos; break;
      case Bytecodes::_i2l: // fall through
      case Bytecodes::_f2l: // fall through
      case Bytecodes::_d2l: tos_out = ltos; break;
      case Bytecodes::_i2f: // fall through
      case Bytecodes::_l2f: // fall through
      case Bytecodes::_d2f: tos_out = ftos; break;
      case Bytecodes::_i2d: // fall through
      case Bytecodes::_l2d: // fall through
      case Bytecodes::_f2d: tos_out = dtos; break;
      default             : ShouldNotReachHere();
    }
    transition(tos_in, tos_out);
  }
#endif // ASSERT
  1707   // Conversion
  1708   // (Note: use pushl(ecx)/popl(ecx) for 1/2-word stack-ptr manipulation)
  1709   switch (bytecode()) {
  1710     case Bytecodes::_i2l:
  1711       __ sll(FSR, FSR, 0);
  1712       break;
  1713     case Bytecodes::_i2f:
  1714       __ mtc1(FSR, FSF);
  1715       __ cvt_s_w(FSF, FSF);
  1716       break;
  1717     case Bytecodes::_i2d:
  1718       __ mtc1(FSR, FSF);
  1719       __ cvt_d_w(FSF, FSF);
  1720       break;
  1721     case Bytecodes::_i2b:
  1722       __ seb(FSR, FSR);
  1723       break;
  1724     case Bytecodes::_i2c:
  1725       __ andi(FSR, FSR, 0xFFFF);  // zero-extend to 16 bits (java char): clears the upper 48 bits
  1726       break;
  1727     case Bytecodes::_i2s:
  1728       __ seh(FSR, FSR);
  1729       break;
  1730     case Bytecodes::_l2i:
  1731       __ sll(FSR, FSR, 0);
  1732       break;
  1733     case Bytecodes::_l2f:
  1734       __ dmtc1(FSR, FSF);
  1735       __ cvt_s_l(FSF, FSF);
  1736       break;
  1737     case Bytecodes::_l2d:
  1738       __ dmtc1(FSR, FSF);
  1739       __ cvt_d_l(FSF, FSF);
  1740       break;
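           // For f2i/f2l below: trunc_w_s/trunc_l_s return the maximum integer
           // value for NaN and on overflow. The fix-up code maps NaN to 0 (via
           // c_un_s/movt) and, when the truncated result equals the maximum,
           // inspects the operand's sign bit to turn negative overflow into
           // the minimum integer value.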
  1741     case Bytecodes::_f2i:
  1743       Label L;
  1745       __ trunc_w_s(F12, FSF);
  1746       __ move(AT, 0x7fffffff);
  1747       __ mfc1(FSR, F12);
  1748       __ c_un_s(FSF, FSF);    //NaN?
  1749       __ movt(FSR, R0);
  1751       __ bne(AT, FSR, L);
  1752       __ delayed()->lui(T9, 0x8000);
  1754       __ mfc1(AT, FSF);
  1755       __ andr(AT, AT, T9);
  1757       __ movn(FSR, T9, AT);
  1759       __ bind(L);
  1761       break;
  1762     case Bytecodes::_f2l:
  1764       Label L;
  1766       __ trunc_l_s(F12, FSF);
  1767       __ daddiu(AT, R0, -1);
  1768       __ dsrl(AT, AT, 1);
  1769       __ dmfc1(FSR, F12);
  1770       __ c_un_s(FSF, FSF);    //NaN?
  1771       __ movt(FSR, R0);
  1773       __ bne(AT, FSR, L);
  1774       __ delayed()->lui(T9, 0x8000);
  1776       __ mfc1(AT, FSF);
  1777       __ andr(AT, AT, T9);
  1779       __ dsll32(T9, T9, 0);
  1780       __ movn(FSR, T9, AT);
  1782       __ bind(L);
  1784       break;
  1785     case Bytecodes::_f2d:
  1786       __ cvt_d_s(FSF, FSF);
  1787       break;
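           // d2i/d2l use a different fix-up: when the truncated result equals
           // the maximum value, the operand is compared against 0.0 (c_ult_d)
           // to detect negative overflow, and c_un_d/movt maps NaN to 0.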
  1788     case Bytecodes::_d2i:
  1790       Label L;
  1792       __ trunc_w_d(F12, FSF);
  1793       __ move(AT, 0x7fffffff);
  1794       __ mfc1(FSR, F12);
  1796       __ bne(FSR, AT, L);
  1797       __ delayed()->mtc1(R0, F12);
  1799       __ cvt_d_w(F12, F12);
  1800       __ c_ult_d(FSF, F12);
  1801       __ bc1f(L);
  1802       __ delayed()->addiu(T9, R0, -1);
  1804       __ c_un_d(FSF, FSF);    //NaN?
  1805       __ subu32(FSR, T9, AT);
  1806       __ movt(FSR, R0);
  1808       __ bind(L);
  1810       break;
  1811     case Bytecodes::_d2l:
  1813       Label L;
  1815       __ trunc_l_d(F12, FSF);
  1816       __ daddiu(AT, R0, -1);
  1817       __ dsrl(AT, AT, 1);
  1818       __ dmfc1(FSR, F12);
  1820       __ bne(FSR, AT, L);
  1821       __ delayed()->mtc1(R0, F12);
  1823       __ cvt_d_w(F12, F12);
  1824       __ c_ult_d(FSF, F12);
  1825       __ bc1f(L);
  1826       __ delayed()->daddiu(T9, R0, -1);
  1828       __ c_un_d(FSF, FSF);    //NaN?
  1829       __ subu(FSR, T9, AT);
  1830       __ movt(FSR, R0);
  1832       __ bind(L);
  1834       break;
  1835     case Bytecodes::_d2f:
  1836       __ cvt_s_d(FSF, FSF);
  1837       break;
  1838     default             :
  1839       ShouldNotReachHere();
  1840   }
  1841 }
  1843 void TemplateTable::lcmp() {
  1844   transition(ltos, itos);
  1846   Label low, high, done;
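         // A long occupies two expression-stack slots: the first pop fetches
         // the first operand (the second is already in FSR) and the second pop
         // discards the dummy high slot into R0. The result follows the JVM
         // spec: -1 (less), 0 (equal) or 1 (greater) in FSR.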
  1847   __ pop(T0);
  1848   __ pop(R0);
  1849   __ slt(AT, T0, FSR);
  1850   __ bne(AT, R0, low);
  1851   __ delayed()->nop();
  1853   __ bne(T0, FSR, high);
  1854   __ delayed()->nop();
  1856   __ li(FSR, (long)0);
  1857   __ b(done);
  1858   __ delayed()->nop();
  1860   __ bind(low);
  1861   __ li(FSR, (long)-1);
  1862   __ b(done);
  1863   __ delayed()->nop();
  1865   __ bind(high);
  1866   __ li(FSR, (long)1);
  1867   __ b(done);
  1868   __ delayed()->nop();
  1870   __ bind(done);
  1873 void TemplateTable::float_cmp(bool is_float, int unordered_result) {
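         // unordered_result distinguishes fcmpl (-1) from fcmpg (+1) semantics:
         // for -1 an unordered (NaN) operand must compare as "less", so c_ult
         // (unordered-or-less) is used; otherwise c_olt (ordered-less) lets a
         // NaN fall through to the +1 result.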
  1874   Label less, done;
  1876   __ move(FSR, R0);
  1878   if (is_float) {
  1879     __ lwc1(FTF, at_sp());
  1880     __ c_eq_s(FTF, FSF);
  1881     __ bc1t(done);
  1882     __ delayed()->daddi(SP, SP, 1 * wordSize);
  1884     if (unordered_result<0)
  1885       __ c_ult_s(FTF, FSF);
  1886     else
  1887       __ c_olt_s(FTF, FSF);
  1888   } else {
  1889     __ ldc1(FTF, at_sp());
  1890     __ c_eq_d(FTF, FSF);
  1891     __ bc1t(done);
  1892     __ delayed()->daddi(SP, SP, 2 * wordSize);
  1894     if (unordered_result<0)
  1895       __ c_ult_d(FTF, FSF);
  1896     else
  1897       __ c_olt_d(FTF, FSF);
  1899   __ bc1t(less);
  1900   __ delayed()->nop();
  1901   __ move(FSR, 1);
  1902   __ b(done);
  1903   __ delayed()->nop();
  1904   __ bind(less);
  1905   __ move(FSR, -1);
  1906   __ bind(done);
  1910 // used registers : T3, A7, Rnext
  1911 // FSR : return bci, this is defined by the vm specification
  1912 // T2 : MDO taken count
  1913 // T3 : method
  1914 // A7 : offset
  1915 // Rnext : next bytecode, this is required by dispatch_base
  1916 void TemplateTable::branch(bool is_jsr, bool is_wide) {
  1917   __ get_method(T3);
  1918   __ profile_taken_branch(A7, T2);    // only C2 meaningful
  1920 #ifndef CORE
  1921   const ByteSize be_offset = MethodCounters::backedge_counter_offset() +
  1922                              InvocationCounter::counter_offset();
  1923   const ByteSize inv_offset = MethodCounters::invocation_counter_offset() +
  1924                               InvocationCounter::counter_offset();
  1925 #endif // CORE
  1927   // Load up A7 with the branch displacement
  1928   if (!is_wide) {
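           // bcp[1] is read with lb (sign-extending) and bcp[2] with lbu, so
           // the assembled 16-bit displacement in A7 is correctly sign-extended.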
  1929     __ lb(A7, BCP, 1);
  1930     __ lbu(AT, BCP, 2);
  1931     __ dsll(A7, A7, 8);
  1932     __ orr(A7, A7, AT);
  1933   } else {
  1934     __ get_4_byte_integer_at_bcp(A7, AT, 1);
  1935     __ swap(A7);
  1936   }
  1938   // Handle all the JSR stuff here, then exit.
  1939   // It's much shorter and cleaner than intermingling with the non-JSR
  1940   // normal-branch stuff occurring below.
  1941   if (is_jsr) {
  1942     // Pre-load the next target bytecode into Rnext
  1943     __ dadd(AT, BCP, A7);
  1944     __ lbu(Rnext, AT, 0);
  1946     // compute return address as bci in FSR
  1947     __ daddi(FSR, BCP, (is_wide?5:3) - in_bytes(ConstMethod::codes_offset()));
  1948     __ ld(AT, T3, in_bytes(Method::const_offset()));
  1949     __ dsub(FSR, FSR, AT);
  1950     // Adjust the bcp in BCP by the displacement in A7
  1951     __ dadd(BCP, BCP, A7);
  1952     // jsr returns atos that is not an oop
  1953     // Push return address
  1954     __ push_i(FSR);
  1955     // jsr returns vtos
  1956     __ dispatch_only_noverify(vtos);
  1958     return;
  1961   // Normal (non-jsr) branch handling
  1963   // Adjust the bcp in BCP by the displacement in A7
  1964   __ dadd(BCP, BCP, A7);
  1966 #ifdef CORE
  1967   // Pre-load the next target bytecode into Rnext
  1968   __ lbu(Rnext, BCP, 0);
  1969   // continue with the bytecode @ target
  1970   __ dispatch_only(vtos);
  1971 #else
  1972   assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters");
  1973   Label backedge_counter_overflow;
  1974   Label profile_method;
  1975   Label dispatch;
  1976   if (UseLoopCounter) {
  1977     // increment backedge counter for backward branches
  1978     // T2: MDO bumped taken-count
  1980     // T3: method
  1981     // A7: target offset
  1982     // BCP: target bcp
  1983     // LVP: locals pointer
  1984     __ bgtz(A7, dispatch);  // check if forward or backward branch
  1985     __ delayed()->nop();
  1987     // check if MethodCounters exists
  1988     Label has_counters;
  1989     __ ld(AT, T3, in_bytes(Method::method_counters_offset()));  // use AT as MDO, TEMP
  1990     __ bne(AT, R0, has_counters);
  1991     __ nop();
  1992     __ push(T3);
  1993     //__ push(A7);
  1994     __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters),
  1995                T3);
  1996     //__ pop(A7);
  1997     __ pop(T3);
  1998     __ ld(AT, T3, in_bytes(Method::method_counters_offset()));  // use AT as MDO, TEMP
  1999     __ beq(AT, R0, dispatch);
  2000     __ nop();
  2001     __ bind(has_counters);
  2003     // increment back edge counter
  2004     __ ld(T1, T3, in_bytes(Method::method_counters_offset()));
  2005     __ lw(T0, T1, in_bytes(be_offset));
  2006     __ increment(T0, InvocationCounter::count_increment);
  2007     __ sw(T0, T1, in_bytes(be_offset));
  2009     // load invocation counter
  2010     __ lw(T1, T1, in_bytes(inv_offset));
  2011   // buffer bit added, no mask needed
  2013   // add backedge counter & invocation counter
  2014     __ dadd(T1, T1, T0);
  2016     if (ProfileInterpreter) {
  2017       // Test to see if we should create a method data oop
  2018       // T1 : backedge counter & invocation counter
  2019       if (Assembler::is_simm16(InvocationCounter::InterpreterProfileLimit)) {
  2020         __ slti(AT, T1, InvocationCounter::InterpreterProfileLimit);
  2021       } else {
  2022         __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit);
  2023         __ lw(AT, AT, 0);
  2024         __ slt(AT, T1, AT);
  2027       __ bne(AT, R0, dispatch);
  2028       __ delayed()->nop();
  2030       // if no method data exists, go to profile method
  2031       __ test_method_data_pointer(T1, profile_method);
  2033       if (UseOnStackReplacement) {
  2034         if (Assembler::is_simm16(InvocationCounter::InterpreterBackwardBranchLimit)) {
  2035           __ slti(AT, T2, InvocationCounter::InterpreterBackwardBranchLimit);
  2036         } else {
  2037           __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit);
  2038           __ lw(AT, AT, 0);
  2039           __ slt(AT, T2, AT);
  2042         __ bne(AT, R0, dispatch);
  2043         __ delayed()->nop();
  2045         // When ProfileInterpreter is on, the backedge_count comes
  2046         // from the methodDataOop, whose value does not get reset on
  2047         // the call to frequency_counter_overflow().
  2048         // To avoid excessive calls to the overflow routine while
  2049         // the method is being compiled, add a second test to make
  2050         // sure the overflow function is called only once every
  2051         // overflow_frequency.
  2052         const int overflow_frequency = 1024;
  2053         __ andi(AT, T2, overflow_frequency-1);
  2054         __ beq(AT, R0, backedge_counter_overflow);
  2055         __ delayed()->nop();
  2057     } else {
  2058       if (UseOnStackReplacement) {
  2059         // check for overflow against eax, which is the sum of the counters
  2060         __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit);
  2061         __ lw(AT, AT, 0);
  2062         __ slt(AT, T1, AT);
  2063         __ beq(AT, R0, backedge_counter_overflow);
  2064         __ delayed()->nop();
  2067     __ bind(dispatch);
  2070   // Pre-load the next target bytecode into Rnext
  2071   __ lbu(Rnext, BCP, 0);
  2073   // continue with the bytecode @ target
  2074   // FSR: return bci for jsr's, unused otherwise
  2075   // Rnext: target bytecode
  2076   // BCP: target bcp
  2077   __ dispatch_only(vtos);
  2079   if (UseLoopCounter) {
  2080     if (ProfileInterpreter) {
  2081       // Out-of-line code to allocate method data oop.
  2082       __ bind(profile_method);
  2083       __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method));
  2084       __ lbu(Rnext, BCP, 0);
  2085       __ set_method_data_pointer_for_bcp();
  2086       __ b(dispatch);
  2087       __ delayed()->nop();
  2090     if (UseOnStackReplacement) {
  2091       // invocation counter overflow
  2092       __ bind(backedge_counter_overflow);
  2093       __ sub(A7, BCP, A7);  // branch bcp
  2094       call_VM(NOREG, CAST_FROM_FN_PTR(address,
  2095       InterpreterRuntime::frequency_counter_overflow), A7);
  2096       __ lbu(Rnext, BCP, 0);
  2098       // V0: osr nmethod (osr ok) or NULL (osr not possible)
  2099       // V1: osr adapter frame return address
  2100       // Rnext: target bytecode
  2101       // LVP: locals pointer
  2102       // BCP: bcp
  2103       __ beq(V0, R0, dispatch);
  2104       __ delayed()->nop();
  2105       // nmethod may have been invalidated (VM may block upon call_VM return)
  2106       __ lw(T3, V0, nmethod::entry_bci_offset());
  2107       __ move(AT, InvalidOSREntryBci);
  2108       __ beq(AT, T3, dispatch);
  2109       __ delayed()->nop();
  2110       // We need to prepare to execute the OSR method. First we must
  2111       // migrate the locals and monitors off of the stack.
  2112       // V0: osr nmethod (osr ok) or NULL (osr not possible)
  2113       // V1: osr adapter frame return address
  2114       // Rnext: target bytecode
  2115       // LVP: locals pointer
  2116       // BCP: bcp
  2117       __ move(BCP, V0);
  2119       const Register thread = TREG;
  2120 #ifndef OPT_THREAD
  2121       __ get_thread(thread);
  2122 #endif
  2123       call_VM(noreg, CAST_FROM_FN_PTR(address,
  2124       SharedRuntime::OSR_migration_begin));
  2125       // V0 is the OSR buffer; move it to the expected parameter location
  2126       //refer to osrBufferPointer in c1_LIRAssembler_mips.cpp
  2127       __ move(T0, V0);
  2129       // pop the interpreter frame
  2130       __ ld(A7, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
  2131       //FIXME, shall we keep the return address on the stack?
  2132       __ leave();                                // remove frame anchor
  2133       __ move(LVP, RA);
  2134       __ move(SP, A7);
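               // Realign SP: compiled OSR code expects StackAlignmentInBytes
               // alignment, which the interpreter frame does not guarantee.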
  2136       __ move(AT, -(StackAlignmentInBytes));
  2137       __ andr(SP, SP, AT);
  2139       // push the (possibly adjusted) return address
  2140       //refer to osr_entry in c1_LIRAssembler_mips.cpp
  2141       __ ld(AT, BCP, nmethod::osr_entry_point_offset());
  2142       __ jr(AT);
  2143       __ delayed()->nop();
  2146 #endif // not CORE
  2150 void TemplateTable::if_0cmp(Condition cc) {
  2151   transition(itos, vtos);
  2152   // assume branch is more often taken than not (loops use backward branches)
  2153   Label not_taken;
  2154   switch(cc) {
  2155     case not_equal:
  2156       __ beq(FSR, R0, not_taken);
  2157       break;
  2158     case equal:
  2159       __ bne(FSR, R0, not_taken);
  2160       break;
  2161     case less:
  2162       __ bgez(FSR, not_taken);
  2163       break;
  2164     case less_equal:
  2165       __ bgtz(FSR, not_taken);
  2166       break;
  2167     case greater:
  2168       __ blez(FSR, not_taken);
  2169       break;
  2170     case greater_equal:
  2171       __ bltz(FSR, not_taken);
  2172       break;
  2173   }
  2174   __ delayed()->nop();
  2176   branch(false, false);
  2178   __ bind(not_taken);
  2179   __ profile_not_taken_branch(FSR);
  2182 void TemplateTable::if_icmp(Condition cc) {
  2183   transition(itos, vtos);
  2184   // assume branch is more often taken than not (loops use backward branches)
  2185   Label not_taken;
  2187   __ pop_i(SSR);
  2188   switch(cc) {
  2189     case not_equal:
  2190       __ beq(SSR, FSR, not_taken);
  2191       break;
  2192     case equal:
  2193       __ bne(SSR, FSR, not_taken);
  2194       break;
  2195     case less:
  2196       __ slt(AT, SSR, FSR);
  2197       __ beq(AT, R0, not_taken);
  2198       break;
  2199     case less_equal:
  2200       __ slt(AT, FSR, SSR);
  2201       __ bne(AT, R0, not_taken);
  2202       break;
  2203     case greater:
  2204       __ slt(AT, FSR, SSR);
  2205       __ beq(AT, R0, not_taken);
  2206       break;
  2207     case greater_equal:
  2208       __ slt(AT, SSR, FSR);
  2209       __ bne(AT, R0, not_taken);
  2210       break;
  2211   }
  2212   __ delayed()->nop();
  2214   branch(false, false);
  2215   __ bind(not_taken);
  2216   __ profile_not_taken_branch(FSR);
  2219 void TemplateTable::if_nullcmp(Condition cc) {
  2220   transition(atos, vtos);
  2221   // assume branch is more often taken than not (loops use backward branches)
  2222   Label not_taken;
  2223   switch(cc) {
  2224     case not_equal:
  2225       __ beq(FSR, R0, not_taken);
  2226       break;
  2227     case equal:
  2228       __ bne(FSR, R0, not_taken);
  2229       break;
  2230     default:
  2231       ShouldNotReachHere();
  2232   }
  2233   __ delayed()->nop();
  2235   branch(false, false);
  2236   __ bind(not_taken);
  2237   __ profile_not_taken_branch(FSR);
  2241 void TemplateTable::if_acmp(Condition cc) {
  2242   transition(atos, vtos);
  2243   // assume branch is more often taken than not (loops use backward branches)
  2244   Label not_taken;
  2245   //  __ lw(SSR, SP, 0);
  2246   __ pop_ptr(SSR);
  2247   switch(cc) {
  2248     case not_equal:
  2249       __ beq(SSR, FSR, not_taken);
  2250       break;
  2251     case equal:
  2252       __ bne(SSR, FSR, not_taken);
  2253       break;
  2254     default:
  2255       ShouldNotReachHere();
  2256   }
  2257   __ delayed()->nop();
  2259   branch(false, false);
  2261   __ bind(not_taken);
  2262   __ profile_not_taken_branch(FSR);
  2265 // used registers : T1, T2, T3
  2266 // T1 : method
  2267 // T2 : return bci
  2268 void TemplateTable::ret() {
  2269   transition(vtos, vtos);
  2271   locals_index(T2);
  2272   __ ld(T2, T2, 0);
  2273   __ profile_ret(T2, T3);
  2275   __ get_method(T1);
  2276   __ ld(BCP, T1, in_bytes(Method::const_offset()));
  2277   __ dadd(BCP, BCP, T2);
  2278   __ daddi(BCP, BCP, in_bytes(ConstMethod::codes_offset()));
  2280   __ dispatch_next(vtos);
  2283 // used registers : T1, T2, T3
  2284 // T1 : method
  2285 // T2 : return bci
  2286 void TemplateTable::wide_ret() {
  2287   transition(vtos, vtos);
  2289   locals_index_wide(T2);
  2290   __ ld(T2, T2, 0);                   // get return bci, compute return bcp
  2291   __ profile_ret(T2, T3);
  2293   __ get_method(T1);
  2294   __ ld(BCP, T1, in_bytes(Method::const_offset()));
  2295   __ dadd(BCP, BCP, T2);
  2296   __ daddi(BCP, BCP, in_bytes(ConstMethod::codes_offset()));
  2298   __ dispatch_next(vtos);
  2301 // used register T2, T3, A7, Rnext
  2302 // T2 : bytecode pointer
  2303 // T3 : low
  2304 // A7 : high
  2305 // Rnext : dest bytecode, required by dispatch_base
  2306 void TemplateTable::tableswitch() {
  2307   Label default_case, continue_execution;
  2308   transition(itos, vtos);
  2310   // align BCP
  2311   __ daddi(T2, BCP, BytesPerInt);
  2312   __ li(AT, -BytesPerInt);
  2313   __ andr(T2, T2, AT);
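         // T2 now points at the first 4-byte-aligned address past the opcode;
         // the default/low/high operands of tableswitch are stored there.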
  2315   // load lo & hi
  2316   __ lw(T3, T2, 1 * BytesPerInt);
  2317   __ swap(T3);
  2318   __ lw(A7, T2, 2 * BytesPerInt);
  2319   __ swap(A7);
  2321   // check against lo & hi
  2322   __ slt(AT, FSR, T3);
  2323   __ bne(AT, R0, default_case);
  2324   __ delayed()->nop();
  2326   __ slt(AT, A7, FSR);
  2327   __ bne(AT, R0, default_case);
  2328   __ delayed()->nop();
  2330   // lookup dispatch offset, in A7 big endian
  2331   __ dsub(FSR, FSR, T3);
  2332   __ dsll(AT, FSR, Address::times_4);
  2333   __ dadd(AT, T2, AT);
  2334   __ lw(A7, AT, 3 * BytesPerInt);
  2335   __ profile_switch_case(FSR, T9, T3);
  2337   __ bind(continue_execution);
  2338   __ swap(A7);
  2339   __ dadd(BCP, BCP, A7);
  2340   __ lbu(Rnext, BCP, 0);
  2341   __ dispatch_only(vtos);
  2343   // handle default
  2344   __ bind(default_case);
  2345   __ profile_switch_default(FSR);
  2346   __ lw(A7, T2, 0);
  2347   __ b(continue_execution);
  2348   __ delayed()->nop();
  2351 void TemplateTable::lookupswitch() {
  2352   transition(itos, itos);
  2353   __ stop("lookupswitch bytecode should have been rewritten");
  2356 // used registers : T2, T3, A7, Rnext
  2357 // T2 : bytecode pointer
  2358 // T3 : pair index
  2359 // A7 : offset
  2360 // Rnext : dest bytecode
  2361 // the data after the opcode is the same as lookupswitch
  2362 // see Rewriter::rewrite_method for more information
  2363 void TemplateTable::fast_linearswitch() {
  2364   transition(itos, vtos);
  2365   Label loop_entry, loop, found, continue_execution;
  2367   // byte-swap FSR so we can avoid byte-swapping the table entries
  2368   __ swap(FSR);
  2370   // align BCP
  2371   __ daddi(T2, BCP, BytesPerInt);
  2372   __ li(AT, -BytesPerInt);
  2373   __ andr(T2, T2, AT);
  2375   // set counter
  2376   __ lw(T3, T2, BytesPerInt);
  2377   __ swap(T3);
  2378   __ b(loop_entry);
  2379   __ delayed()->nop();
  2381   // table search
  2382   __ bind(loop);
  2383   // get the entry value
  2384   __ dsll(AT, T3, Address::times_8);
  2385   __ dadd(AT, T2, AT);
  2386   __ lw(AT, AT, 2 * BytesPerInt);
  2388   // found?
  2389   __ beq(FSR, AT, found);
  2390   __ delayed()->nop();
  2392   __ bind(loop_entry);
  2393   __ bgtz(T3, loop);
  2394   __ delayed()->daddiu(T3, T3, -1);
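         // Note: the daddiu in the delay slot executes whether or not the
         // branch is taken, so T3 is decremented on every pass, including the
         // final fall-through.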
  2396   // default case
  2397   __ profile_switch_default(FSR);
  2398   __ lw(A7, T2, 0);
  2399   __ b(continue_execution);
  2400   __ delayed()->nop();
  2402   // entry found -> get offset
  2403   __ bind(found);
  2404   __ dsll(AT, T3, Address::times_8);
  2405   __ dadd(AT, T2, AT);
  2406   __ lw(A7, AT, 3 * BytesPerInt);
  2407   __ profile_switch_case(T3, FSR, T2);
  2409   // continue execution
  2410   __ bind(continue_execution);
  2411   __ swap(A7);
  2412   __ dadd(BCP, BCP, A7);
  2413   __ lbu(Rnext, BCP, 0);
  2414   __ dispatch_only(vtos);
  2417 // used registers : T0, T1, T2, T3, A7, Rnext
  2418 // T2 : pairs address(array)
  2419 // Rnext : dest bytecode
  2420 // the data after the opcode is the same as lookupswitch
  2421 // see Rewriter::rewrite_method for more information
  2422 void TemplateTable::fast_binaryswitch() {
  2423   transition(itos, vtos);
  2424   // Implementation using the following core algorithm:
  2425   //
  2426   // int binary_search(int key, LookupswitchPair* array, int n) {
  2427   //   // Binary search according to "Methodik des Programmierens" by
  2428   //   // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985.
  2429   //   int i = 0;
  2430   //   int j = n;
  2431   //   while (i+1 < j) {
  2432   //     // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q)
  2433   //     // with      Q: for all i: 0 <= i < n: key < a[i]
  2434   //     // where a stands for the array and assuming that the (nonexistent)
  2435   //     // element a[n] is infinitely big.
  2436   //     int h = (i + j) >> 1;
  2437   //     // i < h < j
  2438   //     if (key < array[h].fast_match()) {
  2439   //       j = h;
  2440   //     } else {
  2441   //       i = h;
  2442   //     }
  2443   //   }
  2444   //   // R: a[i] <= key < a[i+1] or Q
  2445   //   // (i.e., if key is within array, i is the correct index)
  2446   //   return i;
  2447   // }
  2449   // register allocation
  2450   const Register array = T2;
  2451   const Register i = T3, j = A7;
  2452   const Register h = T1;
  2453   const Register temp = T0;
  2454   const Register key = FSR;
  2456   // setup array
  2457   __ daddi(array, BCP, 3*BytesPerInt);
  2458   __ li(AT, -BytesPerInt);
  2459   __ andr(array, array, AT);
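         // Each LookupswitchPair is two BytesPerInt words (match, offset),
         // hence the Address::times_8 scaling used to index the array below.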
  2461   // initialize i & j
  2462   __ move(i, R0);
  2463   __ lw(j, array, - 1 * BytesPerInt);
  2464   // Convert j into native byteordering
  2465   __ swap(j);
  2467   // and start
  2468   Label entry;
  2469   __ b(entry);
  2470   __ delayed()->nop();
  2472   // binary search loop
  2474     Label loop;
  2475     __ bind(loop);
  2476     // int h = (i + j) >> 1;
  2477     __ dadd(h, i, j);
  2478     __ dsrl(h, h, 1);
  2479     // if (key < array[h].fast_match()) {
  2480     //   j = h;
  2481     // } else {
  2482     //   i = h;
  2483     // }
  2484     // Convert array[h].match to native byte-ordering before compare
  2485     __ dsll(AT, h, Address::times_8);
  2486     __ dadd(AT, array, AT);
  2487     __ lw(temp, AT, 0 * BytesPerInt);
  2488     __ swap(temp);
  2491       Label set_i, end_of_if;
  2492       __ slt(AT, key, temp);
  2493       __ beq(AT, R0, set_i);
  2494       __ delayed()->nop();
  2496       __ b(end_of_if);
  2497       __ delayed(); __ move(j, h);
  2499       __ bind(set_i);
  2500       __ move(i, h);
  2502       __ bind(end_of_if);
  2504     // while (i+1 < j)
  2505     __ bind(entry);
  2506     __ daddi(h, i, 1);
  2507     __ slt(AT, h, j);
  2508     __ bne(AT, R0, loop);
  2509     __ delayed()->nop();
  2512   // end of binary search, result index is i (must check again!)
  2513   Label default_case;
  2514   // Convert array[i].match to native byte-ordering before compare
  2515   __ dsll(AT, i, Address::times_8);
  2516   __ dadd(AT, array, AT);
  2517   __ lw(temp, AT, 0 * BytesPerInt);
  2518   __ swap(temp);
  2519   __ bne(key, temp, default_case);
  2520   __ delayed()->nop();
  2522   // entry found -> j = offset
  2523   __ dsll(AT, i, Address::times_8);
  2524   __ dadd(AT, array, AT);
  2525   __ lw(j, AT, 1 * BytesPerInt);
  2526   __ profile_switch_case(i, key, array);
  2527   __ swap(j);
  2529   __ dadd(BCP, BCP, j);
  2530   __ lbu(Rnext, BCP, 0);
  2531   __ dispatch_only(vtos);
  2533   // default case -> j = default offset
  2534   __ bind(default_case);
  2535   __ profile_switch_default(i);
  2536   __ lw(j, array, - 2 * BytesPerInt);
  2537   __ swap(j);
  2538   __ dadd(BCP, BCP, j);
  2539   __ lbu(Rnext, BCP, 0);
  2540   __ dispatch_only(vtos);
  2543 void TemplateTable::_return(TosState state) {
  2544   transition(state, state);
  2545   assert(_desc->calls_vm(),
  2546       "inconsistent calls_vm information"); // call in remove_activation
  2548   if (_desc->bytecode() == Bytecodes::_return_register_finalizer) {
  2549     assert(state == vtos, "only valid state");
  2550     __ ld(T1, aaddress(0));
  2551     __ load_klass(LVP, T1);
  2552     __ lw(LVP, LVP, in_bytes(Klass::access_flags_offset()));
  2553     __ move(AT, JVM_ACC_HAS_FINALIZER);
  2554     __ andr(AT, AT, LVP);//by_css
  2555     Label skip_register_finalizer;
  2556     __ beq(AT, R0, skip_register_finalizer);
  2557     __ delayed()->nop();
  2558     __ call_VM(noreg, CAST_FROM_FN_PTR(address,
  2559     InterpreterRuntime::register_finalizer), T1);
  2560     __ bind(skip_register_finalizer);
  2562   __ remove_activation(state, T9);
  2563   __ sync();
  2565   __ jr(T9);
  2566   __ delayed()->nop();
  2569 // ----------------------------------------------------------------------------
  2570 // Volatile variables demand their effects be made known to all CPU's
  2571 // in order.  Store buffers on most chips allow reads & writes to
  2572 // reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode
  2573 // without some kind of memory barrier (i.e., it's not sufficient that
  2574 // the interpreter does not reorder volatile references, the hardware
  2575 // also must not reorder them).
  2576 //
  2577 // According to the new Java Memory Model (JMM):
  2578 // (1) All volatiles are serialized wrt to each other.  ALSO reads &
  2579 //     writes act as acquire & release, so:
  2580 // (2) A read cannot let unrelated NON-volatile memory refs that
  2581 //     happen after the read float up to before the read.  It's OK for
  2582 //     non-volatile memory refs that happen before the volatile read to
  2583 //     float down below it.
  2584 // (3) Similarly, a volatile write cannot let unrelated NON-volatile
  2585 //     memory refs that happen BEFORE the write float down to after the
  2586 //     write.  It's OK for non-volatile memory refs that happen after the
  2587 //     volatile write to float up before it.
  2588 //
  2589 // We only put in barriers around volatile refs (they are expensive),
  2590 // not _between_ memory refs (that would require us to track the
  2591 // flavor of the previous memory refs).  Requirements (2) and (3)
  2592 // require some barriers before volatile stores and after volatile
  2593 // loads.  These nearly cover requirement (1) but miss the
  2594 // volatile-store-volatile-load case.  This final case is placed after
  2595 // volatile-stores although it could just as well go before
  2596 // volatile-loads.
  2597 //void TemplateTable::volatile_barrier(Assembler::Membar_mask_bits
  2598 //                                     order_constraint) {
  2599 void TemplateTable::volatile_barrier( ) {
  2600   // Helper function to insert a is-volatile test and memory barrier
  2601   //if (os::is_MP()) { // Not needed on single CPU
  2602   //  __ membar(order_constraint);
  2603   //}
  2604   if( !os::is_MP() ) return;  // Not needed on single CPU
  2605   __ sync();
  2608 // We don't shift the index left by 2 bits in get_cache_and_index_at_bcp,
  2609 // because the index always needs shifting when it is used. A ConstantPoolCacheEntry
  2610 // is 16 bytes long, and index is the index into the
  2611 // ConstantPoolCache, so cache + base_offset() + index * 16 is
  2612 // the address of the corresponding ConstantPoolCacheEntry.
  2613 // used registers : T2
  2614 // NOTE : the returned index must still be shifted left by 4 to form an address!
  2615 void TemplateTable::resolve_cache_and_index(int byte_no,
  2616                                             Register Rcache,
  2617                                             Register index,
  2618                                             size_t index_size) {
  2619   assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
  2620   const Register temp = A1;
  2621   assert_different_registers(Rcache, index);
  2623   Label resolved;
  2624   __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size);
  2625   // is resolved?
  2626   int i = (int)bytecode();
  2627   __ addi(temp, temp, -i);
  2628   __ beq(temp, R0, resolved);
  2629   __ delayed()->nop();
  2630   // resolve first time through
  2631   address entry;
  2632   switch (bytecode()) {
  2633     case Bytecodes::_getstatic      : // fall through
  2634     case Bytecodes::_putstatic      : // fall through
  2635     case Bytecodes::_getfield       : // fall through
  2636     case Bytecodes::_putfield       :
  2637       entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put);
  2638       break;
  2639     case Bytecodes::_invokevirtual  : // fall through
  2640     case Bytecodes::_invokespecial  : // fall through
  2641     case Bytecodes::_invokestatic   : // fall through
  2642     case Bytecodes::_invokeinterface:
  2643       entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke);
  2644       break;
  2645     case Bytecodes::_invokehandle:
  2646       entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokehandle);
  2647       break;
  2648     case Bytecodes::_invokedynamic:
  2649       entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic);
  2650       break;
  2651     default                          :
  2652       fatal(err_msg("unexpected bytecode: %s", Bytecodes::name(bytecode())));
  2653       break;
  2656   __ move(temp, i);
  2657   __ call_VM(NOREG, entry, temp);
  2659   // Update registers with resolved info
  2660   __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size);
  2661   __ bind(resolved);
  2664 // The Rcache and index registers must be set before call
  2665 void TemplateTable::load_field_cp_cache_entry(Register obj,
  2666                                               Register cache,
  2667                                               Register index,
  2668                                               Register off,
  2669                                               Register flags,
  2670                                               bool is_static = false) {
  2671   assert_different_registers(cache, index, flags, off);
  2673   ByteSize cp_base_offset = ConstantPoolCache::base_offset();
  2674   // Field offset
  2675   __ dsll(AT, index, Address::times_ptr);
  2676   __ dadd(AT, cache, AT);
  2677   __ ld(off, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f2_offset()));
  2678   // Flags
  2679   __ ld(flags, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset()));
  2681   // klass overwrite register
  2682   if (is_static) {
  2683     __ ld(obj, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f1_offset()));
  2684     const int mirror_offset = in_bytes(Klass::java_mirror_offset());
  2685     __ ld(obj, Address(obj, mirror_offset));
  2687     __ verify_oop(obj);
  2691 // get the method, itable_index and flags of the current invoke
  2692 void TemplateTable::load_invoke_cp_cache_entry(int byte_no,
  2693                                                Register method,
  2694                                                Register itable_index,
  2695                                                Register flags,
  2696                                                bool is_invokevirtual,
  2697                                                bool is_invokevfinal, /*unused*/
  2698                                                bool is_invokedynamic) {
  2699   // setup registers
  2700   const Register cache = T3;
  2701   const Register index = T1;
  2702   assert_different_registers(method, flags);
  2703   assert_different_registers(method, cache, index);
  2704   assert_different_registers(itable_index, flags);
  2705   assert_different_registers(itable_index, cache, index);
  2706   assert(is_invokevirtual == (byte_no == f2_byte), "is invokevirtual flag redundant");
  2707   // determine constant pool cache field offsets
  2708   const int method_offset = in_bytes(
  2709     ConstantPoolCache::base_offset() +
  2710       ((byte_no == f2_byte)
  2711        ? ConstantPoolCacheEntry::f2_offset()
  2712        : ConstantPoolCacheEntry::f1_offset()));
  2713   const int flags_offset = in_bytes(ConstantPoolCache::base_offset() +
  2714                                     ConstantPoolCacheEntry::flags_offset());
  2715   // access constant pool cache fields
  2716   const int index_offset = in_bytes(ConstantPoolCache::base_offset() +
  2717                                     ConstantPoolCacheEntry::f2_offset());
  2719   size_t index_size = (is_invokedynamic ? sizeof(u4): sizeof(u2));
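         // invokedynamic carries a 4-byte constant-pool-cache index in the
         // bytecode stream; all other invokes use a 2-byte index.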
  2720   resolve_cache_and_index(byte_no, cache, index, index_size);
  2722   //assert(wordSize == 8, "adjust code below");
  2723   // note we shift by 4, not 2: what we have is the true index of the
  2724   // ConstantPoolCacheEntry, not the 2-bit-shifted index of the x86 version
  2725   __ dsll(AT, index, Address::times_ptr);
  2726   __ dadd(AT, cache, AT);
  2727   __ ld(method, AT, method_offset);
  2729   if (itable_index != NOREG) {
  2730     __ ld(itable_index, AT, index_offset);
  2732   __ ld(flags, AT, flags_offset);
  2735 // The registers cache and index expected to be set before call.
  2736 // Correct values of the cache and index registers are preserved.
  2737 void TemplateTable::jvmti_post_field_access(Register cache, Register index,
  2738                                             bool is_static, bool has_tos) {
  2739   // do the JVMTI work here to avoid disturbing the register state below
  2740   // We use c_rarg registers here because we want to use the register used in
  2741   // the call to the VM
  2742   if (JvmtiExport::can_post_field_access()) {
  2743     // Check to see if a field access watch has been set before we
  2744     // take the time to call into the VM.
  2745     Label L1;
  2746     // kill FSR
  2747     Register tmp1 = T2;
  2748     Register tmp2 = T1;
  2749     Register tmp3 = T3;
  2750     assert_different_registers(cache, index, AT);
  2751     __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr());
  2752     __ lw(AT, AT, 0);
  2753     __ beq(AT, R0, L1);
  2754     __ delayed()->nop();
  2756     __ get_cache_and_index_at_bcp(tmp2, tmp3, 1);
  2758     // cache entry pointer
  2759     __ daddi(tmp2, tmp2, in_bytes(ConstantPoolCache::base_offset()));
  2760     __ shl(tmp3, LogBytesPerWord);
  2761     __ dadd(tmp2, tmp2, tmp3);
  2762     if (is_static) {
  2763       __ move(tmp1, R0);
  2764     } else {
  2765       __ ld(tmp1, SP, 0);
  2766       __ verify_oop(tmp1);
  2768     // tmp1: object pointer or NULL
  2769     // tmp2: cache entry pointer
  2770     // tmp3: jvalue object on the stack
  2771     __ call_VM(NOREG, CAST_FROM_FN_PTR(address,
  2772                                        InterpreterRuntime::post_field_access),
  2773                tmp1, tmp2, tmp3);
  2774     __ get_cache_and_index_at_bcp(cache, index, 1);
  2775     __ bind(L1);
  2779 void TemplateTable::pop_and_check_object(Register r) {
  2780   __ pop_ptr(r);
  2781   __ null_check(r);  // for field access must check obj.
  2782   __ verify_oop(r);
  2785 // used registers : T0, T1, T2, T3
  2786 // T1 : flags
  2787 // T2 : off
  2788 // T3 : obj
  2789 // T0 : field address
  2790 // Flag bits 31, 30, 29 and 28 together form a 4-bit number in the range 0 to 8,
  2791 // with the following mapping to the TosState states:
  2792 // btos: 0
  2793 // ctos: 1
  2794 // stos: 2
  2795 // itos: 3
  2796 // ltos: 4
  2797 // ftos: 5
  2798 // dtos: 6
  2799 // atos: 7
  2800 // vtos: 8
  2801 // see ConstantPoolCacheEntry::set_field for more info
  2802 void TemplateTable::getfield_or_static(int byte_no, bool is_static) {
  2803   transition(vtos, vtos);
  2805   const Register cache = T3;
  2806   const Register index = T0;
  2808   const Register obj   = T3;
  2809   const Register off   = T2;
  2810   const Register flags = T1;
  2811   resolve_cache_and_index(byte_no, cache, index, sizeof(u2));
  2812   jvmti_post_field_access(cache, index, is_static, false);
  2813   load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
  2815   if (!is_static) pop_and_check_object(obj);
  2816   __ dadd(index, obj, off);
  2819   Label Done, notByte, notInt, notShort, notChar,
  2820               notLong, notFloat, notObj, notDouble;
  2822   assert(btos == 0, "change code, btos != 0");
  2823   __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift);
  2824   __ andi(flags, flags, 0xf);
  2825   __ bne(flags, R0, notByte);
  2826   __ delayed()->nop();
  2828   // btos
  2829   __ lb(FSR, index, 0);
  2830   __ sd(FSR, SP, - wordSize);
  2832   // Rewrite bytecode to be faster
  2833   if (!is_static) {
  2834     patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2);
  2836   __ b(Done);
  2837   __ delayed()->daddi(SP, SP, - wordSize);
  2839   __ bind(notByte);
  2840   __ move(AT, itos);
  2841   __ bne(flags, AT, notInt);
  2842   __ delayed()->nop();
  2844   // itos
  2845   __ lw(FSR, index, 0);
  2846   __ sd(FSR, SP, - wordSize);
  2848   // Rewrite bytecode to be faster
  2849   if (!is_static) {
  2851     patch_bytecode(Bytecodes::_fast_igetfield, T3, T2);
  2853   __ b(Done);
  2854   __ delayed()->daddi(SP, SP, - wordSize);
  2856   __ bind(notInt);
  2857   __ move(AT, atos);
  2858   __ bne(flags, AT, notObj);
  2859   __ delayed()->nop();
  2861   // atos
  2862   //add for compressedoops
  2863   __ load_heap_oop(FSR, Address(index, 0));
  2864   __ sd(FSR, SP, - wordSize);
  2866   if (!is_static) {
  2868     patch_bytecode(Bytecodes::_fast_agetfield, T3, T2);
  2870   __ b(Done);
  2871   __ delayed()->daddi(SP, SP, - wordSize);
  2873   __ bind(notObj);
  2874   __ move(AT, ctos);
  2875   __ bne(flags, AT, notChar);
  2876   __ delayed()->nop();
  2878   // ctos
  2879   __ lhu(FSR, index, 0);
  2880   __ sd(FSR, SP, - wordSize);
  2882   if (!is_static) {
  2883     patch_bytecode(Bytecodes::_fast_cgetfield, T3, T2);
  2885   __ b(Done);
  2886   __ delayed()->daddi(SP, SP, - wordSize);
  2888   __ bind(notChar);
  2889   __ move(AT, stos);
  2890   __ bne(flags, AT, notShort);
  2891   __ delayed()->nop();
  2893   // stos
  2894   __ lh(FSR, index, 0);
  2895   __ sd(FSR, SP, - wordSize);
  2897   if (!is_static) {
  2898     patch_bytecode(Bytecodes::_fast_sgetfield, T3, T2);
  2900   __ b(Done);
  2901   __ delayed()->daddi(SP, SP, - wordSize);
  2903   __ bind(notShort);
  2904   __ move(AT, ltos);
  2905   __ bne(flags, AT, notLong);
  2906   __ delayed()->nop();
  2908   // FIXME : the load/store should be atomic; we have no simple way to do this on mips32
  2909   // ltos
  2910   __ ld(FSR, index, 0 * wordSize);
  2911   __ sd(FSR, SP, -2 * wordSize);
  2912   __ sd(R0, SP, -1 * wordSize);
  2914   // Don't rewrite to _fast_lgetfield for potential volatile case.
  2915   __ b(Done);
  2916   __ delayed()->daddi(SP, SP, - 2 * wordSize);
  2918   __ bind(notLong);
  2919   __ move(AT, ftos);
  2920   __ bne(flags, AT, notFloat);
  2921   __ delayed()->nop();
  2923   // ftos
  2924   __ lwc1(FSF, index, 0);
  2925   __ sdc1(FSF, SP, - wordSize);
  2927   if (!is_static) {
  2928     patch_bytecode(Bytecodes::_fast_fgetfield, T3, T2);
  2930   __ b(Done);
  2931   __ delayed()->daddi(SP, SP, - wordSize);
  2933   __ bind(notFloat);
  2934   __ move(AT, dtos);
  2935   __ bne(flags, AT, notDouble);
  2936   __ delayed()->nop();
  2938   // dtos
  2939   __ ldc1(FSF, index, 0 * wordSize);
  2940   __ sdc1(FSF, SP, - 2 * wordSize);
  2941   __ sd(R0, SP, - 1 * wordSize);
  2943   if (!is_static) {
  2944     patch_bytecode(Bytecodes::_fast_dgetfield, T3, T2);
  2946   __ b(Done);
  2947   __ delayed()->daddi(SP, SP, - 2 * wordSize);
  2949   __ bind(notDouble);
  2951   __ stop("Bad state");
  2953   __ bind(Done);
  2957 void TemplateTable::getfield(int byte_no) {
  2958   getfield_or_static(byte_no, false);
  2961 void TemplateTable::getstatic(int byte_no) {
  2962   getfield_or_static(byte_no, true);
  2965 // The registers cache and index expected to be set before call.
  2966 // The function may destroy various registers, just not the cache and index registers.
  2967 void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) {
  2968   transition(vtos, vtos);
  2970   ByteSize cp_base_offset = ConstantPoolCache::base_offset();
  2972   if (JvmtiExport::can_post_field_modification()) {
  2973     // Check to see if a field modification watch has been set before
  2974     // we take the time to call into the VM.
  2975     Label L1;
  2976     //kill AT, T1, T2, T3, T9
  2977     Register tmp1 = T2;
  2978     Register tmp2 = T1;
  2979     Register tmp3 = T3;
  2980     Register tmp4 = T9;
  2981     assert_different_registers(cache, index, tmp4);
  2983     __ li(AT, JvmtiExport::get_field_modification_count_addr());
  2984     __ lw(AT, AT, 0);
  2985     __ beq(AT, R0, L1);
  2986     __ delayed()->nop();
  2988     __ get_cache_and_index_at_bcp(tmp2, tmp4, 1);
  2990     if (is_static) {
  2991       __ move(tmp1, R0);
  2992     } else {
  2993       // Life is harder. The stack holds the value on top, followed by
  2994       // the object.  We don't know the size of the value, though; it
  2995       // could be one or two words depending on its type. As a result,
  2996       // we must find the type to determine where the object is.
  2997       Label two_word, valsize_known;
  2998       __ dsll(AT, tmp4, Address::times_8);
  2999       __ dadd(AT, tmp2, AT);
  3000       __ ld(tmp3, AT, in_bytes(cp_base_offset +
  3001                                ConstantPoolCacheEntry::flags_offset()));
  3002       __ shr(tmp3, ConstantPoolCacheEntry::tos_state_shift);
  3004       // Make sure we don't need to mask tmp3 for tos_state_shift
  3005       // after the above shift
  3006       ConstantPoolCacheEntry::verify_tos_state_shift();
  3007       __ move(tmp1, SP);
  3008       __ move(AT, ltos);
  3009       __ beq(tmp3, AT, two_word);
  3010       __ delayed()->nop();
  3011       __ move(AT, dtos);
  3012       __ beq(tmp3, AT, two_word);
  3013       __ delayed()->nop();
  3014       __ b(valsize_known);
  3015       __ delayed()->daddi(tmp1, tmp1, Interpreter::expr_offset_in_bytes(1) );
  3017       __ bind(two_word);
  3018       __ daddi(tmp1, tmp1, Interpreter::expr_offset_in_bytes(2));
  3020       __ bind(valsize_known);
  3021       // setup object pointer
  3022       __ ld(tmp1, tmp1, 0*wordSize);
  3024     // cache entry pointer
  3025     __ daddi(tmp2, tmp2, in_bytes(cp_base_offset));
  3026     __ shl(tmp4, LogBytesPerWord);
  3027     __ daddu(tmp2, tmp2, tmp4);
  3028     // object (tos)
  3029     __ move(tmp3, SP);
  3030     // tmp1: object pointer set up above (NULL if static)
  3031     // tmp2: cache entry pointer
  3032     // tmp3: jvalue object on the stack
  3033     __ call_VM(NOREG,
  3034                CAST_FROM_FN_PTR(address,
  3035                                 InterpreterRuntime::post_field_modification),
  3036                tmp1, tmp2, tmp3);
  3037     __ get_cache_and_index_at_bcp(cache, index, 1);
  3038     __ bind(L1);
  3042 // used registers : T0, T1, T2, T3, T8
  3043 // T1 : flags
  3044 // T2 : off
  3045 // T3 : obj
  3046 // T8 : volatile bit
  3047 // see ConstantPoolCacheEntry::set_field for more info
  3048 void TemplateTable::putfield_or_static(int byte_no, bool is_static) {
  3049   transition(vtos, vtos);
  3051   const Register cache = T3;
  3052   const Register index = T0;
  3053   const Register obj   = T3;
  3054   const Register off   = T2;
  3055   const Register flags = T1;
  3056   const Register bc    = T3;
  3058   resolve_cache_and_index(byte_no, cache, index, sizeof(u2));
  3059   jvmti_post_field_mod(cache, index, is_static);
  3060   load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
  3062   Label notVolatile, Done;
  3063   __ move(AT, 1<<ConstantPoolCacheEntry::is_volatile_shift);
  3064   __ andr(T8, flags, AT);
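         // T8 is nonzero iff the field is volatile; it is tested after the
         // store (at Done) to decide whether a memory barrier is needed.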
  3066   Label notByte, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble;
  3068   assert(btos == 0, "change code, btos != 0");
  3069   // btos
  3070   __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift);
  3071   __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask);
  3072   __ bne(flags, R0, notByte);
  3073   __ delayed()->nop();
  3075   __ pop(btos);
  3076   if (!is_static) {
  3077     pop_and_check_object(obj);
  3079   __ dadd(AT, obj, off);
  3080   __ sb(FSR, AT, 0);
  3082   if (!is_static) {
  3083     patch_bytecode(Bytecodes::_fast_bputfield, bc, off, true, byte_no);
  3085   __ b(Done);
  3086   __ delayed()->nop();
  3088   __ bind(notByte);
  3089   // itos
  3090   __ move(AT, itos);
  3091   __ bne(flags, AT, notInt);
  3092   __ delayed()->nop();
  3094   __ pop(itos);
  3095   if (!is_static) {
  3096     pop_and_check_object(obj);
  3098   __ dadd(AT, obj, off);
  3099   __ sw(FSR, AT, 0);
  3101   if (!is_static) {
  3102     patch_bytecode(Bytecodes::_fast_iputfield, bc, off, true, byte_no);
  3104   __ b(Done);
  3105   __ delayed()->nop();
  3106   __ bind(notInt);
  3107   // atos
  3108   __ move(AT, atos);
  3109   __ bne(flags, AT, notObj);
  3110   __ delayed()->nop();
  3112   __ pop(atos);
  3113   if (!is_static) {
  3114     pop_and_check_object(obj);
  3117   __ dadd(AT, obj, off);
  3118   __ store_heap_oop(Address(AT, 0), FSR);
  3119   __ store_check(obj);
  3121   if (!is_static) {
  3122     patch_bytecode(Bytecodes::_fast_aputfield, bc, off, true, byte_no);
  3124   __ b(Done);
  3125   __ delayed()->nop();
  3126   __ bind(notObj);
  3127   // ctos
  3128   __ move(AT, ctos);
  3129   __ bne(flags, AT, notChar);
  3130   __ delayed()->nop();
  3132   __ pop(ctos);
  3133   if (!is_static) {
  3134     pop_and_check_object(obj);
  3136   __ dadd(AT, obj, off);
  3137   __ sh(FSR, AT, 0);
  3138   if (!is_static) {
  3139     patch_bytecode(Bytecodes::_fast_cputfield, bc, off, true, byte_no);
  3141   __ b(Done);
  3142   __ delayed()->nop();
  3143   __ bind(notChar);
  3144   // stos
  3145   __ move(AT, stos);
  3146   __ bne(flags, AT, notShort);
  3147   __ delayed()->nop();
  3149   __ pop(stos);
  3150   if (!is_static) {
  3151     pop_and_check_object(obj);
  3153   __ dadd(AT, obj, off);
  3154   __ sh(FSR, AT, 0);
  3155   if (!is_static) {
  3156     patch_bytecode(Bytecodes::_fast_sputfield, bc, off, true, byte_no);
  3158   __ b(Done);
  3159   __ delayed()->nop();
  3160   __ bind(notShort);
  3161   // ltos
  3162   __ move(AT, ltos);
  3163   __ bne(flags, AT, notLong);
  3164   __ delayed()->nop();
  3166   // FIXME: there is no simple way to load/store 64-bit data in an atomic operation;
  3167   // we just ignore the volatile flag.
  3168   //Label notVolatileLong;
  3169   //__ beq(T1, R0, notVolatileLong);
  3170   //__ delayed()->nop();
  3172   //addent = 2 * wordSize;
  3173   // no need
  3174   //__ lw(FSR, SP, 0);
  3175   //__ lw(SSR, SP, 1 * wordSize);
  3176   //if (!is_static) {
  3177   //  __ lw(T3, SP, addent);
  3178   //  addent += 1 * wordSize;
  3179   //  __ verify_oop(T3);
  3180   //}
  3182   //__ daddu(AT, T3, T2);
  3184   // Replace with real volatile test
  3185   // NOTE : we assume that sdc1&ldc1 operate in 32-bit, this is true for Godson2 even in 64-bit kernel
  3186   // last modified by yjl 7/12/2005
  3187   //__ ldc1(FSF, SP, 0);
  3188   //__ sdc1(FSF, AT, 0);
  3189   //volatile_barrier();
  3191   // Don't rewrite volatile version
  3192   //__ b(notVolatile);
  3193   //__ delayed()->addiu(SP, SP, addent);
  3195   //__ bind(notVolatileLong);
  3197   //__ pop(ltos);  // overwrites edx
  3198   //  __ lw(FSR, SP, 0 * wordSize);
  3199   //  __ lw(SSR, SP, 1 * wordSize);
  3200   //  __ daddi(SP, SP, 2*wordSize);
  3201   __ pop(ltos);
  3202   if (!is_static) {
  3203     pop_and_check_object(obj);
  3205   __ dadd(AT, obj, off);
  3206   __ sd(FSR, AT, 0);
  3207   if (!is_static) {
  3208     patch_bytecode(Bytecodes::_fast_lputfield, bc, off, true, byte_no);
  3210   __ b(notVolatile);
  3211   __ delayed()->nop();
  3213   __ bind(notLong);
  3214   // ftos
  3215   __ move(AT, ftos);
  3216   __ bne(flags, AT, notFloat);
  3217   __ delayed()->nop();
  3219   __ pop(ftos);
  3220   if (!is_static) {
  3221     pop_and_check_object(obj);
  3223   __ dadd(AT, obj, off);
  3224   __ swc1(FSF, AT, 0);
  3225   if (!is_static) {
  3226     patch_bytecode(Bytecodes::_fast_fputfield, bc, off, true, byte_no);
  3228   __ b(Done);
  3229   __ delayed()->nop();
  3230   __ bind(notFloat);
  3231   // dtos
  3232   __ move(AT, dtos);
  3233   __ bne(flags, AT, notDouble);
  3234   __ delayed()->nop();
  3236   __ pop(dtos);
  3237   if (!is_static) {
  3238     pop_and_check_object(obj);
  3240   __ dadd(AT, obj, off);
  3241   __ sdc1(FSF, AT, 0);
  3242   if (!is_static) {
  3243     patch_bytecode(Bytecodes::_fast_dputfield, bc, off, true, byte_no);
  3246 #ifdef ASSERT
  3247   __ b(Done);
  3248   __ delayed()->nop();
  3250   __ bind(notDouble);
  3251   __ stop("Bad state");
  3252 #endif
  3254   __ bind(Done);
  3256   // Check for volatile store
  3257   __ beq(T8, R0, notVolatile);
  3258   __ delayed()->nop();
  3259   volatile_barrier( );
  3260   __ bind(notVolatile);
  3263 void TemplateTable::putfield(int byte_no) {
  3264   putfield_or_static(byte_no, false);
  3267 void TemplateTable::putstatic(int byte_no) {
  3268   putfield_or_static(byte_no, true);
  3271 // used registers : T1, T2, T3
  3272 // T1 : cp_entry
  3273 // T2 : obj
  3274 // T3 : value pointer
  3275 void TemplateTable::jvmti_post_fast_field_mod() {
  3276   if (JvmtiExport::can_post_field_modification()) {
  3277     // Check to see if a field modification watch has been set before
  3278     // we take the time to call into the VM.
  3279     Label L2;
  3280     //kill AT, T1, T2, T3, T9
  3281     Register tmp1 = T2;
  3282     Register tmp2 = T1;
  3283     Register tmp3 = T3;
  3284     Register tmp4 = T9;
  3285     __ li(AT, JvmtiExport::get_field_modification_count_addr());
  3286     __ lw(tmp3, AT, 0);
  3287     __ beq(tmp3, R0, L2);
  3288     __ delayed()->nop();
  3289     __ pop_ptr(tmp1);
  3290     __ verify_oop(tmp1);
  3291     __ push_ptr(tmp1);
  3292     switch (bytecode()) {          // load values into the jvalue object
  3293     case Bytecodes::_fast_aputfield: __ push_ptr(FSR); break;
  3294     case Bytecodes::_fast_bputfield: // fall through
  3295     case Bytecodes::_fast_sputfield: // fall through
  3296     case Bytecodes::_fast_cputfield: // fall through
  3297     case Bytecodes::_fast_iputfield: __ push_i(FSR); break;
  3298     case Bytecodes::_fast_dputfield: __ push_d(FSF); break;
  3299     case Bytecodes::_fast_fputfield: __ push_f(); break;
  3300     case Bytecodes::_fast_lputfield: __ push_l(FSR); break;
  3301       default:  ShouldNotReachHere();
  3303     __ move(tmp3, SP);
  3304     // access constant pool cache entry
  3305     __ get_cache_entry_pointer_at_bcp(tmp2, FSR, 1);
  3306     __ verify_oop(tmp1);
  3307     // tmp1: object pointer copied above
  3308     // tmp2: cache entry pointer
  3309     // tmp3: jvalue object on the stack
  3310     __ call_VM(NOREG,
  3311                CAST_FROM_FN_PTR(address,
  3312                                 InterpreterRuntime::post_field_modification),
  3313                tmp1, tmp2, tmp3);
  3315     switch (bytecode()) {             // restore tos values
  3316     case Bytecodes::_fast_aputfield: __ pop_ptr(FSR); break;
  3317     case Bytecodes::_fast_bputfield: // fall through
  3318     case Bytecodes::_fast_sputfield: // fall through
  3319     case Bytecodes::_fast_cputfield: // fall through
  3320     case Bytecodes::_fast_iputfield: __ pop_i(FSR); break;
  3321     case Bytecodes::_fast_dputfield: __ pop_d(); break;
  3322     case Bytecodes::_fast_fputfield: __ pop_f(); break;
  3323     case Bytecodes::_fast_lputfield: __ pop_l(FSR); break;
  3325     __ bind(L2);
  3329 // used registers : T2, T3, T1
  3330 // T2 : index & off & field address
  3331 // T3 : cache & obj
  3332 // T1 : flags
  3333 void TemplateTable::fast_storefield(TosState state) {
  3334   transition(state, vtos);
  3336   ByteSize base = ConstantPoolCache::base_offset();
  3338   jvmti_post_fast_field_mod();
  3340   // access constant pool cache
  3341   __ get_cache_and_index_at_bcp(T3, T2, 1);
  3343   // load the flags word so we can test for a volatile store below
  3344   __ dsll(AT, T2, Address::times_8);
  3345   __ dadd(AT, T3, AT);
  3346   __ ld(T1, AT, in_bytes(base + ConstantPoolCacheEntry::flags_offset()));
  3348   // replace index with field offset from cache entry
  3349   __ ld(T2, AT, in_bytes(base + ConstantPoolCacheEntry::f2_offset()));
  3351   // Doug Lea believes this is not needed with current Sparcs (TSO) and Intel (PSO).
  3352   // volatile_barrier( );
  3354   Label notVolatile, Done;
  3355   // Check for volatile store
  3356   __ move(AT, 1<<ConstantPoolCacheEntry::is_volatile_shift);
  3357   __ andr(AT, T1, AT);
  3358   __ beq(AT, R0, notVolatile);
  3359   __ delayed()->nop();
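         // volatile case: fall through, perform the store, then emit
         // volatile_barrier() below to order the store before later loads.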
  3362   // Get object from stack
  3363   pop_and_check_object(T3);
  3365   // field address
  3366   __ dadd(T2, T3, T2);
  3368   // access field
  3369   switch (bytecode()) {
  3370     case Bytecodes::_fast_bputfield:
  3371       __ sb(FSR, T2, 0);
  3372       break;
  3373     case Bytecodes::_fast_sputfield: // fall through
  3374     case Bytecodes::_fast_cputfield:
  3375       __ sh(FSR, T2, 0);
  3376       break;
  3377     case Bytecodes::_fast_iputfield:
  3378       __ sw(FSR, T2, 0);
  3379       break;
  3380     case Bytecodes::_fast_lputfield:
  3381       __ sd(FSR, T2, 0 * wordSize);
  3382       break;
  3383     case Bytecodes::_fast_fputfield:
  3384       __ swc1(FSF, T2, 0);
  3385       break;
  3386     case Bytecodes::_fast_dputfield:
  3387       __ sdc1(FSF, T2, 0 * wordSize);
  3388       break;
  3389     case Bytecodes::_fast_aputfield:
  3390       __ store_heap_oop(Address(T2, 0), FSR);
  3391       __ store_check(T3);
  3392       break;
  3393     default:
  3394       ShouldNotReachHere();
  3397   Label done;
  3398   volatile_barrier( );
  3399   __ b(done);
  3400   __ delayed()->nop();
  3402   // Same code as above, but don't need edx to test for volatile.
  3403   __ bind(notVolatile);
  3404   pop_and_check_object(T3);
  3405   //get the field address
  3406   __ dadd(T2, T3, T2);
  3408   // access field
  3409   switch (bytecode()) {
  3410     case Bytecodes::_fast_bputfield:
  3411       __ sb(FSR, T2, 0);
  3412       break;
  3413     case Bytecodes::_fast_sputfield: // fall through
  3414     case Bytecodes::_fast_cputfield:
  3415       __ sh(FSR, T2, 0);
  3416       break;
  3417     case Bytecodes::_fast_iputfield:
  3418       __ sw(FSR, T2, 0);
  3419       break;
  3420     case Bytecodes::_fast_lputfield:
  3421       __ sd(FSR, T2, 0 * wordSize);
  3422       break;
  3423     case Bytecodes::_fast_fputfield:
  3424       __ swc1(FSF, T2, 0);
  3425       break;
  3426     case Bytecodes::_fast_dputfield:
  3427       __ sdc1(FSF, T2, 0 * wordSize);
  3428       break;
  3429     case Bytecodes::_fast_aputfield:
  3430       //add for compressedoops
  3431       __ store_heap_oop(Address(T2, 0), FSR);
  3432       __ store_check(T3);
  3433       break;
  3434     default:
  3435       ShouldNotReachHere();
  3437   __ bind(done);
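// For reference, the fast path emitted above amounts to the following C-style
// sketch (illustrative pseudo-code only; the helper names are hypothetical,
// not HotSpot API):
//
//   ConstantPoolCacheEntry* e = cache_entry_at_bcp(1);     // T3/T2
//   intptr_t flags  = e->flags();                          // T1
//   intptr_t offset = e->f2();                             // field offset, T2
//   oop obj = pop_and_check_object();                      // T3
//   if (flags & (1 << ConstantPoolCacheEntry::is_volatile_shift)) {
//     store_field(obj, offset, tos_value);                 // width per bytecode
//     memory_fence();                                      // volatile_barrier()
//   } else {
//     store_field(obj, offset, tos_value);
//   }
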
// used registers : T2, T3, T1
// T3 : cp_entry & cache
// T2 : index & offset
void TemplateTable::fast_accessfield(TosState state) {
  transition(atos, state);

  // do the JVMTI work here to avoid disturbing the register state below
  if (JvmtiExport::can_post_field_access()) {
    // Check to see if a field access watch has been set before we take
    // the time to call into the VM.
    Label L1;
    __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr());
    __ lw(T3, AT, 0);
    __ beq(T3, R0, L1);
    __ delayed()->nop();
    // access constant pool cache entry
    __ get_cache_entry_pointer_at_bcp(T3, T1, 1);
    __ move(TSR, FSR);
    __ verify_oop(FSR);
    // FSR: object pointer copied above
    // T3: cache entry pointer
    __ call_VM(NOREG,
               CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access),
               FSR, T3);
    __ move(FSR, TSR);
    __ bind(L1);
  }

  // access constant pool cache
  __ get_cache_and_index_at_bcp(T3, T2, 1);
  // replace index with field offset from cache entry
  __ dsll(AT, T2, Address::times_8);
  __ dadd(AT, T3, AT);
  __ ld(T2, AT, in_bytes(ConstantPoolCache::base_offset()
                         + ConstantPoolCacheEntry::f2_offset()));

  // FSR: object
  __ verify_oop(FSR);
  __ null_check(FSR);
  // field address
  __ dadd(FSR, FSR, T2);

  // access field
  switch (bytecode()) {
    case Bytecodes::_fast_bgetfield:
      __ lb(FSR, FSR, 0);
      break;
    case Bytecodes::_fast_sgetfield:
      __ lh(FSR, FSR, 0);
      break;
    case Bytecodes::_fast_cgetfield:
      __ lhu(FSR, FSR, 0);
      break;
    case Bytecodes::_fast_igetfield:
      __ lw(FSR, FSR, 0);
      break;
    case Bytecodes::_fast_lgetfield:
      __ stop("should not be rewritten");
      break;
    case Bytecodes::_fast_fgetfield:
      __ lwc1(FSF, FSR, 0);
      break;
    case Bytecodes::_fast_dgetfield:
      __ ldc1(FSF, FSR, 0);
      break;
    case Bytecodes::_fast_agetfield:
      // added for compressed oops
      __ load_heap_oop(FSR, Address(FSR, 0));
      __ verify_oop(FSR);
      break;
    default:
      ShouldNotReachHere();
  }

  // Doug Lea believes this is not needed with current Sparcs (TSO) and Intel (PSO)
  // volatile_barrier( );
}

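// The emitted fast path is essentially (illustrative pseudo-code; hypothetical
// helper names):
//
//   intptr_t offset = cache_entry_at_bcp(1)->f2();   // T2
//   null_check(obj);                                 // obj is the atos value in FSR
//   tos_value = load_field(obj, offset);             // width/signedness per bytecode
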
// generator for _fast_iaccess_0, _fast_aaccess_0, _fast_faccess_0
// used registers : T1, T2, T3
// T1 : obj & field address
// T2 : index & off
// T3 : cache
void TemplateTable::fast_xaccess(TosState state) {
  transition(vtos, state);

  // get receiver
  __ ld(T1, aaddress(0));
  // access constant pool cache
  __ get_cache_and_index_at_bcp(T3, T2, 2);
  __ dsll(AT, T2, Address::times_8);
  __ dadd(AT, T3, AT);
  __ ld(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset()));

  // make sure exception is reported in correct bcp range (getfield is
  // next instruction)
  __ daddi(BCP, BCP, 1);
  __ null_check(T1);
  __ dadd(T1, T1, T2);

  if (state == itos) {
    __ lw(FSR, T1, 0);
  } else if (state == atos) {
    __ load_heap_oop(FSR, Address(T1, 0));
    __ verify_oop(FSR);
  } else if (state == ftos) {
    __ lwc1(FSF, T1, 0);
  } else {
    ShouldNotReachHere();
  }
  __ daddi(BCP, BCP, -1);
}

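// fast_xaccess handles the fused aload_0 + fast getfield rewrite, so the
// cache index comes from the getfield at bcp + 2, and BCP is bumped across the
// null check so that an NPE is attributed to the getfield. Roughly
// (illustrative pseudo-code; hypothetical helper names):
//
//   oop obj = local(0);
//   bcp += 1;                                        // exception bci -> getfield
//   null_check(obj);
//   tos_value = *(obj + cache_entry_at_bcp(2)->f2());
//   bcp -= 1;                                        // restore for normal dispatch
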
//-----------------------------------------------------------------------------
// Calls

void TemplateTable::count_calls(Register method, Register temp) {
  // implemented elsewhere
  ShouldNotReachHere();
}

// method, index, recv, flags: T1, T2, T3, T1
// byte_no = 2 for _invokevirtual, 1 otherwise
// T0 : return address
// Get the method & index of the invoke, and push the return address of
// the invoke (first word in the frame).
// This address is where the return code jumps to.
// NOTE : this method will set T3 & T1 as recv & flags
void TemplateTable::prepare_invoke(int byte_no,
                                   Register method,  // linked method (or i-klass)
                                   Register index,   // itable index, MethodType, etc.
                                   Register recv,    // if caller wants to see it
                                   Register flags    // if caller wants to test it
                                   ) {
  // determine flags
  const Bytecodes::Code code = bytecode();
  const bool is_invokeinterface  = code == Bytecodes::_invokeinterface;
  const bool is_invokedynamic    = code == Bytecodes::_invokedynamic;
  const bool is_invokehandle     = code == Bytecodes::_invokehandle;
  const bool is_invokevirtual    = code == Bytecodes::_invokevirtual;
  const bool is_invokespecial    = code == Bytecodes::_invokespecial;
  const bool load_receiver       = (recv  != noreg);
  const bool save_flags          = (flags != noreg);
  assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic), "");
  assert(save_flags    == (is_invokeinterface || is_invokevirtual), "need flags for vfinal");
  assert(flags == noreg || flags == T1, "error flags reg.");
  assert(recv  == noreg || recv  == T3, "error recv reg.");

  // setup registers & access constant pool cache
  if (recv  == noreg) recv  = T3;
  if (flags == noreg) flags = T1;
  assert_different_registers(method, index, recv, flags);

  // save 'interpreter return address'
  __ save_bcp();

  load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic);

  if (is_invokedynamic || is_invokehandle) {
    Label L_no_push;
    __ move(AT, (1 << ConstantPoolCacheEntry::has_appendix_shift));
    __ andr(AT, AT, flags);
    __ beq(AT, R0, L_no_push);
    __ delayed()->nop();
    // Push the appendix as a trailing parameter.
    // This must be done before we get the receiver,
    // since the parameter_size includes it.
    Register tmp = SSR;
    __ push(tmp);
    __ move(tmp, index);
    assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0");
    __ load_resolved_reference_at_index(index, tmp);
    __ pop(tmp);
    __ push(index);  // push appendix (MethodType, CallSite, etc.)
    __ bind(L_no_push);
  }

  // load receiver if needed (after appendix is pushed so parameter size is correct)
  // Note: no return address pushed yet
  if (load_receiver) {
    __ move(AT, ConstantPoolCacheEntry::parameter_size_mask);
    __ andr(recv, flags, AT);
    // 2014/07/31 Fu: Since we won't push RA on stack, no_return_pc_pushed_yet should be 0.
    const int no_return_pc_pushed_yet = 0;  // argument slot correction before we push return address
    const int receiver_is_at_end      = -1;  // back off one slot to get receiver
    Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end);
    __ ld(recv, recv_addr);
    __ verify_oop(recv);
  }

  if (save_flags) {
    __ move(BCP, flags);
  }

  // compute return type
  __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift);
  __ andi(flags, flags, 0xf);

  // Make sure we don't need to mask flags for tos_state_shift after the above shift
  ConstantPoolCacheEntry::verify_tos_state_shift();
  // load return address
  {
    const address table = (address) Interpreter::invoke_return_entry_table_for(code);
    __ li(AT, (long)table);
    __ dsll(flags, flags, LogBytesPerWord);
    __ dadd(AT, AT, flags);
    __ ld(RA, AT, 0);
  }

  if (save_flags) {
    __ move(flags, BCP);
    __ restore_bcp();
  }
}

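// The return-address computation above indexes a per-bytecode entry table by
// the callee's tos state, roughly (illustrative pseudo-code):
//
//   int tos = (flags >> ConstantPoolCacheEntry::tos_state_shift) & 0xf;
//   RA = Interpreter::invoke_return_entry_table_for(code)[tos];
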
// used registers : T0, T3, T1, T2
// T3 : recv (this register convention is established by prepare_invoke)
// T1 : flags, klass
// Rmethod : method; index must be Rmethod
void TemplateTable::invokevirtual_helper(Register index,
                                         Register recv,
                                         Register flags) {

  assert_different_registers(index, recv, flags, T2);

  // Test for an invoke of a final method
  Label notFinal;
  __ move(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift));
  __ andr(AT, flags, AT);
  __ beq(AT, R0, notFinal);
  __ delayed()->nop();

  Register method = index;  // method must be Rmethod
  assert(method == Rmethod, "methodOop must be Rmethod for interpreter calling convention");

  // do the call - the index is actually the method to call
  // the index is indeed methodOop, for this is vfinal,
  // see ConstantPoolCacheEntry::set_method for more info

  __ verify_oop(method);

  // It's final, need a null check here!
  __ null_check(recv);

  // profile this call
  __ profile_final_call(T2);

  // 2014/11/24 Fu
  // T2: tmp, used for mdp
  // method: callee
  // T9: tmp
  // is_virtual: true
  __ profile_arguments_type(T2, method, T9, true);

  __ jump_from_interpreted(method, T2);

  __ bind(notFinal);

  // get receiver klass
  __ null_check(recv, oopDesc::klass_offset_in_bytes());
  __ load_klass(T2, recv);
  __ verify_oop(T2);

  // profile this call
  __ profile_virtual_call(T2, T0, T1);

  // get target methodOop & entry point
  const int base = InstanceKlass::vtable_start_offset() * wordSize;
  assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
  __ dsll(AT, index, Address::times_ptr);
  // T2: receiver klass
  __ dadd(AT, T2, AT);
  // this may be an unaligned read
  __ ld(method, AT, base + vtableEntry::method_offset_in_bytes());
  __ profile_arguments_type(T2, method, T9, true);
  __ jump_from_interpreted(method, T2);
}

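// The non-final path above is a standard vtable dispatch (illustrative
// pseudo-code):
//
//   Klass* k = receiver->klass();
//   Method* m = *(Method**)((char*)k
//                 + InstanceKlass::vtable_start_offset() * wordSize
//                 + vtable_index * wordSize
//                 + vtableEntry::method_offset_in_bytes());
//   jump_to(m);
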
void TemplateTable::invokevirtual(int byte_no) {
  transition(vtos, vtos);
  assert(byte_no == f2_byte, "use this argument");
  prepare_invoke(byte_no, Rmethod, NOREG, T3, T1);
  // now recv & flags in T3, T1
  invokevirtual_helper(Rmethod, T3, T1);
}

// T9 : entry
// Rmethod : method
void TemplateTable::invokespecial(int byte_no) {
  transition(vtos, vtos);
  assert(byte_no == f1_byte, "use this argument");
  prepare_invoke(byte_no, Rmethod, NOREG, T3);
  // now recv in T3
  __ verify_oop(T3);
  __ null_check(T3);
  __ profile_call(T9);

  // 2014/11/24 Fu
  // T8: tmp, used for mdp
  // Rmethod: callee
  // T9: tmp
  // is_virtual: false
  __ profile_arguments_type(T8, Rmethod, T9, false);

  __ jump_from_interpreted(Rmethod, T9);
  __ move(T0, T3); // aoqi: ?
}

void TemplateTable::invokestatic(int byte_no) {
  transition(vtos, vtos);
  assert(byte_no == f1_byte, "use this argument");
  prepare_invoke(byte_no, Rmethod, NOREG);
  __ verify_oop(Rmethod);

  __ profile_call(T9);

  // 2014/11/24 Fu
  // T8: tmp, used for mdp
  // Rmethod: callee
  // T9: tmp
  // is_virtual: false
  __ profile_arguments_type(T8, Rmethod, T9, false);

  __ jump_from_interpreted(Rmethod, T9);
}

// Not implemented on mips64 for now; the interpreter never dispatches here. FIXME.
void TemplateTable::fast_invokevfinal(int byte_no) {
  transition(vtos, vtos);
  assert(byte_no == f2_byte, "use this argument");
  __ stop("fast_invokevfinal not used on mips64");
}

// used registers : T0, T1, T2, T3, A7
// T0 : itable, vtable, entry
// T1 : interface, flags, klass
// T3 : receiver
// Rmethod : index, method; this is required by interpreter_entry
void TemplateTable::invokeinterface(int byte_no) {
  transition(vtos, vtos);
  // this method uses T0 and T1-T3
  assert(byte_no == f1_byte, "use this argument");
  prepare_invoke(byte_no, T2, Rmethod, T3, T1);
  // T2: Interface
  // Rmethod: index
  // T3: receiver
  // T1: flags

  // Special case of invokeinterface called for virtual method of
  // java.lang.Object.  See cpCacheOop.cpp for details.
  // This code isn't produced by javac, but could be produced by
  // another compliant java compiler.
  Label notMethod;
  __ move(AT, (1 << ConstantPoolCacheEntry::is_forced_virtual_shift));
  __ andr(AT, T1, AT);
  __ beq(AT, R0, notMethod);
  __ delayed()->nop();

  invokevirtual_helper(Rmethod, T3, T1);
  __ bind(notMethod);
  // Get receiver klass into T1 - also a null check
  // (works with compressed oops)
  __ load_klass(T1, T3);
  __ verify_oop(T1);

  // profile this call
  __ profile_virtual_call(T1, T0, FSR);

  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  // TODO: x86 adds a new method lookup_interface_method  // LEE
  const int base = InstanceKlass::vtable_start_offset() * wordSize;
  assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below");
  __ lw(AT, T1, InstanceKlass::vtable_length_offset() * wordSize);
  __ dsll(AT, AT, Address::times_8);
  __ dadd(T0, T1, AT);
  __ daddi(T0, T0, base);
  if (HeapWordsPerLong > 1) {
    // Round up to align_object_offset boundary
    __ round_to(T0, BytesPerLong);
  }
  // now T0 is the beginning of the itable

  Label entry, search, interface_ok;

  __ b(entry);
  __ delayed()->nop();

  __ bind(search);
  __ increment(T0, itableOffsetEntry::size() * wordSize);

  __ bind(entry);

  // Check that the entry is non-null.  A null entry means that the receiver
  // class doesn't implement the interface, and wasn't the same as the
  // receiver class checked when the interface was resolved.
  __ ld(AT, T0, itableOffsetEntry::interface_offset_in_bytes());
  __ bne(AT, R0, interface_ok);
  __ delayed()->nop();
  // throw exception
  // the call_VM checks for exception, so we should never return here.

  // Unlike x86, no return address was pushed by prepare_invoke;
  // the value is kept in RA, so there is nothing to pop here.
  __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_IncompatibleClassChangeError));
  __ should_not_reach_here();

  __ bind(interface_ok);
  // NOTE: no pop here, unlike x86
  __ bne(AT, T2, search);
  __ delayed()->nop();

  // found it: now get the method table of the interface
  __ ld(T0, T0, itableOffsetEntry::offset_offset_in_bytes());
  __ daddu(T0, T1, T0);
  assert(itableMethodEntry::size() * wordSize == 8, "adjust the scaling in the code below");
  __ dsll(AT, Rmethod, Address::times_8);
  __ daddu(AT, T0, AT);
  // now we get the method
  __ ld(Rmethod, AT, 0);
  // Rmethod: methodOop to call
  // T3: receiver
  // Check for abstract method error
  // Note: This should be done more efficiently via a throw_abstract_method_error
  //       interpreter entry point and a conditional jump to it in case of a null
  //       method.
  {
    Label L;
    __ bne(Rmethod, R0, L);
    __ delayed()->nop();

    // throw exception
    // note: must restore interpreter registers to canonical
    //       state for exception handling to work correctly!
    __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError));
    // the call_VM checks for exception, so we should never return here.
    __ should_not_reach_here();
    __ bind(L);
  }

  // 2014/11/24 Fu
  // T8: tmp, used for mdp
  // Rmethod: callee
  // T9: tmp
  // is_virtual: true
  __ profile_arguments_type(T8, Rmethod, T9, true);

  __ jump_from_interpreted(Rmethod, T9);
}

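// The scan above walks the itable, which follows the vtable in the klass:
// itableOffsetEntry records of {interface, offset}, terminated by a null
// interface. Roughly (illustrative pseudo-code):
//
//   itableOffsetEntry* e = first_itable_entry(klass);   // hypothetical helper
//   while (true) {
//     if (e->interface() == NULL) throw IncompatibleClassChangeError;
//     if (e->interface() == resolved_interface) break;
//     e++;
//   }
//   Method* m = *(Method**)((char*)klass + e->offset() + itable_index * 8);
//   if (m == NULL) throw AbstractMethodError;
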
void TemplateTable::invokehandle(int byte_no) {
  transition(vtos, vtos);
  assert(byte_no == f1_byte, "use this argument");
  const Register T2_method = Rmethod;
  const Register FSR_mtype = FSR;
  const Register T3_recv   = T3;

  if (!EnableInvokeDynamic) {
    // rewriter does not generate this bytecode
    __ should_not_reach_here();
    return;
  }

  prepare_invoke(byte_no, T2_method, FSR_mtype, T3_recv);
  //?? __ verify_method_ptr(T2_method);
  __ verify_oop(T3_recv);
  __ null_check(T3_recv);

  // FSR_mtype: MethodType object (from cpool->resolved_references[f1], if necessary)
  // T2_method: MH.invokeExact_MT method (from f2)

  // Note: FSR_mtype is already pushed (if necessary) by prepare_invoke

  // FIXME: profile the LambdaForm also
  __ profile_final_call(T9);

  // 2014/11/24 Fu
  // T8: tmp, used for mdp
  // T2_method: callee
  // T9: tmp
  // is_virtual: true
  __ profile_arguments_type(T8, T2_method, T9, true);

  __ jump_from_interpreted(T2_method, T9);
}

void TemplateTable::invokedynamic(int byte_no) {
  transition(vtos, vtos);
  assert(byte_no == f1_byte, "use this argument");

  if (!EnableInvokeDynamic) {
    // We should not encounter this bytecode if !EnableInvokeDynamic.
    // The verifier will stop it.  However, if we get past the verifier,
    // this will stop the thread in a reasonable way, without crashing the JVM.
    __ call_VM(noreg, CAST_FROM_FN_PTR(address,
                     InterpreterRuntime::throw_IncompatibleClassChangeError));
    // the call_VM checks for exception, so we should never return here.
    __ should_not_reach_here();
    return;
  }

  const Register T2_callsite = T2;

  prepare_invoke(byte_no, Rmethod, T2_callsite);

  // T2_callsite: CallSite object (from cpool->resolved_references[f1])
  // Rmethod:     MH.linkToCallSite method (from f2)

  // Note: T2_callsite is already pushed by prepare_invoke
  // %%% should make a type profile for any invokedynamic that takes a ref argument
  // profile this call
  __ profile_call(T9);

  // 2014/11/24 Fu
  // T8: tmp, used for mdp
  // Rmethod: callee
  // T9: tmp
  // is_virtual: false
  __ profile_arguments_type(T8, Rmethod, T9, false);

  __ verify_oop(T2_callsite);

  __ jump_from_interpreted(Rmethod, T9);
}

//-----------------------------------------------------------------------------
// Allocation
// T1 : tags & buffer end & thread
// T2 : object end
// T3 : klass
// T0 : instance size in words
// A1 : cpool
// A2 : cp index
// return object in FSR
void TemplateTable::_new() {
  transition(vtos, atos);
  __ get_unsigned_2_byte_index_at_bcp(A2, 1);

  Label slow_case;
  Label done;
  Label initialize_header;
  Label initialize_object; // including clearing the fields
  Label allocate_shared;

  // get InstanceKlass in T3
  __ get_cpool_and_tags(A1, T1);

  __ dsll(AT, A2, Address::times_8);
  if (UseLoongsonISA && Assembler::is_simm(sizeof(ConstantPool), 8)) {
    __ gsldx(T3, A1, AT, sizeof(ConstantPool));
  } else {
    __ dadd(AT, A1, AT);
    __ ld(T3, AT, sizeof(ConstantPool));
  }

  // make sure the class we're about to instantiate has been resolved.
  // Note: slow_case does a pop of stack, which is why we loaded class/pushed above
  const int tags_offset = Array<u1>::base_offset_in_bytes();
  if (UseLoongsonISA && Assembler::is_simm(tags_offset, 8)) {
    __ gslbx(AT, T1, A2, tags_offset);
  } else {
    __ dadd(T1, T1, A2);
    __ lb(AT, T1, tags_offset);
  }
  __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class);
  __ bne(AT, R0, slow_case);
  //__ delayed()->nop();

  // make sure klass is initialized & doesn't have finalizer
  // make sure klass is fully initialized
  __ lhu(T1, T3, in_bytes(InstanceKlass::init_state_offset()));
  __ daddiu(AT, T1, - (int)InstanceKlass::fully_initialized);
  __ bne(AT, R0, slow_case);
  //__ delayed()->nop();

  // has_finalizer
  __ lw(T0, T3, in_bytes(Klass::layout_helper_offset()));
  __ andi(AT, T0, Klass::_lh_instance_slow_path_bit);
  __ bne(AT, R0, slow_case);
  //__ delayed()->nop();

  // Allocate the instance
  // 1) Try to allocate in the TLAB
  // 2) if that fails and the object is large, allocate in the shared Eden
  // 3) if the above fails (or is not applicable), go to the slow case
  //    (creates a new TLAB, etc.)

  const bool allow_shared_alloc =
    Universe::heap()->supports_inline_contig_alloc() && !CMSIncrementalMode;

  if (UseTLAB) {
#ifndef OPT_THREAD
    const Register thread = T8;
    __ get_thread(thread);
#else
    const Register thread = TREG;
#endif
    // get tlab_top
    __ ld(FSR, thread, in_bytes(JavaThread::tlab_top_offset()));
    // get tlab_end
    __ ld(AT, thread, in_bytes(JavaThread::tlab_end_offset()));
    __ dadd(T2, FSR, T0);
    __ slt(AT, AT, T2);
    __ bne(AT, R0, allow_shared_alloc ? allocate_shared : slow_case);
    __ delayed()->nop();
    __ sd(T2, thread, in_bytes(JavaThread::tlab_top_offset()));

    if (ZeroTLAB) {
      // the fields have already been cleared
      __ beq(R0, R0, initialize_header);
    } else {
      // initialize both the header and fields
      __ beq(R0, R0, initialize_object);
    }
    __ delayed()->nop();
  }

  // Allocation in the shared Eden, if allowed
  // T0 : instance size in words
  if (allow_shared_alloc) {
    __ bind(allocate_shared);

    Label retry;
    Address heap_top(T1);
    __ set64(T1, (long)Universe::heap()->top_addr());
    __ ld(FSR, heap_top);

    __ bind(retry);
    __ set64(AT, (long)Universe::heap()->end_addr());
    __ ld(AT, AT, 0);
    __ dadd(T2, FSR, T0);
    __ slt(AT, AT, T2);
    __ bne(AT, R0, slow_case);
    __ delayed()->nop();

    // Compare FSR with the current heap top; if they are still equal, store
    // the new top atomically. cmpxchg leaves AT non-zero on success and zero
    // on failure.
    //
    // FSR: object begin
    // T2: object end
    // T0: instance size in words

    // if someone beat us on the allocation, try again, otherwise continue
    __ cmpxchg(T2, heap_top, FSR);
    __ beq(AT, R0, retry);
    __ delayed()->nop();
  }

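  // The shared-Eden path above is the classic CAS loop on the heap top
  // pointer, roughly (illustrative pseudo-code):
  //
  //   do {
  //     HeapWord* top = *Universe::heap()->top_addr();
  //     HeapWord* end = top + size_in_words;
  //     if (end > *Universe::heap()->end_addr()) goto slow_case;
  //   } while (!CAS(Universe::heap()->top_addr(), top, end));
  //   // the object starts at 'top' on success
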
  if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) {
    // The object is initialized before the header.  If the object size is
    // zero, go directly to the header initialization.
    __ bind(initialize_object);
    __ set64(AT, - sizeof(oopDesc));
    __ daddu(T0, T0, AT);
    __ beq(T0, R0, initialize_header);
    __ delayed()->nop();

    // initialize remaining object fields: T0 is a multiple of 2
    {
      Label loop;
      __ dadd(T1, FSR, T0);
      __ daddi(T1, T1, -oopSize);

      __ bind(loop);
      __ sd(R0, T1, sizeof(oopDesc) + 0 * oopSize);
      __ bne(T1, FSR, loop); // don't clear the header
      __ delayed()->daddi(T1, T1, -oopSize);
    }

    // klass in T3,
    // initialize object header only.
    __ bind(initialize_header);
    if (UseBiasedLocking) {
      __ ld(AT, T3, in_bytes(Klass::prototype_header_offset()));
      __ sd(AT, FSR, oopDesc::mark_offset_in_bytes());
    } else {
      __ set64(AT, (long)markOopDesc::prototype());
      __ sd(AT, FSR, oopDesc::mark_offset_in_bytes());
    }

    __ store_klass_gap(FSR, R0);
    __ store_klass(FSR, T3);

    {
      SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0);
      // Trigger dtrace event for fastpath
      __ push(atos);
      __ call_VM_leaf(
           CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), FSR);
      __ pop(atos);
    }

    __ b(done);
    __ delayed()->nop();
  }

  // slow case
  __ bind(slow_case);
  call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), A1, A2);

  // continue
  __ bind(done);
  __ sync();
}

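// The TLAB fast path in _new is a bump-the-pointer allocation, roughly
// (illustrative pseudo-code; helper names abbreviated):
//
//   HeapWord* obj = thread->tlab_top();
//   HeapWord* end = obj + size_in_words;
//   if (end > thread->tlab_end()) goto allocate_shared_or_slow;
//   thread->set_tlab_top(end);
//   if (!ZeroTLAB) zero_memory(obj + sizeof(oopDesc), end);  // fields only
//   install_mark_word_and_klass(obj, klass);                 // header last
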
void TemplateTable::newarray() {
  transition(itos, atos);
  __ lbu(A1, at_bcp(1));
  // type, count
  call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), A1, FSR);
  __ sync();
}

void TemplateTable::anewarray() {
  transition(itos, atos);
  __ get_2_byte_integer_at_bcp(A2, AT, 1);
  __ huswap(A2);
  __ get_constant_pool(A1);
  // cp, index, count
  call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), A1, A2, FSR);
  __ sync();
}

void TemplateTable::arraylength() {
  transition(atos, itos);
  __ null_check(FSR, arrayOopDesc::length_offset_in_bytes());
  __ lw(FSR, FSR, arrayOopDesc::length_offset_in_bytes());
}

// T2, T3 and T1 play the roles of x86's ebx, ecx and edx here.
// When invoking gen_subtype_check: super klass in T3, sub klass in T2, object always in FSR.
// T2 : sub klass
// T3 : cpool & super klass
// T1 : tags
void TemplateTable::checkcast() {
  transition(atos, atos);
  Label done, is_null, ok_is_subtype, quicked, resolved;
  __ beq(FSR, R0, is_null);
  __ delayed()->nop();

  // Get cpool & tags index
  __ get_cpool_and_tags(T3, T1);
  __ get_2_byte_integer_at_bcp(T2, AT, 1);
  __ huswap(T2);

  // See if bytecode has already been quicked
  __ dadd(AT, T1, T2);
  __ lb(AT, AT, Array<u1>::base_offset_in_bytes());
  __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class);
  __ beq(AT, R0, quicked);
  __ delayed()->nop();

  // 2012/6/2 Jin: In InterpreterRuntime::quicken_io_cc, many new classes may be
  //  loaded, and GC may move the object held in V0 elsewhere in the heap.
  //  Therefore we must never keep such an object in a register across the call;
  //  instead we save it on the stack, where the GC thread can update it.
  //  After GC, the object address in FSR has been updated to the new location.
  __ push(atos);
  const Register thread = TREG;
#ifndef OPT_THREAD
  __ get_thread(thread);
#endif
  call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
  __ get_vm_result_2(T3, thread);
  __ pop_ptr(FSR);
  __ b(resolved);
  __ delayed()->nop();

  // klass already in cp, get superklass in T3
  __ bind(quicked);
  __ dsll(AT, T2, Address::times_8);
  __ dadd(AT, T3, AT);
  __ ld(T3, AT, sizeof(ConstantPool));

  __ bind(resolved);

  // get subklass in T2 (works with compressed oops)
  __ load_klass(T2, FSR);
  // Superklass in T3.  Subklass in T2.
  __ gen_subtype_check(T3, T2, ok_is_subtype);

  // Come here on failure
  // object is at FSR
  __ jmp(Interpreter::_throw_ClassCastException_entry);
  __ delayed()->nop();

  // Come here on success
  __ bind(ok_is_subtype);

  // Collect counts on whether this check-cast sees NULLs a lot or not.
  if (ProfileInterpreter) {
    __ b(done);
    __ delayed()->nop();
    __ bind(is_null);
    __ profile_null_seen(T3);
  } else {
    __ bind(is_null);
  }
  __ bind(done);
}

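// checkcast first "quickens" the constant-pool entry: once the tag is
// JVM_CONSTANT_Class the klass can be read straight out of the pool,
// otherwise the runtime resolves it. Roughly (illustrative pseudo-code):
//
//   if (tags[index] == JVM_CONSTANT_Class) {
//     super = cpool->klass_at(index);              // quicked path
//   } else {
//     push(obj);                                   // GC may move obj
//     super = InterpreterRuntime::quicken_io_cc(); // resolves the class
//     obj = pop();
//   }
//   if (obj != NULL && !obj->klass()->is_subtype_of(super))
//     throw ClassCastException;
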
// T3 as cpool & superklass, T1 as tags, T2 as index & subklass
// object always in FSR
void TemplateTable::instanceof() {
  transition(atos, itos);
  Label done, is_null, ok_is_subtype, quicked, resolved;

  __ beq(FSR, R0, is_null);
  __ delayed()->nop();

  // Get cpool & tags index
  __ get_cpool_and_tags(T3, T1);
  // get index
  __ get_2_byte_integer_at_bcp(T2, AT, 1);
  __ huswap(T2); // unsigned, as in checkcast; the cp index is a u2

  // See if bytecode has already been quicked
  // quicked
  __ daddu(AT, T1, T2);
  __ lb(AT, AT, Array<u1>::base_offset_in_bytes());
  __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class);
  __ beq(AT, R0, quicked);
  __ delayed()->nop();

  __ push(atos);
  const Register thread = TREG;
#ifndef OPT_THREAD
  __ get_thread(thread);
#endif
  call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
  __ get_vm_result_2(T3, thread);
  __ pop_ptr(FSR);
  __ b(resolved);
  __ delayed()->nop();

  // get superklass in T3, subklass in T2
  __ bind(quicked);
  __ dsll(AT, T2, Address::times_8);
  __ daddu(AT, T3, AT);
  __ ld(T3, AT, sizeof(ConstantPool));

  __ bind(resolved);
  // get subklass in T2 (works with compressed oops)
  __ load_klass(T2, FSR);

  // Superklass in T3.  Subklass in T2.
  __ gen_subtype_check(T3, T2, ok_is_subtype);
  // Come here on failure
  __ b(done);
  __ delayed(); __ move(FSR, R0);

  // Come here on success
  __ bind(ok_is_subtype);
  __ move(FSR, 1);

  // Collect counts on whether this test sees NULLs a lot or not.
  if (ProfileInterpreter) {
    __ beq(R0, R0, done);
    __ nop();
    __ bind(is_null);
    __ profile_null_seen(T3);
  } else {
    __ bind(is_null);   // same as 'done'
  }
  __ bind(done);
  // FSR = 0: obj == NULL or  obj is not an instanceof the specified klass
  // FSR = 1: obj != NULL and obj is     an instanceof the specified klass
}

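// Net effect on the operand stack (illustrative pseudo-code):
//
//   FSR = (obj != NULL && obj->klass()->is_subtype_of(resolved_klass)) ? 1 : 0;
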
//-----------------------------------------------------------------------------
// Breakpoints
void TemplateTable::_breakpoint() {
  // Note: We get here even if we are single stepping.
  // jbug insists on setting breakpoints at every bytecode
  // even if we are in single step mode.

  transition(vtos, vtos);

  // get the unpatched byte code
  __ get_method(A1);
  __ call_VM(NOREG,
             CAST_FROM_FN_PTR(address,
                              InterpreterRuntime::get_original_bytecode_at),
             A1, BCP);
  __ move(Rnext, V0); // Jin: Rnext will be used in dispatch_only_normal

  // post the breakpoint event
  __ get_method(A1);
  __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), A1, BCP);

  // complete the execution of original bytecode
  __ dispatch_only_normal(vtos);
}

//-----------------------------------------------------------------------------
// Exceptions

void TemplateTable::athrow() {
  transition(atos, vtos);
  __ null_check(FSR);
  __ jmp(Interpreter::throw_exception_entry());
  __ delayed()->nop();
}

//-----------------------------------------------------------------------------
// Synchronization
//
// Note: monitorenter & exit are symmetric routines; which is reflected
//       in the assembly code structure as well
//
// Stack layout:
//
// [expressions  ] <--- SP               = expression stack top
// ..
// [expressions  ]
// [monitor entry] <--- monitor block top = expression stack bot
// ..
// [monitor entry]
// [frame data   ] <--- monitor block bot
// ...
// [return addr  ] <--- FP

// we use T2 as monitor entry pointer, T3 as monitor top pointer, c_rarg0 as free slot pointer
// object always in FSR
void TemplateTable::monitorenter() {
  transition(atos, vtos);

  // check for NULL object
  __ null_check(FSR);

  const Address monitor_block_top(FP, frame::interpreter_frame_monitor_block_top_offset
      * wordSize);
  const int entry_size = (frame::interpreter_frame_monitor_size() * wordSize);
  Label allocated;

  // initialize entry pointer
  __ move(c_rarg0, R0);

  // find a free slot in the monitor block (result in c_rarg0)
  {
    Label entry, loop, exit, next;
    __ ld(T2, monitor_block_top);
    __ b(entry);
    __ delayed()->daddi(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize);

    // free slot?
    __ bind(loop);
    __ ld(AT, T2, BasicObjectLock::obj_offset_in_bytes());
    __ bne(AT, R0, next);
    __ delayed()->nop();
    __ move(c_rarg0, T2);

    __ bind(next);
    __ beq(FSR, AT, exit);
    __ delayed()->nop();
    __ daddi(T2, T2, entry_size);

    __ bind(entry);
    __ bne(T3, T2, loop);
    __ delayed()->nop();
    __ bind(exit);
  }

  __ bne(c_rarg0, R0, allocated);
  __ delayed()->nop();

  // allocate one if there's no free slot
  {
    Label entry, loop;
    // 1. compute new pointers                   // SP: old expression stack top
    __ ld(c_rarg0, monitor_block_top);
    __ daddi(SP, SP, - entry_size);
    __ daddi(c_rarg0, c_rarg0, - entry_size);
    __ sd(c_rarg0, monitor_block_top);
    __ b(entry);
    __ delayed(); __ move(T3, SP);

    // 2. move expression stack contents
    __ bind(loop);
    __ ld(AT, T3, entry_size);
    __ sd(AT, T3, 0);
    __ daddi(T3, T3, wordSize);
    __ bind(entry);
    __ bne(T3, c_rarg0, loop);
    __ delayed()->nop();
  }

  __ bind(allocated);
  // Increment bcp to point to the next bytecode,
  // so exception handling for async. exceptions works correctly.
  // The object has already been popped from the stack, so the
  // expression stack looks correct.
  __ daddi(BCP, BCP, 1);
  __ sd(FSR, c_rarg0, BasicObjectLock::obj_offset_in_bytes());
  __ lock_object(c_rarg0);
  // check to make sure this monitor doesn't cause stack overflow after locking
  __ save_bcp();  // in case of exception
  __ generate_stack_overflow_check(0);
  // The bcp has already been incremented. Just need to dispatch to next instruction.

  __ dispatch_next(vtos);
}

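// The slot search above scans the in-frame monitor block, remembering a free
// slot and stopping early if the object already has an entry; if no slot is
// free, the expression stack is shifted down by one entry_size to make room.
// Roughly (illustrative pseudo-code; helper names hypothetical):
//
//   BasicObjectLock* free = NULL;
//   for (BasicObjectLock* e = monitor_block_top; e != monitor_block_bot; e++) {
//     if (e->obj() == NULL) free = e;      // candidate slot
//     if (e->obj() == lock_obj) break;     // object already has an entry
//   }
//   if (free == NULL) free = grow_monitor_block();
//   free->set_obj(lock_obj);
//   lock_object(free);
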
// T2 : top
// c_rarg0 : entry
void TemplateTable::monitorexit() {
  transition(atos, vtos);

  __ null_check(FSR);

  const int entry_size = (frame::interpreter_frame_monitor_size() * wordSize);
  Label found;

  // find matching slot
  {
    Label entry, loop;
    __ ld(c_rarg0, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize);
    __ b(entry);
    __ delayed()->daddiu(T2, FP, frame::interpreter_frame_initial_sp_offset * wordSize);

    __ bind(loop);
    __ ld(AT, c_rarg0, BasicObjectLock::obj_offset_in_bytes());
    __ beq(FSR, AT, found);
    __ delayed()->nop();
    __ daddiu(c_rarg0, c_rarg0, entry_size);
    __ bind(entry);
    __ bne(T2, c_rarg0, loop);
    __ delayed()->nop();
  }

  // error handling. Unlocking was not block-structured
  Label end;
  __ call_VM(NOREG, CAST_FROM_FN_PTR(address,
             InterpreterRuntime::throw_illegal_monitor_state_exception));
  __ should_not_reach_here();

  // call run-time routine
  // c_rarg0: points to monitor entry
  __ bind(found);
  __ move(TSR, FSR);
  __ unlock_object(c_rarg0);
  __ move(FSR, TSR);
  __ bind(end);
}

// Wide instructions
void TemplateTable::wide() {
  transition(vtos, vtos);
  // Note: the BCP increment step is part of the individual wide bytecode implementations
  __ lbu(Rnext, at_bcp(1));
  __ dsll(T9, Rnext, Address::times_8);
  __ li(AT, (long)Interpreter::_wentry_point);
  __ dadd(AT, T9, AT);
  __ ld(T9, AT, 0);
  __ jr(T9);
  __ delayed()->nop();
}

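// wide dispatches through a secondary entry table indexed by the bytecode
// that follows the wide prefix; that entry re-reads its operands with wide
// offsets. Roughly (illustrative pseudo-code):
//
//   goto Interpreter::_wentry_point[*(bcp + 1)];
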
void TemplateTable::multianewarray() {
  transition(vtos, atos);
  // last dim is on top of stack; we want address of first one:
  // first_addr = last_addr + (ndims - 1) * wordSize
  __ lbu(A1, at_bcp(3));  // dimension
  __ daddi(A1, A1, -1);
  __ dsll(A1, A1, Address::times_8);
  __ dadd(A1, SP, A1);    // now A1 points to the count array on the stack
  call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), A1);
  __ lbu(AT, at_bcp(3));
  __ dsll(AT, AT, Address::times_8);
  __ dadd(SP, SP, AT);    // pop the dimension counts
  __ sync();
}

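// The operand-stack arithmetic above, roughly (illustrative pseudo-code;
// expression-stack slots are word-sized):
//
//   int ndims = *(u1*)(bcp + 3);
//   intptr_t* first_dim = SP + (ndims - 1);   // counts pushed last-dim-on-top
//   result = InterpreterRuntime::multianewarray(first_dim);
//   SP += ndims;                              // pop the dimension counts
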
#endif // !CC_INTERP