src/cpu/mips/vm/templateTable_mips_64.cpp

author:      aoqi
date:        Thu, 07 Sep 2017 09:12:16 +0800
changeset:   6880:52ea28d233d2
parent:      428:4030eb32ef94
child:       6881:633080c2ba82
permissions: -rw-r--r--

#5745 [Code Reorganization] code cleanup and code style fix
This is a huge patch, but it contains only code cleanup, code style fixes and removal of useless code, for example:
tabs -> two spaces, deleted trailing spaces at the ends of lines, deleted useless comments.

This patch also includes:
The declaration and definition of class MacroAssembler are moved from assembler_mips.h/cpp to macroAssembler_mips.h/cpp.

     1 /*
     2  * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
     3  * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved.
     4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     5  *
     6  * This code is free software; you can redistribute it and/or modify it
     7  * under the terms of the GNU General Public License version 2 only, as
     8  * published by the Free Software Foundation.
     9  *
    10  * This code is distributed in the hope that it will be useful, but WITHOUT
    11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    13  * version 2 for more details (a copy is included in the LICENSE file that
    14  * accompanied this code).
    15  *
    16  * You should have received a copy of the GNU General Public License version
    17  * 2 along with this work; if not, write to the Free Software Foundation,
    18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    19  *
    20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    21  * or visit www.oracle.com if you need additional information or have any
    22  * questions.
    23  *
    24  */
    26 #include "precompiled.hpp"
    27 #include "asm/macroAssembler.hpp"
    28 #include "interpreter/interpreter.hpp"
    29 #include "interpreter/interpreterRuntime.hpp"
    30 #include "interpreter/templateTable.hpp"
    31 #include "memory/universe.inline.hpp"
    32 #include "oops/methodData.hpp"
    33 #include "oops/objArrayKlass.hpp"
    34 #include "oops/oop.inline.hpp"
    35 #include "prims/methodHandles.hpp"
    36 #include "runtime/sharedRuntime.hpp"
    37 #include "runtime/stubRoutines.hpp"
    38 #include "runtime/synchronizer.hpp"
    41 #ifndef CC_INTERP
    43 #define __ _masm->
    45 // Platform-dependent initialization
    47 void TemplateTable::pd_initialize() {
    48   // No mips specific initialization
    49 }
    51 // Address computation: local variables
    53 static inline Address iaddress(int n) {
    54   return Address(LVP, Interpreter::local_offset_in_bytes(n));
    55 }
    57 static inline Address laddress(int n) {
    58   return iaddress(n + 1);
    59 }
    61 static inline Address faddress(int n) {
    62   return iaddress(n);
    63 }
    65 static inline Address daddress(int n) {
    66   return laddress(n);
    67 }
    69 static inline Address aaddress(int n) {
    70   return iaddress(n);
    71 }
    72 static inline Address haddress(int n)            { return iaddress(n + 0); }
    75 static inline Address at_sp()             {  return Address(SP,   0); }
    76 static inline Address at_sp_p1()          { return Address(SP,  1 * wordSize); }
    77 static inline Address at_sp_p2()          { return Address(SP,  2 * wordSize); }
    79 // At the top of the Java expression stack, which may be different from SP.
    80 // It isn't for category 1 values.
    81 static inline Address at_tos   () {
    82   Address tos = Address(SP,  Interpreter::expr_offset_in_bytes(0));
    83   return tos;
    84 }
    86 static inline Address at_tos_p1() {
    87   return Address(SP,  Interpreter::expr_offset_in_bytes(1));
    88 }
    90 static inline Address at_tos_p2() {
    91   return Address(SP,  Interpreter::expr_offset_in_bytes(2));
    92 }
    94 static inline Address at_tos_p3() {
    95   return Address(SP,  Interpreter::expr_offset_in_bytes(3));
    96 }
     98 // we use S0 as bcp; make sure bcp is in S0 before calling any of the template generators
    99 Address TemplateTable::at_bcp(int offset) {
   100   assert(_desc->uses_bcp(), "inconsistent uses_bcp information");
   101   return Address(BCP, offset);
   102 }
   104 // bytecode folding
   105 void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg,
   106                                    Register tmp_reg, bool load_bc_into_bc_reg/*=true*/,
   107                                    int byte_no) {
   108   if (!RewriteBytecodes)  return;
   109   Label L_patch_done;
   111   switch (bc) {
   112   case Bytecodes::_fast_aputfield:
   113   case Bytecodes::_fast_bputfield:
   114   case Bytecodes::_fast_cputfield:
   115   case Bytecodes::_fast_dputfield:
   116   case Bytecodes::_fast_fputfield:
   117   case Bytecodes::_fast_iputfield:
   118   case Bytecodes::_fast_lputfield:
   119   case Bytecodes::_fast_sputfield:
   120     {
   121       // We skip bytecode quickening for putfield instructions when
   122       // the put_code written to the constant pool cache is zero.
   123       // This is required so that every execution of this instruction
   124       // calls out to InterpreterRuntime::resolve_get_put to do
   125       // additional, required work.
   126       assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
   127       assert(load_bc_into_bc_reg, "we use bc_reg as temp");
   128       __ get_cache_and_index_and_bytecode_at_bcp(tmp_reg, bc_reg, tmp_reg, byte_no, 1);
   129       __ daddi(bc_reg, R0, bc);
   130       __ beq(tmp_reg, R0, L_patch_done);
   131       __ delayed()->nop();
   132     }
   133     break;
   134   default:
   135     assert(byte_no == -1, "sanity");
   136     // the pair bytecodes have already done the load.
   137     if (load_bc_into_bc_reg) {
   138       __ move(bc_reg, bc);
   139     }
   140   }
   142   if (JvmtiExport::can_post_breakpoint()) {
   143     Label L_fast_patch;
   144     // if a breakpoint is present we can't rewrite the stream directly
   145     __ lbu(tmp_reg, at_bcp(0));
   146     __ move(AT, Bytecodes::_breakpoint);
   147     __ bne(tmp_reg, AT, L_fast_patch);
   148     __ delayed()->nop();
   150     __ get_method(tmp_reg);
   151     // Let breakpoint table handling rewrite to quicker bytecode
   152     __ call_VM(NOREG, CAST_FROM_FN_PTR(address,
   153     InterpreterRuntime::set_original_bytecode_at), tmp_reg, BCP, bc_reg);
   155     __ b(L_patch_done);
   156     __ delayed()->nop();
   157     __ bind(L_fast_patch);
   158   }
   160 #ifdef ASSERT
   161   Label L_okay;
   162   __ lbu(tmp_reg, at_bcp(0));
   163   __ move(AT, (int)Bytecodes::java_code(bc));
   164   __ beq(tmp_reg, AT, L_okay);
   165   __ delayed()->nop();
   166   __ beq(tmp_reg, bc_reg, L_patch_done);
   167   __ delayed()->nop();
   168   __ stop("patching the wrong bytecode");
   169   __ bind(L_okay);
   170 #endif
   172   // patch bytecode
   173   __ sb(bc_reg, at_bcp(0));
   174   __ bind(L_patch_done);
   175 }
   178 // Individual instructions
   180 void TemplateTable::nop() {
   181   transition(vtos, vtos);
   182   // nothing to do
   183 }
   185 void TemplateTable::shouldnotreachhere() {
   186   transition(vtos, vtos);
   187   __ stop("shouldnotreachhere bytecode");
   188 }
   190 void TemplateTable::aconst_null() {
   191   transition(vtos, atos);
   192   __ move(FSR, R0);
   193 }
   195 void TemplateTable::iconst(int value) {
   196   transition(vtos, itos);
   197   if (value == 0) {
   198     __ move(FSR, R0);
   199   } else {
   200     __ move(FSR, value);
   201   }
   202 }
   204 void TemplateTable::lconst(int value) {
   205   transition(vtos, ltos);
   206   if (value == 0) {
   207     __ move(FSR, R0);
   208   } else {
   209     __ move(FSR, value);
   210   }
   211   assert(value >= 0, "check this code");
   212   //__ move(SSR, R0);
   213 }
   215 void TemplateTable::fconst(int value) {
   216   static float  _f1 = 1.0, _f2 = 2.0;
   217   transition(vtos, ftos);
   218   float* p;
   219   switch( value ) {
   220     default: ShouldNotReachHere();
   221     case 0:  __ dmtc1(R0, FSF);  return;
   222     case 1:  p = &_f1;   break;
   223     case 2:  p = &_f2;   break;
   224   }
   225   __ li(AT, (address)p);
   226   __ lwc1(FSF, AT, 0);
   227 }
   229 void TemplateTable::dconst(int value) {
   230   static double _d1 = 1.0;
   231   transition(vtos, dtos);
   232   double* p;
   233   switch( value ) {
   234     default: ShouldNotReachHere();
   235     case 0:  __ dmtc1(R0, FSF);  return;
   236     case 1:  p = &_d1;   break;
   237   }
   238   __ li(AT, (address)p);
   239   __ ldc1(FSF, AT, 0);
   240 }
   242 void TemplateTable::bipush() {
   243   transition(vtos, itos);
   244   __ lb(FSR, at_bcp(1));
   245 }
   247 void TemplateTable::sipush() {
   248   transition(vtos, itos);
   249   __ get_2_byte_integer_at_bcp(FSR, AT, 1);
   250   __ hswap(FSR);
   251 }
   253 // T1 : tags
   254 // T2 : index
   255 // T3 : cpool
   256 // T8 : tag
   257 void TemplateTable::ldc(bool wide) {
   258   transition(vtos, vtos);
   259   Label call_ldc, notFloat, notClass, Done;
   260   // get index in cpool
   261   if (wide) {
   262     __ get_2_byte_integer_at_bcp(T2, AT, 1);
   263     __ huswap(T2);
   264   } else {
   265     __ lbu(T2, at_bcp(1));
   266   }
   268   __ get_cpool_and_tags(T3, T1);
   270   const int base_offset = ConstantPool::header_size() * wordSize;
   271   const int tags_offset = Array<u1>::base_offset_in_bytes();
   273   // get type
   274   __ dadd(AT, T1, T2);
   275   __ lb(T1, AT, tags_offset);
   276   //now T1 is the tag
   278   // unresolved class - get the resolved class
   279   __ daddiu(AT, T1, - JVM_CONSTANT_UnresolvedClass);
   280   __ beq(AT, R0, call_ldc);
   281   __ delayed()->nop();
   283   // unresolved class in error (resolution failed) - call into runtime
   284   // so that the same error from first resolution attempt is thrown.
   285   __ daddiu(AT, T1, -JVM_CONSTANT_UnresolvedClassInError);
   286   __ beq(AT, R0, call_ldc);
   287   __ delayed()->nop();
   289   // resolved class - need to call vm to get java mirror of the class
   290   __ daddiu(AT, T1, - JVM_CONSTANT_Class);
   291   __ bne(AT, R0, notClass);
   292   __ delayed()->dsll(T2, T2, Address::times_8);
   294   __ bind(call_ldc);
   296   __ move(A1, wide);
   297   call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), A1);
   298   __ push(atos);
   299   __ b(Done);
   300   __ delayed()->nop();
   301   __ bind(notClass);
   303   __ daddiu(AT, T1, -JVM_CONSTANT_Float);
   304   __ bne(AT, R0, notFloat);
   305   __ delayed()->nop();
   306   // ftos
   307   __ dadd(AT, T3, T2);
   308   __ lwc1(FSF, AT, base_offset);
   309   __ push_f();
   310   __ b(Done);
   311   __ delayed()->nop();
   313   __ bind(notFloat);
   314 #ifdef ASSERT
   315   {
   316     Label L;
   317     __ daddiu(AT, T1, -JVM_CONSTANT_Integer);
   318     __ beq(AT, R0, L);
   319     __ delayed()->nop();
   320     __ stop("unexpected tag type in ldc");
   321     __ bind(L);
   322   }
   323 #endif
   324   // atos and itos
   325   __ dadd(T0, T3, T2);
   326   __ lw(FSR, T0, base_offset);
   327   __ push(itos);
   328   __ b(Done);
   329   __ delayed()->nop();
   332   if (VerifyOops) {
   333     __ verify_oop(FSR);
   334   }
   336   __ bind(Done);
   337 }
   339 // Fast path for caching oop constants.
   340 void TemplateTable::fast_aldc(bool wide) {
   341   transition(vtos, atos);
   343   Register result = FSR;
   344   Register tmp = SSR;
   345   int index_size = wide ? sizeof(u2) : sizeof(u1);
   347   Label resolved;
   349   // We are resolved if the resolved reference cache entry contains a
   350   // non-null object (String, MethodType, etc.)
   351   assert_different_registers(result, tmp);
   352   __ get_cache_index_at_bcp(tmp, 1, index_size);
   353   __ load_resolved_reference_at_index(result, tmp);
   354   __ bne(result, R0, resolved);
   355   __ delayed()->nop();
   357   address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);
   358   // first time invocation - must resolve first
   359   int i = (int)bytecode();
   360   __ move(tmp, i);
   361   __ call_VM(result, entry, tmp);
   363   __ bind(resolved);
   365   if (VerifyOops) {
   366     __ verify_oop(result);
   367   }
   368 }
   371 // used register: T2, T3, T1
   372 // T2 : index
   373 // T3 : cpool
   374 // T1 : tag
   375 void TemplateTable::ldc2_w() {
   376   transition(vtos, vtos);
   377   Label Long, Done;
   379   // get index in cpool
   380   __ get_2_byte_integer_at_bcp(T2, AT, 1);
   381   __ huswap(T2);
   383   __ get_cpool_and_tags(T3, T1);
   385   const int base_offset = ConstantPool::header_size() * wordSize;
   386   const int tags_offset = Array<u1>::base_offset_in_bytes();
   388   // get type in T1
   389   __ dadd(AT, T1, T2);
   390   __ lb(T1, AT, tags_offset);
   392   __ daddiu(AT, T1, - JVM_CONSTANT_Double);
   393   __ bne(AT, R0, Long);
   394   __ delayed()->dsll(T2, T2, Address::times_8);
   395   // dtos
   396   __ daddu(AT, T3, T2);
   397   __ ldc1(FSF, AT, base_offset + 0 * wordSize);
   398   __ sdc1(FSF, SP, - 2 * wordSize);
   399   __ b(Done);
   400   __ delayed()->daddi(SP, SP, - 2 * wordSize);
   402   // ltos
   403   __ bind(Long);
   404   __ dadd(AT, T3, T2);
   405   __ ld(FSR, AT, base_offset + 0 * wordSize);
   406   __ push(ltos);
   408   __ bind(Done);
   409 }
    411 // we compute the actual local variable address here
    412 // x86 doesn't do so because it has a scaled-index addressing mode; we don't have one, so we do it here
   413 void TemplateTable::locals_index(Register reg, int offset) {
   414   __ lbu(reg, at_bcp(offset));
   415   __ dsll(reg, reg, Address::times_8);
   416   __ dsub(reg, LVP, reg);
   417 }
    419 // this method will do bytecode folding of the two forms:
   420 // iload iload      iload caload
   421 // used register : T2, T3
   422 // T2 : bytecode
   423 // T3 : folded code
   424 void TemplateTable::iload() {
   425   transition(vtos, itos);
   426   if (RewriteFrequentPairs) {
   427     Label rewrite, done;
   428     // get the next bytecode in T2
   429     __ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_iload)));
   430     // if _iload, wait to rewrite to iload2.  We only want to rewrite the
   431     // last two iloads in a pair.  Comparing against fast_iload means that
    432     // the next bytecode is neither an iload nor a caload, and therefore
   433     // an iload pair.
   434     __ move(AT, Bytecodes::_iload);
   435     __ beq(AT, T2, done);
   436     __ delayed()->nop();
   438     __ move(T3, Bytecodes::_fast_iload2);
   439     __ move(AT, Bytecodes::_fast_iload);
   440     __ beq(AT, T2, rewrite);
   441     __ delayed()->nop();
   443     // if _caload, rewrite to fast_icaload
   444     __ move(T3, Bytecodes::_fast_icaload);
   445     __ move(AT, Bytecodes::_caload);
   446     __ beq(AT, T2, rewrite);
   447     __ delayed()->nop();
   449     // rewrite so iload doesn't check again.
   450     __ move(T3, Bytecodes::_fast_iload);
   452     // rewrite
   453     // T3 : fast bytecode
   454     __ bind(rewrite);
   455     patch_bytecode(Bytecodes::_iload, T3, T2, false);
   456     __ bind(done);
   457   }
   459   // Get the local value into tos
   460   locals_index(T2);
   461   __ lw(FSR, T2, 0);
   462 }
   464 // used register T2
   465 // T2 : index
   466 void TemplateTable::fast_iload2() {
   467   transition(vtos, itos);
   468   locals_index(T2);
   469   __ lw(FSR, T2, 0);
   470   __ push(itos);
   471   locals_index(T2, 3);
   472   __ lw(FSR, T2, 0);
   473 }
   475 // used register T2
   476 // T2 : index
   477 void TemplateTable::fast_iload() {
   478   transition(vtos, itos);
   479   locals_index(T2);
   480   __ lw(FSR, T2, 0);
   481 }
   483 // used register T2
   484 // T2 : index
   485 void TemplateTable::lload() {
   486   transition(vtos, ltos);
   487   locals_index(T2);
   488   __ ld(FSR, T2, -wordSize);
   489   __ ld(SSR, T2, 0);
   490 }
   492 // used register T2
   493 // T2 : index
   494 void TemplateTable::fload() {
   495   transition(vtos, ftos);
   496   locals_index(T2);
   497   __ lwc1(FSF, T2, 0);
   498 }
   500 // used register T2
   501 // T2 : index
   502 void TemplateTable::dload() {
   503   transition(vtos, dtos);
   504   locals_index(T2);
   505   __ ldc1(FSF, T2, -wordSize);
   506   __ ldc1(SSF, T2, 0);
   507 }
   509 // used register T2
   510 // T2 : index
   511 void TemplateTable::aload() {
   512   transition(vtos, atos);
   513   locals_index(T2);
   514   __ ld(FSR, T2, 0);
   515 }
   517 void TemplateTable::locals_index_wide(Register reg) {
   518   __ get_2_byte_integer_at_bcp(reg, AT, 2);
   519   __ huswap(reg);
   520   __ dsll(reg, reg, Address::times_8);
   521   __ dsub(reg, LVP, reg);
   522 }
   524 // used register T2
   525 // T2 : index
   526 void TemplateTable::wide_iload() {
   527   transition(vtos, itos);
   528   locals_index_wide(T2);
   529   __ ld(FSR, T2, 0);
   530 }
   532 // used register T2
   533 // T2 : index
   534 void TemplateTable::wide_lload() {
   535   transition(vtos, ltos);
   536   locals_index_wide(T2);
   537   __ ld(FSR, T2, -4);
   538 }
   540 // used register T2
   541 // T2 : index
   542 void TemplateTable::wide_fload() {
   543   transition(vtos, ftos);
   544   locals_index_wide(T2);
   545   __ lwc1(FSF, T2, 0);
   546 }
   548 // used register T2
   549 // T2 : index
   550 void TemplateTable::wide_dload() {
   551   transition(vtos, dtos);
   552   locals_index_wide(T2);
   553   __ ldc1(FSF, T2, -4);
   554 }
   556 // used register T2
   557 // T2 : index
   558 void TemplateTable::wide_aload() {
   559   transition(vtos, atos);
   560   locals_index_wide(T2);
   561   __ ld(FSR, T2, 0);
   562 }
    564 // we use A2 as the register for the index, BE CAREFUL!
    565 // we don't use the tge trap (code 29) now; it is kept for later optimization
   566 void TemplateTable::index_check(Register array, Register index) {
   567   // Pop ptr into array
   568   __ pop_ptr(array);
   569   index_check_without_pop(array, index);
   570 }
   572 void TemplateTable::index_check_without_pop(Register array, Register index) {
   573   // destroys ebx
   574   // check array
   575   __ null_check(array, arrayOopDesc::length_offset_in_bytes());
   577 #ifdef _LP64
   578   // sign extend since tos (index) might contain garbage in upper bits
   579   __ sll(index, index, 0);
   580 #endif // _LP64
   582   // check index
   583   Label ok;
   584   __ lw(AT, array, arrayOopDesc::length_offset_in_bytes());
   585 #ifndef OPT_RANGECHECK
   586   __ sltu(AT, index, AT);
   587   __ bne(AT, R0, ok);
   588   __ delayed()->nop();
    590   // throw_ArrayIndexOutOfBoundsException assumes the aberrant index is in A2
   591   if (A2 != index) __ move(A2, index);
   592   __ jmp(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry);
   593   __ delayed()->nop();
   594   __ bind(ok);
   595 #else
   596   __ lw(AT, array, arrayOopDesc::length_offset_in_bytes());
   597   __ move(A2, index);
   598   __ tgeu(A2, AT, 29);
   599 #endif
   600 }
   602 void TemplateTable::iaload() {
   603   transition(itos, itos);
   604   if(UseBoundCheckInstruction) {
   605     __ pop(SSR); //SSR:array    FSR: index
   606     __ dsll(FSR, FSR, 2);
   607     __ dadd(FSR, SSR, FSR);
   608     __ addi(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_INT));
   610     __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes());  //bound
   611     __ dsll(AT, AT, 2);
   612     __ dadd(AT, SSR, AT);
   613     __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_INT));
   615     __ gslwle(FSR, FSR, AT);
   616   } else {
   617     index_check(SSR, FSR);
   618     __ dsll(FSR, FSR, 2);
   619     __ dadd(FSR, SSR, FSR);
   620     //FSR: index
   621     __ lw(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_INT));
   622   }
   623 }
   625 void TemplateTable::laload() {
   626   transition(itos, ltos);
   627   if(UseBoundCheckInstruction) {
   628     __ pop(SSR); //SSR:array    FSR: index
   629     __ dsll(FSR, FSR, Address::times_8);
   630     __ dadd(FSR, SSR, FSR);
   631     __ addi(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize);
   633     __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes());  //bound
   634     __ dsll(AT, AT, Address::times_8);
   635     __ dadd(AT, SSR, AT);
   636     __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize);
   638     __ gsldle(FSR, FSR, AT);
   639   } else {
   640     index_check(SSR, FSR);
   641     __ dsll(AT, FSR, Address::times_8);
   642     __ dadd(AT, SSR, AT);
   643     __ ld(FSR, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize);
   644   }
   645 }
   647 void TemplateTable::faload() {
   648   transition(itos, ftos);
   649   if(UseBoundCheckInstruction) {
   650     __ pop(SSR); //SSR:array    FSR: index
   651     __ shl(FSR, 2);
   652     __ dadd(FSR, SSR, FSR);
   653     __ addi(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT));
   655     __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes());  //bound
   656     __ shl(AT, 2);
   657     __ dadd(AT, SSR, AT);
   658     __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_FLOAT));
   660     __ gslwlec1(FSF, FSR, AT);
   661   } else {
   662     index_check(SSR, FSR);
   663     __ shl(FSR, 2);
   664     __ dadd(FSR, SSR, FSR);
   665     __ lwc1(FSF, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT));
   666   }
   667 }
   669 void TemplateTable::daload() {
   670   transition(itos, dtos);
   671   if(UseBoundCheckInstruction) {
   672     __ pop(SSR); //SSR:array    FSR: index
   673     __ dsll(FSR, FSR, 3);
   674     __ dadd(FSR, SSR, FSR);
   675     __ addi(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize);
   677     __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes());  //bound
   678     __ dsll(AT, AT, 3);
   679     __ dadd(AT, SSR, AT);
   680     __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize);
   682     __ gsldlec1(FSF, FSR, AT);
   683   } else {
   684     index_check(SSR, FSR);
   685     __ dsll(AT, FSR, 3);
   686     __ dadd(AT, SSR, AT);
   687     __ ldc1(FSF, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize);
   688   }
   689 }
   691 void TemplateTable::aaload() {
   692   transition(itos, atos);
   693   index_check(SSR, FSR);
   694   __ dsll(FSR, FSR, UseCompressedOops ? Address::times_4 : Address::times_8);
   695   __ dadd(FSR, SSR, FSR);
   696   //add for compressedoops
   697   __ load_heap_oop(FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
   698 }
   700 void TemplateTable::baload() {
   701   transition(itos, itos);
   702   if(UseBoundCheckInstruction) {
   703     __ pop(SSR); //SSR:array   FSR:index
   704     __ dadd(FSR, SSR, FSR);
   705     __ addi(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //base
   707     __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes());
   708     __ dadd(AT, SSR, AT);
   709     __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //bound
   711     __ gslble(FSR, FSR, AT);
   712   } else {
   713     index_check(SSR, FSR);
   714     __ dadd(FSR, SSR, FSR);
   715     __ lb(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE));
   716   }
   717 }
   719 void TemplateTable::caload() {
   720   transition(itos, itos);
   721   index_check(SSR, FSR);
   722   __ dsll(FSR, FSR, Address::times_2);
   723   __ dadd(FSR, SSR, FSR);
   724   __ lhu(FSR, FSR,  arrayOopDesc::base_offset_in_bytes(T_CHAR));
   725 }
   727 // iload followed by caload frequent pair
   728 // used register : T2
   729 // T2 : index
   730 void TemplateTable::fast_icaload() {
   731   transition(vtos, itos);
   732   // load index out of locals
   733   locals_index(T2);
   734   __ lw(FSR, T2, 0);
   735   index_check(SSR, FSR);
   736   __ dsll(FSR, FSR, 1);
   737   __ dadd(FSR, SSR, FSR);
   738   __ lhu(FSR, FSR,  arrayOopDesc::base_offset_in_bytes(T_CHAR));
   739 }
   741 void TemplateTable::saload() {
   742   transition(itos, itos);
   743   if(UseBoundCheckInstruction) {
   744     __ pop(SSR); //SSR:array    FSR: index
   745     __ dsll(FSR, FSR, Address::times_2);
   746     __ dadd(FSR, SSR, FSR);
   747     __ addi(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT));
   749     __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes());  //bound
   750     __ dsll(AT, AT, Address::times_2);
   751     __ dadd(AT, SSR, AT);
   752     __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_SHORT));
   754     __ gslhle(FSR, FSR, AT);
   755   } else {
   756     index_check(SSR, FSR);
   757     __ dsll(FSR, FSR, Address::times_2);
   758     __ dadd(FSR, SSR, FSR);
   759     __ lh(FSR, FSR,  arrayOopDesc::base_offset_in_bytes(T_SHORT));
   760   }
   761 }
   763 void TemplateTable::iload(int n) {
   764   transition(vtos, itos);
   765   __ lw(FSR, iaddress(n));
   766 }
   768 void TemplateTable::lload(int n) {
   769   transition(vtos, ltos);
   770   __ ld(FSR, laddress(n));
   771 }
   773 void TemplateTable::fload(int n) {
   774   transition(vtos, ftos);
   775   __ lwc1(FSF, faddress(n));
   776 }
   778 void TemplateTable::dload(int n) {
   779   transition(vtos, dtos);
   780   __ ldc1(FSF, laddress(n));
   781 }
   783 void TemplateTable::aload(int n) {
   784   transition(vtos, atos);
   785   __ ld(FSR, aaddress(n));
   786 }
   788 // used register : T2, T3
   789 // T2 : bytecode
   790 // T3 : folded code
   791 void TemplateTable::aload_0() {
   792   transition(vtos, atos);
   793   // According to bytecode histograms, the pairs:
   794   //
   795   // _aload_0, _fast_igetfield
   796   // _aload_0, _fast_agetfield
   797   // _aload_0, _fast_fgetfield
   798   //
   799   // occur frequently. If RewriteFrequentPairs is set, the (slow)
   800   // _aload_0 bytecode checks if the next bytecode is either
   801   // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then
   802   // rewrites the current bytecode into a pair bytecode; otherwise it
   803   // rewrites the current bytecode into _fast_aload_0 that doesn't do
   804   // the pair check anymore.
   805   //
   806   // Note: If the next bytecode is _getfield, the rewrite must be
   807   //       delayed, otherwise we may miss an opportunity for a pair.
   808   //
   809   // Also rewrite frequent pairs
   810   //   aload_0, aload_1
   811   //   aload_0, iload_1
   812   // These bytecodes with a small amount of code are most profitable
   813   // to rewrite
   814   if (RewriteFrequentPairs) {
   815     Label rewrite, done;
   816     // get the next bytecode in T2
   817     __ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0)));
   819     // do actual aload_0
   820     aload(0);
   822     // if _getfield then wait with rewrite
   823     __ move(AT, Bytecodes::_getfield);
   824     __ beq(AT, T2, done);
   825     __ delayed()->nop();
    827     // if _igetfield then rewrite to _fast_iaccess_0
   828     assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) ==
   829         Bytecodes::_aload_0,
   830         "fix bytecode definition");
   831     __ move(T3, Bytecodes::_fast_iaccess_0);
   832     __ move(AT, Bytecodes::_fast_igetfield);
   833     __ beq(AT, T2, rewrite);
   834     __ delayed()->nop();
    836     // if _agetfield then rewrite to _fast_aaccess_0
   837     assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) ==
   838         Bytecodes::_aload_0,
   839         "fix bytecode definition");
   840     __ move(T3, Bytecodes::_fast_aaccess_0);
   841     __ move(AT, Bytecodes::_fast_agetfield);
   842     __ beq(AT, T2, rewrite);
   843     __ delayed()->nop();
    845     // if _fgetfield then rewrite to _fast_faccess_0
   846     assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) ==
   847         Bytecodes::_aload_0,
   848         "fix bytecode definition");
   849     __ move(T3, Bytecodes::_fast_faccess_0);
   850     __ move(AT, Bytecodes::_fast_fgetfield);
   851     __ beq(AT, T2, rewrite);
   852     __ delayed()->nop();
    854     // else rewrite to _fast_aload_0
   855     assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) ==
   856         Bytecodes::_aload_0,
   857         "fix bytecode definition");
   858     __ move(T3, Bytecodes::_fast_aload_0);
   860     // rewrite
   861     __ bind(rewrite);
   862     patch_bytecode(Bytecodes::_aload_0, T3, T2, false);
   864     __ bind(done);
   865   } else {
   866     aload(0);
   867   }
   868 }
   870 void TemplateTable::istore() {
   871   transition(itos, vtos);
   872   locals_index(T2);
   873   __ sw(FSR, T2, 0);
   874 }
   876 void TemplateTable::lstore() {
   877   transition(ltos, vtos);
   878   locals_index(T2);
   879   __ sd(FSR, T2, -wordSize);
   880 }
   882 void TemplateTable::fstore() {
   883   transition(ftos, vtos);
   884   locals_index(T2);
   885   __ swc1(FSF, T2, 0);
   886 }
   888 void TemplateTable::dstore() {
   889   transition(dtos, vtos);
   890   locals_index(T2);
   891   __ sdc1(FSF, T2, -wordSize);
   892 }
   894 void TemplateTable::astore() {
   895   transition(vtos, vtos);
   896   __ pop_ptr(FSR);
   897   locals_index(T2);
   898   __ sd(FSR, T2, 0);
   899 }
   901 void TemplateTable::wide_istore() {
   902   transition(vtos, vtos);
   903   __ pop_i(FSR);
   904   locals_index_wide(T2);
   905   __ sd(FSR, T2, 0);
   906 }
   908 void TemplateTable::wide_lstore() {
   909   transition(vtos, vtos);
   910   __ pop_l(FSR);
   911   locals_index_wide(T2);
   912   __ sd(FSR, T2, -4);
   913 }
   915 void TemplateTable::wide_fstore() {
   916   wide_istore();
   917 }
   919 void TemplateTable::wide_dstore() {
   920   wide_lstore();
   921 }
   923 void TemplateTable::wide_astore() {
   924   transition(vtos, vtos);
   925   __ pop_ptr(FSR);
   926   locals_index_wide(T2);
   927   __ sd(FSR, T2, 0);
   928 }
   930 // used register : T2
   931 void TemplateTable::iastore() {
   932   transition(itos, vtos);
   933   __ pop_i(SSR);   // T2: array  SSR: index
   934   if(UseBoundCheckInstruction) {
   935     __ pop_ptr(T2);
   936     __ dsll(SSR, SSR, Address::times_4);
   937     __ dadd(SSR, T2, SSR);
   938     __ addi(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_INT));  // base
   940     __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes());
   941     __ dsll(AT, AT, Address::times_4);
   942     __ dadd(AT, T2, AT);
   943     __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_INT));  //bound
   945     __ gsswle(FSR, SSR, AT);
   946   } else {
   947     index_check(T2, SSR);  // prefer index in ebx
   948     __ dsll(SSR, SSR, Address::times_4);
   949     __ dadd(T2, T2, SSR);
   950     __ sw(FSR, T2, arrayOopDesc::base_offset_in_bytes(T_INT));
   951   }
   952 }
   956 // used register T2, T3
   957 void TemplateTable::lastore() {
   958   transition(ltos, vtos);
   959   __ pop_i (T2);
   960   if(UseBoundCheckInstruction) {
   961     __ pop_ptr(T3);
   962     __ dsll(T2, T2, Address::times_8);
   963     __ dadd(T2, T3, T2);
   964     __ addi(T2, T2, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize);  // base
   966     __ lw(AT, T3, arrayOopDesc::length_offset_in_bytes());
   967     __ dsll(AT, AT, Address::times_8);
   968     __ dadd(AT, T3, AT);
   969     __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize);  //bound
   971     __ gssdle(FSR, T2, AT);
   972   } else {
   973     index_check(T3, T2);
   974     __ dsll(T2, T2, Address::times_8);
   975     __ dadd(T3, T3, T2);
   976     __ sd(FSR, T3, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize);
   977   }
   978 }
   980 // used register T2
   981 void TemplateTable::fastore() {
   982   transition(ftos, vtos);
   983   __ pop_i(SSR);
   984   if(UseBoundCheckInstruction) {
   985     __ pop_ptr(T2);
   986     __ dsll(SSR, SSR, Address::times_4);
   987     __ dadd(SSR, T2, SSR);
   988     __ addi(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT));  // base
   990     __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes());
   991     __ dsll(AT, AT, Address::times_4);
   992     __ dadd(AT, T2, AT);
   993     __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_FLOAT));  //bound
   995     __ gsswlec1(FSF, SSR, AT);
   996   } else {
   997     index_check(T2, SSR);
   998     __ dsll(SSR, SSR, Address::times_4);
   999     __ dadd(T2, T2, SSR);
   1000     __ swc1(FSF, T2, arrayOopDesc::base_offset_in_bytes(T_FLOAT));
   1001   }
   1002 }
  1004 // used register T2, T3
  1005 void TemplateTable::dastore() {
  1006   transition(dtos, vtos);
  1007   __ pop_i (T2);
  1008   if(UseBoundCheckInstruction) {
  1009     __ pop_ptr(T3);
  1010     __ dsll(T2, T2, Address::times_8);
  1011     __ dadd(T2, T3, T2);
  1012     __ addi(T2, T2, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize);  // base
  1014     __ lw(AT, T3, arrayOopDesc::length_offset_in_bytes());
  1015     __ dsll(AT, AT, Address::times_8);
  1016     __ dadd(AT, T3, AT);
  1017     __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize);  //bound
  1019     __ gssdlec1(FSF, T2, AT);
  1020   } else {
  1021     index_check(T3, T2);
  1022     __ dsll(T2, T2, Address::times_8);
  1023     __ daddu(T3, T3, T2);
   1024     __ sdc1(FSF, T3, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize);
   1025   }
   1026 }
  1028 // used register : T2, T3, T8
  1029 // T2 : array
  1030 // T3 : subklass
  1031 // T8 : supklass
  1032 void TemplateTable::aastore() {
  1033   Label is_null, ok_is_subtype, done;
  1034   transition(vtos, vtos);
  1035   // stack: ..., array, index, value
  1036   __ ld(FSR, at_tos());     // Value
  1037   __ lw(SSR, at_tos_p1());  // Index
  1038   __ ld(T2, at_tos_p2());  // Array
  1040   // index_check(T2, SSR);
  1041   index_check_without_pop(T2, SSR);
  1042   // do array store check - check for NULL value first
  1043   __ beq(FSR, R0, is_null);
  1044   __ delayed()->nop();
  1046   // Move subklass into T3
  1047   //add for compressedoops
  1048   __ load_klass(T3, FSR);
  1049   // Move superklass into T8
  1050   //add for compressedoops
  1051   __ load_klass(T8, T2);
  1052   __ ld(T8, Address(T8,  ObjArrayKlass::element_klass_offset()));
  1053   // Compress array+index*4+12 into a single register. T2
  1054   __ dsll(AT, SSR, UseCompressedOops? Address::times_4 : Address::times_8);
  1055   __ dadd(T2, T2, AT);
  1056   __ daddi(T2, T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
  1058   // Generate subtype check.
  1059   // Superklass in T8.  Subklass in T3.
  1060   __ gen_subtype_check(T8, T3, ok_is_subtype);        // <-- Jin
  1061   // Come here on failure
  1062   // object is at FSR
  1063   __ jmp(Interpreter::_throw_ArrayStoreException_entry);    // <-- Jin
  1064   __ delayed()->nop();
  1065   // Come here on success
  1066   __ bind(ok_is_subtype);
  1067   //replace with do_oop_store->store_heap_oop
  1068   __ store_heap_oop(Address(T2, 0), FSR);          // <-- Jin
  1069   __ store_check(T2);
  1070   __ b(done);
  1071   __ delayed()->nop();
  1073   // Have a NULL in FSR, EDX=T2, SSR=index.  Store NULL at ary[idx]
  1074   __ bind(is_null);
  1075   __ profile_null_seen(T9);
  1076   __ dsll(AT, SSR, UseCompressedOops? Address::times_4 : Address::times_8);
  1077   __ dadd(T2, T2, AT);
  1078   __ store_heap_oop(Address(T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), FSR);  /* FSR is null here */
  1080   __ bind(done);
   1081   __ daddi(SP, SP, 3 * Interpreter::stackElementSize);
   1082 }
  1084 void TemplateTable::bastore() {
  1085   transition(itos, vtos);
  1086   __ pop_i(SSR);
  1087   if(UseBoundCheckInstruction) {
  1088     __ pop_ptr(T2);
  1089     __ dadd(SSR, T2, SSR);
  1090     __ addi(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE));  // base
  1092     __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes());
  1093     __ dadd(AT, T2, AT);
  1094     __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_BYTE));  //bound
  1096     __ gssble(FSR, SSR, AT);
  1097   } else {
  1098     index_check(T2, SSR);
  1099     __ dadd(SSR, T2, SSR);
   1100     __ sb(FSR, SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE));
   1101   }
   1102 }
  1104 void TemplateTable::castore() {
  1105   transition(itos, vtos);
  1106   __ pop_i(SSR);
  1107   if(UseBoundCheckInstruction) {
  1108     __ pop_ptr(T2);
  1109     __ dsll(SSR, SSR, Address::times_2);
  1110     __ dadd(SSR, T2, SSR);
  1111     __ addi(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR));  // base
  1113     __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes());
  1114     __ dsll(AT, AT, Address::times_2);
  1115     __ dadd(AT, T2, AT);
  1116     __ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(T_CHAR));  //bound
  1118     __ gsshle(FSR, SSR, AT);
  1119   } else {
  1120     index_check(T2, SSR);
  1121     __ dsll(SSR, SSR, Address::times_2);
  1122     __ dadd(SSR, T2, SSR);
   1123     __ sh(FSR, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR));
   1124   }
   1125 }
  1127 void TemplateTable::sastore() {
   1128   castore();
   1129 }
  1131 void TemplateTable::istore(int n) {
  1132   transition(itos, vtos);
   1133   __ sw(FSR, iaddress(n));
   1134 }
  1136 void TemplateTable::lstore(int n) {
  1137   transition(ltos, vtos);
   1138   __ sd(FSR, laddress(n));
   1139 }
  1141 void TemplateTable::fstore(int n) {
  1142   transition(ftos, vtos);
   1143   __ swc1(FSF, faddress(n));
   1144 }
  1146 void TemplateTable::dstore(int n) {
  1147   transition(dtos, vtos);
   1148   __ sdc1(FSF, laddress(n));
   1149 }
  1151 void TemplateTable::astore(int n) {
  1152   transition(vtos, vtos);
  1153   __ pop_ptr(FSR);
   1154   __ sd(FSR, aaddress(n));
   1155 }
  1157 void TemplateTable::pop() {
  1158   transition(vtos, vtos);
   1159   __ daddi(SP, SP, Interpreter::stackElementSize);
   1160 }
  1162 void TemplateTable::pop2() {
  1163   transition(vtos, vtos);
   1164   __ daddi(SP, SP, 2 * Interpreter::stackElementSize);
   1165 }
  1167 void TemplateTable::dup() {
  1168   transition(vtos, vtos);
  1169   // stack: ..., a
  1170   __ load_ptr(0, FSR);
  1171   __ push_ptr(FSR);
   1172   // stack: ..., a, a
   1173 }
  1175 // blows FSR
  1176 void TemplateTable::dup_x1() {
  1177   transition(vtos, vtos);
  1178   // stack: ..., a, b
  1179   __ load_ptr(0, FSR);  // load b
  1180   __ load_ptr(1, A5);  // load a
  1181   __ store_ptr(1, FSR); // store b
  1182   __ store_ptr(0, A5); // store a
  1183   __ push_ptr(FSR);             // push b
   1184   // stack: ..., b, a, b
   1185 }
  1187 // blows FSR
  1188 void TemplateTable::dup_x2() {
  1189   transition(vtos, vtos);
  1190   // stack: ..., a, b, c
  1191   __ load_ptr(0, FSR);  // load c
  1192   __ load_ptr(2, A5);  // load a
  1193   __ store_ptr(2, FSR); // store c in a
  1194   __ push_ptr(FSR);             // push c
  1195   // stack: ..., c, b, c, c
  1196   __ load_ptr(2, FSR);  // load b
  1197   __ store_ptr(2, A5); // store a in b
  1198   // stack: ..., c, a, c, c
  1199   __ store_ptr(1, FSR); // store b in c
   1200   // stack: ..., c, a, b, c
   1201 }
  1203 // blows FSR
  1204 void TemplateTable::dup2() {
  1205   transition(vtos, vtos);
  1206   // stack: ..., a, b
  1207   __ load_ptr(1, FSR);  // load a
  1208   __ push_ptr(FSR);             // push a
  1209   __ load_ptr(1, FSR);  // load b
  1210   __ push_ptr(FSR);             // push b
   1211   // stack: ..., a, b, a, b
   1212 }
  1214 // blows FSR
  1215 void TemplateTable::dup2_x1() {
  1216   transition(vtos, vtos);
  1217   // stack: ..., a, b, c
  1218   __ load_ptr(0, T2);  // load c
  1219   __ load_ptr(1, FSR);  // load b
  1220   __ push_ptr(FSR);             // push b
  1221   __ push_ptr(T2);             // push c
  1222   // stack: ..., a, b, c, b, c
  1223   __ store_ptr(3, T2); // store c in b
  1224   // stack: ..., a, c, c, b, c
  1225   __ load_ptr(4, T2);  // load a
  1226   __ store_ptr(2, T2); // store a in 2nd c
  1227   // stack: ..., a, c, a, b, c
  1228   __ store_ptr(4, FSR); // store b in a
  1229   // stack: ..., b, c, a, b, c
   1231   // stack: ..., b, c, a, b, c
   1232 }
  1234 // blows FSR, SSR
  1235 void TemplateTable::dup2_x2() {
  1236   transition(vtos, vtos);
  1237   // stack: ..., a, b, c, d
  1238   // stack: ..., a, b, c, d
  1239   __ load_ptr(0, T2);  // load d
  1240   __ load_ptr(1, FSR);  // load c
  1241   __ push_ptr(FSR);             // push c
  1242   __ push_ptr(T2);             // push d
  1243   // stack: ..., a, b, c, d, c, d
  1244   __ load_ptr(4, FSR);  // load b
  1245   __ store_ptr(2, FSR); // store b in d
  1246   __ store_ptr(4, T2); // store d in b
  1247   // stack: ..., a, d, c, b, c, d
  1248   __ load_ptr(5, T2);  // load a
  1249   __ load_ptr(3, FSR);  // load c
  1250   __ store_ptr(3, T2); // store a in c
  1251   __ store_ptr(5, FSR); // store c in a
  1252   // stack: ..., c, d, a, b, c, d
   1254   // stack: ..., c, d, a, b, c, d
   1255 }
  1257 // blows FSR
  1258 void TemplateTable::swap() {
  1259   transition(vtos, vtos);
  1260   // stack: ..., a, b
  1262   __ load_ptr(1, A5);  // load a
  1263   __ load_ptr(0, FSR);  // load b
  1264   __ store_ptr(0, A5); // store a in b
  1265   __ store_ptr(1, FSR); // store b in a
   1267   // stack: ..., b, a
   1268 }
  1270 void TemplateTable::iop2(Operation op) {
  1271   transition(itos, itos);
  1272   switch (op) {
  1273     case add  :
  1274       __ pop_i(SSR);
  1275       __ addu32(FSR, SSR, FSR);
  1276       break;
  1277     case sub  :
  1278       __ pop_i(SSR);
  1279       __ subu32(FSR, SSR, FSR);
  1280       break;
  1281     case mul  :
  1282       __ lw(SSR, SP, 0);
  1283       __ daddi(SP, SP, wordSize);
   1284       __ mul(FSR, SSR, FSR);
  1285       break;
  1286     case _and :
  1287       __ pop_i(SSR);
  1288       __ andr(FSR, SSR, FSR);
  1289       break;
  1290     case _or  :
  1291       __ pop_i(SSR);
  1292       __ orr(FSR, SSR, FSR);
  1293       break;
  1294     case _xor :
  1295       __ pop_i(SSR);
  1296       __ xorr(FSR, SSR, FSR);
  1297       break;
  1298     case shl  :
  1299       __ pop_i(SSR);
  1300       __ sllv(FSR, SSR, FSR);
   1301       break; // the lower 5 bits of the shift count are masked implicitly by the shift instruction, on Intel and on MIPS alike
  1302     case shr  :
  1303       __ pop_i(SSR);
  1304       __ srav(FSR, SSR, FSR);
   1305       break; // the lower 5 bits of the shift count are masked implicitly by the shift instruction, on Intel and on MIPS alike
  1306     case ushr :
  1307       __ pop_i(SSR);
  1308       __ srlv(FSR, SSR, FSR);
   1309       break; // the lower 5 bits of the shift count are masked implicitly by the shift instruction, on Intel and on MIPS alike
   1310     default   : ShouldNotReachHere();
   1311   }
   1312 }
   1314 // the result is stored in FSR, SSR
  1315 // used registers : T2, T3
  1316 void TemplateTable::lop2(Operation op) {
  1317   transition(ltos, ltos);
  1318   __ pop_l(T2, T3);
   1319 #ifdef ASSERT
   1320   {
  1321     Label  L;
  1322     __ beq(T3, R0, L);
  1323     __ delayed()->nop();
   1324     __ bind(L);
   1325   }
  1326 #endif
  1327   switch (op) {
  1328     case add :
  1329       __ daddu(FSR, T2, FSR);
  1330       break;
  1331     case sub :
  1332       __ dsubu(FSR, T2, FSR);
  1333       break;
  1334     case _and:
  1335       __ andr(FSR, T2, FSR);
  1336       break;
  1337     case _or :
  1338       __ orr(FSR, T2, FSR);
  1339       break;
  1340     case _xor:
  1341       __ xorr(FSR, T2, FSR);
  1342       break;
   1343     default : ShouldNotReachHere();
   1344   }
   1345 }
   1347 // Java requires this bytecode to handle 0x80000000/-1 without raising an overflow exception;
   1348 // the result must be 0x80000000
   1349 // the Godson2 CPU does the same, so we need not handle this specially as x86 does
  1350 void TemplateTable::idiv() {
  1351   transition(itos, itos);
  1352   Label not_zero;
  1354   __ bne(FSR, R0, not_zero);
  1355   __ delayed()->nop();
  1356   __ jmp(Interpreter::_throw_ArithmeticException_entry);
  1357   __ delayed()->nop();
  1358   __ bind(not_zero);
  1360   __ pop_i(SSR);
  1361   if (UseLoongsonISA) {
  1362     __ gsdiv(FSR, SSR, FSR);
  1363   } else {
  1364     __ div(SSR, FSR);
   1365     __ mflo(FSR);
   1366   }
   1367 }
  1369 void TemplateTable::irem() {
  1370   transition(itos, itos);
  1371   Label not_zero;
  1372   __ pop_i(SSR);
  1373   __ div(SSR, FSR);
  1375   __ bne(FSR, R0, not_zero);
  1376   __ delayed()->nop();
  1377   //__ brk(7);
  1378   __ jmp(Interpreter::_throw_ArithmeticException_entry);
  1379   __ delayed()->nop();
  1381   __ bind(not_zero);
   1382   __ mfhi(FSR);
   1383 }
  1385 void TemplateTable::lmul() {
  1386   transition(ltos, ltos);
  1387   __ pop_l(T2);
  1388   if(UseLoongsonISA){
  1389     __ gsdmult(FSR, T2, FSR);
  1390   } else {
  1391     __ dmult(T2, FSR);
   1392     __ mflo(FSR);
   1393   }
   1394 }
  1396 // NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry
  1397 void TemplateTable::ldiv() {
  1398   transition(ltos, ltos);
  1399   Label normal;
  1401   __ bne(FSR, R0, normal);
  1402   __ delayed()->nop();
  1404   //__ brk(7);    //generate FPE
  1405   __ jmp(Interpreter::_throw_ArithmeticException_entry);
  1406   __ delayed()->nop();
  1408   __ bind(normal);
  1409   __ pop_l(A2, A3);
  1410   if (UseLoongsonISA) {
  1411     __ gsddiv(FSR, A2, FSR);
  1412   } else {
  1413     __ ddiv(A2, FSR);
   1414     __ mflo(FSR);
   1415   }
   1416 }
  1418 // NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry
  1419 void TemplateTable::lrem() {
  1420   transition(ltos, ltos);
  1421   Label normal;
  1423   __ bne(FSR, R0, normal);
  1424   __ delayed()->nop();
  1426   __ jmp(Interpreter::_throw_ArithmeticException_entry);
  1427   __ delayed()->nop();
  1429   __ bind(normal);
  1430   __ pop_l (A2, A3);
  1432   if(UseLoongsonISA){
  1433     __ gsdmod(FSR, A2, FSR);
  1434   } else {
  1435     __ ddiv(A2, FSR);
   1436     __ mfhi(FSR);
   1437   }
   1438 }
  1440 // result in FSR
  1441 // used registers : T0
  1442 void TemplateTable::lshl() {
  1443   transition(itos, ltos);
  1444   __ pop_l(T0, T1);
   1445 #ifdef ASSERT
   1446   {
  1447     Label  L;
  1448     __ beq(T1, R0, L);
  1449     __ delayed()->nop();
  1450     //__ stop("lshl, wrong stack");  // <-- Fu 20130930
   1451     __ bind(L);
   1452   }
  1453 #endif
   1454   __ andi(FSR, FSR, 0x3f);        // the shift count (low 6 bits)
   1455   __ dsllv(FSR, T0, FSR);
   1456 }
  1458 // used registers : T0
  1459 void TemplateTable::lshr() {
  1460   transition(itos, ltos);
  1461   __ pop_l(T0, T1);
   1462 #ifdef ASSERT
   1463   {
  1464     Label  L;
  1465     __ beq(T1, R0, L);
  1466     __ delayed()->nop();
  1467     __ stop("lshr, wrong stack");
   1468     __ bind(L);
   1469   }
  1470 #endif
   1471   __ andi(FSR, FSR, 0x3f);        // the shift count (low 6 bits)
   1472   __ dsrav(FSR, T0, FSR);
   1473 }
  1475 // used registers : T0
  1476 void TemplateTable::lushr() {
  1477   transition(itos, ltos);
  1478   __ pop_l(T0, T1);
   1479 #ifdef ASSERT
   1480   {
  1481     Label  L;
  1482     __ beq(T1, R0, L);
  1483     __ delayed()->nop();
  1484     __ stop("lushr, wrong stack");
   1485     __ bind(L);
   1486   }
  1487 #endif
   1488   __ andi(FSR, FSR, 0x3f);        // the shift count (low 6 bits)
   1489   __ dsrlv(FSR, T0, FSR);
   1490 }
  1492 // result in FSF
  1493 void TemplateTable::fop2(Operation op) {
  1494   transition(ftos, ftos);
  1495   switch (op) {
  1496     case add:
  1497       __ lwc1(FTF, at_sp());
  1498       __ add_s(FSF, FTF, FSF);
  1499       break;
  1500     case sub:
  1501       __ lwc1(FTF, at_sp());
  1502       __ sub_s(FSF, FTF, FSF);
  1503       break;
  1504     case mul:
  1505       __ lwc1(FTF, at_sp());
  1506       __ mul_s(FSF, FTF, FSF);
  1507       break;
  1508     case div:
  1509       __ lwc1(FTF, at_sp());
  1510       __ div_s(FSF, FTF, FSF);
  1511       break;
  1512     case rem:
  1513       __ mov_s(F13, FSF);
  1514       __ lwc1(F12, at_sp());
  1515        __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2);
  1516       break;
   1517     default : ShouldNotReachHere();
   1518   }
   1520   __ daddi(SP, SP, 1 * wordSize);
   1521 }
  1523 // result in SSF||FSF
   1524 // NOTE: the strictfp flags are not handled here
  1525 void TemplateTable::dop2(Operation op) {
  1526   transition(dtos, dtos);
  1527   switch (op) {
  1528     case add:
  1529       __ ldc1(FTF, at_sp());
  1530       __ add_d(FSF, FTF, FSF);
  1531       break;
  1532     case sub:
  1533       __ ldc1(FTF, at_sp());
  1534       __ sub_d(FSF, FTF, FSF);
  1535       break;
  1536     case mul:
  1537       __ ldc1(FTF, at_sp());
  1538       __ mul_d(FSF, FTF, FSF);
  1539       break;
  1540     case div:
  1541       __ ldc1(FTF, at_sp());
  1542       __ div_d(FSF, FTF, FSF);
  1543       break;
  1544     case rem:
  1545       __ mov_d(F13, FSF);
  1546       __ ldc1(F12, at_sp());
  1547       __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2);
  1548       break;
   1549     default : ShouldNotReachHere();
   1550   }
   1552   __ daddi(SP, SP, 2 * wordSize);
   1553 }
  1555 void TemplateTable::ineg() {
  1556   transition(itos, itos);
   1557   __ neg(FSR);
   1558 }
  1560 void TemplateTable::lneg() {
  1561   transition(ltos, ltos);
   1562   __ dsubu(FSR, R0, FSR);
   1563 }
  1565 void TemplateTable::fneg() {
  1566   transition(ftos, ftos);
   1567   __ neg_s(FSF, FSF);
   1568 }
  1570 void TemplateTable::dneg() {
  1571   transition(dtos, dtos);
   1572   __ neg_d(FSF, FSF);
   1573 }
  1575 // used registers : T2
  1576 void TemplateTable::iinc() {
  1577   transition(vtos, vtos);
  1578   locals_index(T2);
  1579   __ lw(FSR, T2, 0);
  1580   __ lb(AT, at_bcp(2));           // get constant
  1581   __ daddu(FSR, FSR, AT);
   1582   __ sw(FSR, T2, 0);
   1583 }
  1585 // used register : T2
  1586 void TemplateTable::wide_iinc() {
  1587   transition(vtos, vtos);
  1588   locals_index_wide(T2);
  1589   __ get_2_byte_integer_at_bcp(FSR, AT, 4);
  1590   __ hswap(FSR);
  1591   __ lw(AT, T2, 0);
  1592   __ daddu(FSR, AT, FSR);
   1593   __ sw(FSR, T2, 0);
   1594 }
  1596 void TemplateTable::convert() {
  1597   // Checking
   1598 #ifdef ASSERT
   1599   {
  1600     TosState tos_in  = ilgl;
  1601     TosState tos_out = ilgl;
  1602     switch (bytecode()) {
  1603       case Bytecodes::_i2l: // fall through
  1604       case Bytecodes::_i2f: // fall through
  1605       case Bytecodes::_i2d: // fall through
  1606       case Bytecodes::_i2b: // fall through
  1607       case Bytecodes::_i2c: // fall through
  1608       case Bytecodes::_i2s: tos_in = itos; break;
  1609       case Bytecodes::_l2i: // fall through
  1610       case Bytecodes::_l2f: // fall through
  1611       case Bytecodes::_l2d: tos_in = ltos; break;
  1612       case Bytecodes::_f2i: // fall through
  1613       case Bytecodes::_f2l: // fall through
  1614       case Bytecodes::_f2d: tos_in = ftos; break;
  1615       case Bytecodes::_d2i: // fall through
  1616       case Bytecodes::_d2l: // fall through
  1617       case Bytecodes::_d2f: tos_in = dtos; break;
   1618       default             : ShouldNotReachHere();
   1619     }
  1620     switch (bytecode()) {
  1621       case Bytecodes::_l2i: // fall through
  1622       case Bytecodes::_f2i: // fall through
  1623       case Bytecodes::_d2i: // fall through
  1624       case Bytecodes::_i2b: // fall through
  1625       case Bytecodes::_i2c: // fall through
  1626       case Bytecodes::_i2s: tos_out = itos; break;
  1627       case Bytecodes::_i2l: // fall through
  1628       case Bytecodes::_f2l: // fall through
  1629       case Bytecodes::_d2l: tos_out = ltos; break;
  1630       case Bytecodes::_i2f: // fall through
  1631       case Bytecodes::_l2f: // fall through
  1632       case Bytecodes::_d2f: tos_out = ftos; break;
  1633       case Bytecodes::_i2d: // fall through
  1634       case Bytecodes::_l2d: // fall through
  1635       case Bytecodes::_f2d: tos_out = dtos; break;
   1636       default             : ShouldNotReachHere();
   1637     }
   1638     transition(tos_in, tos_out);
   1639   }
  1640 #endif // ASSERT
  1642   // Conversion
  1643   // (Note: use pushl(ecx)/popl(ecx) for 1/2-word stack-ptr manipulation)
  1644   switch (bytecode()) {
  1645     case Bytecodes::_i2l:
  1646       __ sll(FSR, FSR, 0);
  1647       break;
  1648     case Bytecodes::_i2f:
  1649       __ mtc1(FSR, FSF);
  1650       __ cvt_s_w(FSF, FSF);
  1651       break;
  1652     case Bytecodes::_i2d:
  1653       __ mtc1(FSR, FSF);
  1654       __ cvt_d_w(FSF, FSF);
  1655       break;
  1656     case Bytecodes::_i2b:
  1657       __ seb(FSR, FSR);
  1658       break;
  1659     case Bytecodes::_i2c:
   1660       __ andi(FSR, FSR, 0xFFFF);  // zero-extend to 16 bits (truncate the upper 48 bits)
  1661       break;
  1662     case Bytecodes::_i2s:
  1663       __ seh(FSR, FSR);
  1664       break;
  1665     case Bytecodes::_l2i:
  1666       __ sll(FSR, FSR, 0);
  1667       break;
  1668     case Bytecodes::_l2f:
  1669       __ dmtc1(FSR, FSF);
  1670       __ cvt_s_l(FSF, FSF);
  1671       break;
  1672     case Bytecodes::_l2d:
  1673       __ dmtc1(FSR, FSF);
  1674       __ cvt_d_l(FSF, FSF);
  1675       break;
   1676     case Bytecodes::_f2i:
   1677     {
  1678       Label L;
  1680       __ trunc_w_s(F12, FSF);
  1681       __ move(AT, 0x7fffffff);
  1682       __ mfc1(FSR, F12);
  1683       __ c_un_s(FSF, FSF);    //NaN?
  1684       __ movt(FSR, R0);
  1686       __ bne(AT, FSR, L);
  1687       __ delayed()->lui(T9, 0x8000);
  1689       __ mfc1(AT, FSF);
  1690       __ andr(AT, AT, T9);
  1692       __ movn(FSR, T9, AT);
   1694       __ bind(L);
   1695     }
  1696       break;
   1697     case Bytecodes::_f2l:
   1698     {
  1699       Label L;
  1701       __ trunc_l_s(F12, FSF);
  1702       __ daddiu(AT, R0, -1);
  1703       __ dsrl(AT, AT, 1);
  1704       __ dmfc1(FSR, F12);
  1705       __ c_un_s(FSF, FSF);    //NaN?
  1706       __ movt(FSR, R0);
  1708       __ bne(AT, FSR, L);
  1709       __ delayed()->lui(T9, 0x8000);
  1711       __ mfc1(AT, FSF);
  1712       __ andr(AT, AT, T9);
  1714       __ dsll32(T9, T9, 0);
  1715       __ movn(FSR, T9, AT);
   1717       __ bind(L);
   1718     }
  1719       break;
  1720     case Bytecodes::_f2d:
  1721       __ cvt_d_s(FSF, FSF);
  1722       break;
   1723     case Bytecodes::_d2i:
   1724     {
  1725       Label L;
  1727       __ trunc_w_d(F12, FSF);
  1728       __ move(AT, 0x7fffffff);
  1729       __ mfc1(FSR, F12);
  1731       __ bne(FSR, AT, L);
  1732       __ delayed()->mtc1(R0, F12);
  1734       __ cvt_d_w(F12, F12);
  1735       __ c_ult_d(FSF, F12);
  1736       __ bc1f(L);
  1737       __ delayed()->addiu(T9, R0, -1);
  1739       __ c_un_d(FSF, FSF);    //NaN?
  1740       __ subu32(FSR, T9, AT);
  1741       __ movt(FSR, R0);
   1743       __ bind(L);
   1744     }
  1745       break;
   1746     case Bytecodes::_d2l:
   1747     {
  1748       Label L;
  1750       __ trunc_l_d(F12, FSF);
  1751       __ daddiu(AT, R0, -1);
  1752       __ dsrl(AT, AT, 1);
  1753       __ dmfc1(FSR, F12);
  1755       __ bne(FSR, AT, L);
  1756       __ delayed()->mtc1(R0, F12);
  1758       __ cvt_d_w(F12, F12);
  1759       __ c_ult_d(FSF, F12);
  1760       __ bc1f(L);
  1761       __ delayed()->daddiu(T9, R0, -1);
  1763       __ c_un_d(FSF, FSF);    //NaN?
  1764       __ subu(FSR, T9, AT);
  1765       __ movt(FSR, R0);
   1767       __ bind(L);
   1768     }
  1769       break;
  1770     case Bytecodes::_d2f:
  1771       __ cvt_s_d(FSF, FSF);
  1772       break;
  1773     default             :
   1774       ShouldNotReachHere();
   1775   }
   1776 }
  1778 void TemplateTable::lcmp() {
  1779   transition(ltos, itos);
  1781   Label low, high, done;
  1782   __ pop(T0);
  1783   __ pop(R0);
  1784   __ slt(AT, T0, FSR);
  1785   __ bne(AT, R0, low);
  1786   __ delayed()->nop();
  1788   __ bne(T0, FSR, high);
  1789   __ delayed()->nop();
  1791   __ li(FSR, (long)0);
  1792   __ b(done);
  1793   __ delayed()->nop();
  1795   __ bind(low);
  1796   __ li(FSR, (long)-1);
  1797   __ b(done);
  1798   __ delayed()->nop();
  1800   __ bind(high);
  1801   __ li(FSR, (long)1);
  1802   __ b(done);
  1803   __ delayed()->nop();
   1805   __ bind(done);
   1806 }
  1808 void TemplateTable::float_cmp(bool is_float, int unordered_result) {
  1809   Label less, done;
  1811   __ move(FSR, R0);
  1813   if (is_float) {
  1814     __ lwc1(FTF, at_sp());
  1815     __ c_eq_s(FTF, FSF);
  1816     __ bc1t(done);
  1817     __ delayed()->daddi(SP, SP, 1 * wordSize);
  1819     if (unordered_result<0)
  1820       __ c_ult_s(FTF, FSF);
  1821     else
  1822       __ c_olt_s(FTF, FSF);
  1823   } else {
  1824     __ ldc1(FTF, at_sp());
  1825     __ c_eq_d(FTF, FSF);
  1826     __ bc1t(done);
  1827     __ delayed()->daddi(SP, SP, 2 * wordSize);
  1829     if (unordered_result<0)
  1830       __ c_ult_d(FTF, FSF);
  1831     else
   1832       __ c_olt_d(FTF, FSF);
   1833   }
  1834   __ bc1t(less);
  1835   __ delayed()->nop();
  1836   __ move(FSR, 1);
  1837   __ b(done);
  1838   __ delayed()->nop();
  1839   __ bind(less);
  1840   __ move(FSR, -1);
   1841   __ bind(done);
   1842 }
  1845 // used registers : T3, A7, Rnext
  1846 // FSR : return bci, this is defined by the vm specification
  1847 // T2 : MDO taken count
  1848 // T3 : method
  1849 // A7 : offset
  1850 // Rnext : next bytecode, this is required by dispatch_base
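       // In outline (matching the code below): load the 2- or 4-byte signed
       // branch displacement, handle jsr separately, otherwise advance BCP by
       // the displacement; for backward branches bump the backedge counter and,
       // with UseOnStackReplacement, possibly hand control to an OSR nmethod
       // before dispatching the bytecode at the target bcp.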
  1851 void TemplateTable::branch(bool is_jsr, bool is_wide) {
  1852   __ get_method(T3);
  1853   __ profile_taken_branch(A7, T2);    // only C2 meaningful
  1855 #ifndef CORE
  1856   const ByteSize be_offset = MethodCounters::backedge_counter_offset() +
  1857                              InvocationCounter::counter_offset();
  1858   const ByteSize inv_offset = MethodCounters::invocation_counter_offset() +
  1859                               InvocationCounter::counter_offset();
  1860 #endif // CORE
  1862   // Load up A7 with the branch displacement
  1863   if (!is_wide) {
  1864     __ get_2_byte_integer_at_bcp(A7, AT, 1);
  1865     __ hswap(A7);
  1866   } else {
  1867     __ get_4_byte_integer_at_bcp(A7, AT, 1);
  1868     __ swap(A7);
  1871   // Handle all the JSR stuff here, then exit.
  1872   // It's much shorter and cleaner than intermingling with the non-JSR
  1873   // normal-branch stuff occurring below.
  1874   if (is_jsr) {
  1875     // Pre-load the next target bytecode into Rnext
  1876     __ dadd(AT, BCP, A7);
  1877     __ lbu(Rnext, AT, 0);
  1879     // compute return address as bci in FSR
  1880     __ daddi(FSR, BCP, (is_wide?5:3) - in_bytes(ConstMethod::codes_offset()));
  1881     __ ld(AT, T3, in_bytes(Method::const_offset()));
  1882     __ dsub(FSR, FSR, AT);
  1883     // Adjust the bcp in BCP by the displacement in A7
  1884     __ dadd(BCP, BCP, A7);
  1885     // jsr returns atos that is not an oop
  1886     // Push return address
  1887     __ push_i(FSR);
  1888     // jsr returns vtos
  1889     __ dispatch_only_noverify(vtos);
  1891     return;
  1894   // Normal (non-jsr) branch handling
  1896   // Adjust the bcp in BCP by the displacement in A7
  1897   __ dadd(BCP, BCP, A7);
  1899 #ifdef CORE
  1900   // Pre-load the next target bytecode into EBX
  1901   __ lbu(Rnext, BCP, 0);
  1902   // continue with the bytecode @ target
  1903   __ dispatch_only(vtos);
  1904 #else
  1905   assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters");
  1906   Label backedge_counter_overflow;
  1907   Label profile_method;
  1908   Label dispatch;
  1909   if (UseLoopCounter) {
  1910     // increment backedge counter for backward branches
  1911     // eax: MDO
  1912     // ebx: MDO bumped taken-count
  1913     // T3: method
  1914     // T4: target offset
  1915     // BCP: target bcp
  1916     // LVP: locals pointer
  1917     __ bgtz(A7, dispatch);  // check if forward or backward branch
  1918     __ delayed()->nop();
  1920     // check if MethodCounters exists
  1921     Label has_counters;
  1922     __ ld(AT, T3, in_bytes(Method::method_counters_offset()));  // use AT as MDO, TEMP
  1923     __ bne(AT, R0, has_counters);
  1924     __ nop();
  1925     __ push(T3);
  1926     //__ push(A7);
  1927     __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters),
  1928                T3);
  1929     //__ pop(A7);
  1930     __ pop(T3);
  1931     __ ld(AT, T3, in_bytes(Method::method_counters_offset()));  // use AT as MDO, TEMP
  1932     __ beq(AT, R0, dispatch);
  1933     __ nop();
  1934     __ bind(has_counters);
  1936     // increment back edge counter
  1937     __ ld(T1, T3, in_bytes(Method::method_counters_offset()));
  1938     __ lw(T0, T1, in_bytes(be_offset));
  1939     __ increment(T0, InvocationCounter::count_increment);
  1940     __ sw(T0, T1, in_bytes(be_offset));
  1942     // load invocation counter
  1943     __ lw(T1, T1, in_bytes(inv_offset));
  1944     // buffer bit added, mask not needed
  1946     // add backedge counter & invocation counter
  1947     __ dadd(T1, T1, T0);
  1949     if (ProfileInterpreter) {
  1950       // Test to see if we should create a method data oop
  1951       //__ lui(AT, Assembler::split_high(int(&InvocationCounter::InterpreterProfileLimit)));
  1952       //__ lw(AT, AT, Assembler::split_low(int(&InvocationCounter::InterpreterProfileLimit)));
  1953       // T1 : backedge counter & invocation counter
  1954       __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit);
  1955       __ lw(AT, AT, 0);
  1956       __ slt(AT, T1, AT);
  1957       __ bne(AT, R0, dispatch);
  1958       __ delayed()->nop();
  1960       // if no method data exists, go to profile method
  1961       __ test_method_data_pointer(T1, profile_method);
  1963       if (UseOnStackReplacement) {
  1964         // check for overflow against T2, which is the MDO taken count
  1965         __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit);
  1966         __ lw(AT, AT, 0);
  1967         // T2 is the MDO taken count loaded by profile_taken_branch at the beginning
  1968         __ slt(AT, T2, AT);
  1969         __ bne(AT, R0, dispatch);
  1970         __ delayed()->nop();
  1972         // When ProfileInterpreter is on, the backedge_count comes
  1973         // from the methodDataOop, whose value does not get reset on
  1974         // the call to frequency_counter_overflow().
  1975         // To avoid excessive calls to the overflow routine while
  1976         // the method is being compiled, add a second test to make
  1977         // sure the overflow function is called only once every
  1978         // overflow_frequency.
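               // For example, with overflow_frequency == 1024 the andi/beq pair
               // below only takes the overflow path when (T2 & 1023) == 0, i.e.
               // roughly once per 1024 taken backedges.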
  1979         const int overflow_frequency = 1024;
  1980         __ andi(AT, T2, overflow_frequency-1);
  1981         __ beq(AT, R0, backedge_counter_overflow);
  1982         __ delayed()->nop();
  1984     } else {
  1985       if (UseOnStackReplacement) {
  1986         // check for overflow against T1, which is the sum of the counters
  1987         __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit);
  1988         __ lw(AT, AT, 0);
  1989         __ slt(AT, T1, AT);
  1990         __ beq(AT, R0, backedge_counter_overflow);
  1991         __ delayed()->nop();
  1994     __ bind(dispatch);
  1997   // Pre-load the next target bytecode into Rnext
  1998   __ lbu(Rnext, BCP, 0);
  2000   // continue with the bytecode @ target
  2001   // FSR: return bci for jsr's, unused otherwise
  2002   // Rnext: target bytecode
  2003   // BCP: target bcp
  2004   __ dispatch_only(vtos);
  2006   if (UseLoopCounter) {
  2007     if (ProfileInterpreter) {
  2008       // Out-of-line code to allocate method data oop.
  2009       __ bind(profile_method);
  2010       __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method));
  2011       __ lbu(Rnext, BCP, 0);
  2012       __ set_method_data_pointer_for_bcp();
  2013       __ b(dispatch);
  2014       __ delayed()->nop();
  2017     if (UseOnStackReplacement) {
  2018       // invocation counter overflow
  2019       __ bind(backedge_counter_overflow);
  2020       __ sub(A7, BCP, A7);  // branch bcp
  2021       call_VM(NOREG, CAST_FROM_FN_PTR(address,
  2022       InterpreterRuntime::frequency_counter_overflow), A7);
  2023       __ lbu(Rnext, BCP, 0);
  2025       // V0: osr nmethod (osr ok) or NULL (osr not possible)
  2026       // V1: osr adapter frame return address
  2027       // Rnext: target bytecode
  2028       // LVP: locals pointer
  2029       // BCP: bcp
  2030       __ beq(V0, R0, dispatch);
  2031       __ delayed()->nop();
  2032       // nmethod may have been invalidated (VM may block upon call_VM return)
  2033       __ lw(T3, V0, nmethod::entry_bci_offset());
  2034       __ move(AT, InvalidOSREntryBci);
  2035       __ beq(AT, T3, dispatch);
  2036       __ delayed()->nop();
  2037       // We need to prepare to execute the OSR method. First we must
  2038       // migrate the locals and monitors off of the stack.
  2039       //eax V0: osr nmethod (osr ok) or NULL (osr not possible)
  2040       //ebx V1: osr adapter frame return address
  2041       //edx  Rnext: target bytecode
  2042       //edi  LVP: locals pointer
  2043       //esi  BCP: bcp
  2044       __ move(BCP, V0);
  2045       // const Register thread = ecx;
  2046       const Register thread = TREG;
  2047 #ifndef OPT_THREAD
  2048       __ get_thread(thread);
  2049 #endif
  2050       call_VM(noreg, CAST_FROM_FN_PTR(address,
  2051       SharedRuntime::OSR_migration_begin));
  2052       // eax is OSR buffer, move it to expected parameter location
  2053       //refer to osrBufferPointer in c1_LIRAssembler_mips.cpp
  2054       __ move(T0, V0);
  2056       // pop the interpreter frame
  2057       __ ld(A7, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
  2058       //FIXME, shall we keep the return address on the stack?
  2059       __ leave();                                // remove frame anchor
  2060       __ move(LVP, RA);
  2061       __ move(SP, A7);
  2063       __ move(AT, -(StackAlignmentInBytes));
  2064       __ andr(SP , SP , AT);
  2066       // push the (possibly adjusted) return address
  2067       //refer to osr_entry in c1_LIRAssembler_mips.cpp
  2068       __ ld(AT, BCP, nmethod::osr_entry_point_offset());
  2069       __ jr(AT);
  2070       __ delayed()->nop();
  2073 #endif // not CORE
  2074 }
  2077 void TemplateTable::if_0cmp(Condition cc) {
  2078   transition(itos, vtos);
  2079   // assume branch is more often taken than not (loops use backward branches)
  2080   Label not_taken;
  2081   switch(cc) {
  2082     case not_equal:
  2083       __ beq(FSR, R0, not_taken);
  2084       break;
  2085     case equal:
  2086       __ bne(FSR, R0, not_taken);
  2087       break;
  2088     case less:
  2089       __ bgez(FSR, not_taken);
  2090       break;
  2091     case less_equal:
  2092       __ bgtz(FSR, not_taken);
  2093       break;
  2094     case greater:
  2095       __ blez(FSR, not_taken);
  2096       break;
  2097     case greater_equal:
  2098       __ bltz(FSR, not_taken);
  2099       break;
  2101   __ delayed()->nop();
  2103   branch(false, false);
  2105   __ bind(not_taken);
  2106   __ profile_not_taken_branch(FSR);
  2107 }
  2109 void TemplateTable::if_icmp(Condition cc) {
  2110   transition(itos, vtos);
  2111   // assume branch is more often taken than not (loops use backward branches)
  2112   Label not_taken;
  2114   __ pop_i(SSR);
  2115   switch(cc) {
  2116     case not_equal:
  2117       __ beq(SSR, FSR, not_taken);
  2118       break;
  2119     case equal:
  2120       __ bne(SSR, FSR, not_taken);
  2121       break;
  2122     case less:
  2123       __ slt(AT, SSR, FSR);
  2124       __ beq(AT, R0, not_taken);
  2125       break;
  2126     case less_equal:
  2127       __ slt(AT, FSR, SSR);
  2128       __ bne(AT, R0, not_taken);
  2129       break;
  2130     case greater:
  2131       __ slt(AT, FSR, SSR);
  2132       __ beq(AT, R0, not_taken);
  2133       break;
  2134     case greater_equal:
  2135       __ slt(AT, SSR, FSR);
  2136       __ bne(AT, R0, not_taken);
  2137       break;
  2139   __ delayed()->nop();
  2141   branch(false, false);
  2142   __ bind(not_taken);
  2143   __ profile_not_taken_branch(FSR);
  2144 }
  2146 void TemplateTable::if_nullcmp(Condition cc) {
  2147   transition(atos, vtos);
  2148   // assume branch is more often taken than not (loops use backward branches)
  2149   Label not_taken;
  2150   switch(cc) {
  2151     case not_equal:
  2152       __ beq(FSR, R0, not_taken);
  2153       break;
  2154     case equal:
  2155       __ bne(FSR, R0, not_taken);
  2156       break;
  2157     default:
  2158       ShouldNotReachHere();
  2160   __ delayed()->nop();
  2162   branch(false, false);
  2163   __ bind(not_taken);
  2164   __ profile_not_taken_branch(FSR);
  2165 }
  2168 void TemplateTable::if_acmp(Condition cc) {
  2169   transition(atos, vtos);
  2170   // assume branch is more often taken than not (loops use backward branches)
  2171   Label not_taken;
  2172   //  __ lw(SSR, SP, 0);
  2173   __ pop_ptr(SSR);
  2174   switch(cc) {
  2175     case not_equal:
  2176       __ beq(SSR, FSR, not_taken);
  2177       break;
  2178     case equal:
  2179       __ bne(SSR, FSR, not_taken);
  2180       break;
  2181     default:
  2182       ShouldNotReachHere();
  2184   __ delayed()->nop();
  2186   branch(false, false);
  2188   __ bind(not_taken);
  2189   __ profile_not_taken_branch(FSR);
  2190 }
  2192 // used registers : T1, T2, T3
  2193 // T1 : method
  2194 // T2 : return bci
  2195 void TemplateTable::ret() {
  2196   transition(vtos, vtos);
  2198   locals_index(T2);
  2199   __ ld(T2, T2, 0);
  2200   __ profile_ret(T2, T3);
  2202   __ get_method(T1);
  2203   __ ld(BCP, T1, in_bytes(Method::const_offset()));
  2204   __ dadd(BCP, BCP, T2);
  2205   __ daddi(BCP, BCP, in_bytes(ConstMethod::codes_offset()));
  2207   __ dispatch_next(vtos);
  2208 }
  2210 // used registers : T1, T2, T3
  2211 // T1 : method
  2212 // T2 : return bci
  2213 void TemplateTable::wide_ret() {
  2214   transition(vtos, vtos);
  2216   locals_index_wide(T2);
  2217   __ ld(T2, T2, 0);                   // get return bci, compute return bcp
  2218   __ profile_ret(T2, T3);
  2220   __ get_method(T1);
  2221   __ ld(BCP, T1, in_bytes(Method::const_offset()));
  2222   __ dadd(BCP, BCP, T2);
  2223   __ daddi(BCP, BCP, in_bytes(ConstMethod::codes_offset()));
  2225   __ dispatch_next(vtos);
  2226 }
  2228 // used register T2, T3, A7, Rnext
  2229 // T2 : bytecode pointer
  2230 // T3 : low
  2231 // A7 : high
  2232 // Rnext : dest bytecode, required by dispatch_base
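       // Data layout consumed below, after BCP+1 is rounded up to a 4-byte
       // boundary: [default offset][low][high][jump offsets...], all stored as
       // 4-byte big-endian values, hence the swap() after each load.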
  2233 void TemplateTable::tableswitch() {
  2234   Label default_case, continue_execution;
  2235   transition(itos, vtos);
  2237   // align BCP
  2238   __ daddi(T2, BCP, BytesPerInt);
  2239   __ li(AT, -BytesPerInt);
  2240   __ andr(T2, T2, AT);
  2242   // load lo & hi
  2243   __ lw(T3, T2, 1 * BytesPerInt);
  2244   __ swap(T3);
  2245   __ lw(A7, T2, 2 * BytesPerInt);
  2246   __ swap(A7);
  2248   // check against lo & hi
  2249   __ slt(AT, FSR, T3);
  2250   __ bne(AT, R0, default_case);
  2251   __ delayed()->nop();
  2253   __ slt(AT, A7, FSR);
  2254   __ bne(AT, R0, default_case);
  2255   __ delayed()->nop();
  2257   // lookup dispatch offset, in A7 big endian
  2258   __ dsub(FSR, FSR, T3);
  2259   __ dsll(AT, FSR, Address::times_4);
  2260   __ dadd(AT, T2, AT);
  2261   __ lw(A7, AT, 3 * BytesPerInt);
  2262   __ profile_switch_case(FSR, T9, T3);
  2264   __ bind(continue_execution);
  2265   __ swap(A7);
  2266   __ dadd(BCP, BCP, A7);
  2267   __ lbu(Rnext, BCP, 0);
  2268   __ dispatch_only(vtos);
  2270   // handle default
  2271   __ bind(default_case);
  2272   __ profile_switch_default(FSR);
  2273   __ lw(A7, T2, 0);
  2274   __ b(continue_execution);
  2275   __ delayed()->nop();
  2276 }
  2278 void TemplateTable::lookupswitch() {
  2279   transition(itos, itos);
  2280   __ stop("lookupswitch bytecode should have been rewritten");
  2281 }
  2283 // used registers : T2, T3, A7, Rnext
  2284 // T2 : bytecode pointer
  2285 // T3 : pair index
  2286 // A7 : offset
  2287 // Rnext : dest bytecode
  2288 // the data after the opcode is the same as lookupswitch
  2289 // see Rewriter::rewrite_method for more information
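       // Data layout consumed below, after BCP+1 is rounded up to a 4-byte
       // boundary: [default offset][npairs][match,offset] pairs, all stored as
       // 4-byte big-endian values; the key is byte-swapped once up front so the
       // match words can be compared without swapping each table entry.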
  2290 void TemplateTable::fast_linearswitch() {
  2291   transition(itos, vtos);
  2292   Label loop_entry, loop, found, continue_execution;
  2294   // swap FSR so we can avoid swapping the table entries
  2295   __ swap(FSR);
  2297   // align BCP
  2298   __ daddi(T2, BCP, BytesPerInt);
  2299   __ li(AT, -BytesPerInt);
  2300   __ andr(T2, T2, AT);
  2302   // set counter
  2303   __ lw(T3, T2, BytesPerInt);
  2304   __ swap(T3);
  2305   __ b(loop_entry);
  2306   __ delayed()->nop();
  2308   // table search
  2309   __ bind(loop);
  2310   // get the entry value
  2311   __ dsll(AT, T3, Address::times_8);
  2312   __ dadd(AT, T2, AT);
  2313   __ lw(AT, AT, 2 * BytesPerInt);
  2315   // found?
  2316   __ beq(FSR, AT, found);
  2317   __ delayed()->nop();
  2319   __ bind(loop_entry);
  2320   __ bgtz(T3, loop);
  2321   __ delayed()->daddiu(T3, T3, -1);
  2323   // default case
  2324   __ profile_switch_default(FSR);
  2325   __ lw(A7, T2, 0);
  2326   __ b(continue_execution);
  2327   __ delayed()->nop();
  2329   // entry found -> get offset
  2330   __ bind(found);
  2331   __ dsll(AT, T3, Address::times_8);
  2332   __ dadd(AT, T2, AT);
  2333   __ lw(A7, AT, 3 * BytesPerInt);
  2334   __ profile_switch_case(T3, FSR, T2);
  2336   // continue execution
  2337   __ bind(continue_execution);
  2338   __ swap(A7);
  2339   __ dadd(BCP, BCP, A7);
  2340   __ lbu(Rnext, BCP, 0);
  2341   __ dispatch_only(vtos);
  2342 }
  2344 // used registers : T0, T1, T2, T3, A7, Rnext
  2345 // T2 : pairs address(array)
  2346 // Rnext : dest bytecode
  2347 // the data after the opcode is the same as lookupswitch
  2348 // see Rewriter::rewrite_method for more information
  2349 void TemplateTable::fast_binaryswitch() {
  2350   transition(itos, vtos);
  2351   // Implementation using the following core algorithm:
  2352   //
  2353   // int binary_search(int key, LookupswitchPair* array, int n) {
  2354   //   // Binary search according to "Methodik des Programmierens" by
  2355   //   // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985.
  2356   //   int i = 0;
  2357   //   int j = n;
  2358   //   while (i+1 < j) {
  2359   //     // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q)
  2360   //     // with      Q: for all i: 0 <= i < n: key < a[i]
  2361   //     // where a stands for the array and assuming that the (inexisting)
  2362   //     // element a[n] is infinitely big.
  2363   //     int h = (i + j) >> 1;
  2364   //     // i < h < j
  2365   //     if (key < array[h].fast_match()) {
  2366   //       j = h;
  2367   //     } else {
  2368   //       i = h;
  2369   //     }
  2370   //   }
  2371   //   // R: a[i] <= key < a[i+1] or Q
  2372   //   // (i.e., if key is within array, i is the correct index)
  2373   //   return i;
  2374   // }
  2376   // register allocation
  2377   const Register array = T2;
  2378   const Register i = T3, j = A7;
  2379   const Register h = T1;
  2380   const Register temp = T0;
  2381   const Register key = FSR;
  2383   // setup array
  2384   __ daddi(array, BCP, 3*BytesPerInt);
  2385   __ li(AT, -BytesPerInt);
  2386   __ andr(array, array, AT);
  2388   // initialize i & j
  2389   __ move(i, R0);
  2390   __ lw(j, array, - 1 * BytesPerInt);
  2391   // Convert j into native byteordering
  2392   __ swap(j);
  2394   // and start
  2395   Label entry;
  2396   __ b(entry);
  2397   __ delayed()->nop();
  2399   // binary search loop
  2401     Label loop;
  2402     __ bind(loop);
  2403     // int h = (i + j) >> 1;
  2404     __ dadd(h, i, j);
  2405     __ dsrl(h, h, 1);
  2406     // if (key < array[h].fast_match()) {
  2407     //   j = h;
  2408     // } else {
  2409     //   i = h;
  2410     // }
  2411     // Convert array[h].match to native byte-ordering before compare
  2412     __ dsll(AT, h, Address::times_8);
  2413     __ dadd(AT, array, AT);
  2414     __ lw(temp, AT, 0 * BytesPerInt);
  2415     __ swap(temp);
  2418       Label set_i, end_of_if;
  2419       __ slt(AT, key, temp);
  2420       __ beq(AT, R0, set_i);
  2421       __ delayed()->nop();
  2423       __ b(end_of_if);
  2424       __ delayed(); __ move(j, h);
  2426       __ bind(set_i);
  2427       __ move(i, h);
  2429       __ bind(end_of_if);
  2431     // while (i+1 < j)
  2432     __ bind(entry);
  2433     __ daddi(h, i, 1);
  2434     __ slt(AT, h, j);
  2435     __ bne(AT, R0, loop);
  2436     __ delayed()->nop();
  2439   // end of binary search, result index is i (must check again!)
  2440   Label default_case;
  2441   // Convert array[i].match to native byte-ordering before compare
  2442   __ dsll(AT, i, Address::times_8);
  2443   __ dadd(AT, array, AT);
  2444   __ lw(temp, AT, 0 * BytesPerInt);
  2445   __ swap(temp);
  2446   __ bne(key, temp, default_case);
  2447   __ delayed()->nop();
  2449   // entry found -> j = offset
  2450   __ dsll(AT, i, Address::times_8);
  2451   __ dadd(AT, array, AT);
  2452   __ lw(j, AT, 1 * BytesPerInt);
  2453   __ profile_switch_case(i, key, array);
  2454   __ swap(j);
  2456   __ dadd(BCP, BCP, j);
  2457   __ lbu(Rnext, BCP, 0);
  2458   __ dispatch_only(vtos);
  2460   // default case -> j = default offset
  2461   __ bind(default_case);
  2462   __ profile_switch_default(i);
  2463   __ lw(j, array, - 2 * BytesPerInt);
  2464   __ swap(j);
  2465   __ dadd(BCP, BCP, j);
  2466   __ lbu(Rnext, BCP, 0);
  2467   __ dispatch_only(vtos);
  2468 }
  2470 void TemplateTable::_return(TosState state) {
  2471   transition(state, state);
  2472   assert(_desc->calls_vm(),
  2473       "inconsistent calls_vm information"); // call in remove_activation
  2475   if (_desc->bytecode() == Bytecodes::_return_register_finalizer) {
  2476     assert(state == vtos, "only valid state");
  2477     __ ld(T1, aaddress(0));
  2478     __ load_klass(LVP, T1);
  2479     __ lw(LVP, LVP, in_bytes(Klass::access_flags_offset()));
  2480     __ move(AT, JVM_ACC_HAS_FINALIZER);
  2481     __ andr(AT, AT, LVP);//by_css
  2482     Label skip_register_finalizer;
  2483     __ beq(AT, R0, skip_register_finalizer);
  2484     __ delayed()->nop();
  2485     __ call_VM(noreg, CAST_FROM_FN_PTR(address,
  2486     InterpreterRuntime::register_finalizer), T1);
  2487     __ bind(skip_register_finalizer);
  2489   __ remove_activation(state, T9);
  2490   __ sync();
  2492   __ jr(T9);
  2493   __ delayed()->nop();
  2494 }
  2496 // ----------------------------------------------------------------------------
  2497 // Volatile variables demand their effects be made known to all CPU's
  2498 // in order.  Store buffers on most chips allow reads & writes to
  2499 // reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode
  2500 // without some kind of memory barrier (i.e., it's not sufficient that
  2501 // the interpreter does not reorder volatile references, the hardware
  2502 // also must not reorder them).
  2503 //
  2504 // According to the new Java Memory Model (JMM):
  2505 // (1) All volatiles are serialized wrt to each other.  ALSO reads &
  2506 //     writes act as acquire & release, so:
  2507 // (2) A read cannot let unrelated NON-volatile memory refs that
  2508 //     happen after the read float up to before the read.  It's OK for
  2509 //     non-volatile memory refs that happen before the volatile read to
  2510 //     float down below it.
  2511 // (3) Similarly, a volatile write cannot let unrelated NON-volatile
  2512 //     memory refs that happen BEFORE the write float down to after the
  2513 //     write.  It's OK for non-volatile memory refs that happen after the
  2514 //     volatile write to float up before it.
  2515 //
  2516 // We only put in barriers around volatile refs (they are expensive),
  2517 // not _between_ memory refs (that would require us to track the
  2518 // flavor of the previous memory refs).  Requirements (2) and (3)
  2519 // require some barriers before volatile stores and after volatile
  2520 // loads.  These nearly cover requirement (1) but miss the
  2521 // volatile-store-volatile-load case.  This final case is placed after
  2522 // volatile-stores although it could just as well go before
  2523 // volatile-loads.
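       // On this MIPS port the barrier is a full sync(): see volatile_barrier()
       // below, which putfield_or_static() and fast_storefield() invoke after a
       // volatile store once the is_volatile bit of the cache-entry flags has
       // been tested.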
  2524 //void TemplateTable::volatile_barrier(Assembler::Membar_mask_bits
  2525 //                                     order_constraint) {
  2526 void TemplateTable::volatile_barrier( ) {
  2527   // Helper function to insert a is-volatile test and memory barrier
  2528   //if (os::is_MP()) { // Not needed on single CPU
  2529   //  __ membar(order_constraint);
  2530   //}
  2531   if( !os::is_MP() ) return;  // Not needed on single CPU
  2532   __ sync();
  2533 }
  2535 // we don't shift left by 2 bits in get_cache_and_index_at_bcp,
  2536 // because we always need to shift the index whenever we use it. A ConstantPoolCacheEntry
  2537 // is 16 bytes long and index is the index into the
  2538 // ConstantPoolCache, so cache + base_offset() + index * 16 is
  2539 // the corresponding ConstantPoolCacheEntry
  2540 // used registers : T2
  2541 // NOTE : the returned index still needs to be shifted left by 4 to get the address!
  2542 void TemplateTable::resolve_cache_and_index(int byte_no,
  2543                                             Register Rcache,
  2544                                             Register index,
  2545                                             size_t index_size) {
  2546   assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
  2547   const Register temp = A1;
  2548   assert_different_registers(Rcache, index);
  2550   Label resolved;
  2551   __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size);
  2552   // is resolved?
  2553   int i = (int)bytecode();
  2554   __ addi(temp, temp, -i);
  2555   __ beq(temp, R0, resolved);
  2556   __ delayed()->nop();
  2557   // resolve first time through
  2558   address entry;
  2559   switch (bytecode()) {
  2560     case Bytecodes::_getstatic      : // fall through
  2561     case Bytecodes::_putstatic      : // fall through
  2562     case Bytecodes::_getfield       : // fall through
  2563     case Bytecodes::_putfield       :
  2564       entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put);
  2565       break;
  2566     case Bytecodes::_invokevirtual  : // fall through
  2567     case Bytecodes::_invokespecial  : // fall through
  2568     case Bytecodes::_invokestatic   : // fall through
  2569     case Bytecodes::_invokeinterface:
  2570       entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke);
  2571       break;
  2572     case Bytecodes::_invokehandle:
  2573       entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokehandle);
  2574       break;
  2575     case Bytecodes::_invokedynamic:
  2576       entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic);
  2577       break;
  2578     default                          :
  2579       fatal(err_msg("unexpected bytecode: %s", Bytecodes::name(bytecode())));
  2580       break;
  2583   __ move(temp, i);
  2584   __ call_VM(NOREG, entry, temp);
  2586   // Update registers with resolved info
  2587   __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size);
  2588   __ bind(resolved);
  2589 }
  2591 // The Rcache and index registers must be set before call
  2592 void TemplateTable::load_field_cp_cache_entry(Register obj,
  2593                                               Register cache,
  2594                                               Register index,
  2595                                               Register off,
  2596                                               Register flags,
  2597                                               bool is_static = false) {
  2598   assert_different_registers(cache, index, flags, off);
  2600   ByteSize cp_base_offset = ConstantPoolCache::base_offset();
  2601   // Field offset
  2602   __ dsll(AT, index, Address::times_ptr);
  2603   __ dadd(AT, cache, AT);
  2604   __ ld(off, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f2_offset()));
  2605   // Flags
  2606   __ ld(flags, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset()));
  2608   // klass overwrite register
  2609   if (is_static) {
  2610     __ ld(obj, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f1_offset()));
  2611     const int mirror_offset = in_bytes(Klass::java_mirror_offset());
  2612     __ ld(obj, Address(obj, mirror_offset));
  2614     __ verify_oop(obj);
  2615   }
  2616 }
  2618 // get the method, itable_index and flags of the current invoke
  2619 void TemplateTable::load_invoke_cp_cache_entry(int byte_no,
  2620                                                Register method,
  2621                                                Register itable_index,
  2622                                                Register flags,
  2623                                                bool is_invokevirtual,
  2624                                                bool is_invokevfinal, /*unused*/
  2625                                                bool is_invokedynamic) {
  2626   // setup registers
  2627   const Register cache = T3;
  2628   const Register index = T1;
  2629   assert_different_registers(method, flags);
  2630   assert_different_registers(method, cache, index);
  2631   assert_different_registers(itable_index, flags);
  2632   assert_different_registers(itable_index, cache, index);
  2633   assert(is_invokevirtual == (byte_no == f2_byte), "is invokevirtual flag redundant");
  2634   // determine constant pool cache field offsets
  2635   const int method_offset = in_bytes(
  2636     ConstantPoolCache::base_offset() +
  2637       ((byte_no == f2_byte)
  2638        ? ConstantPoolCacheEntry::f2_offset()
  2639        : ConstantPoolCacheEntry::f1_offset()));
  2640   const int flags_offset = in_bytes(ConstantPoolCache::base_offset() +
  2641                                     ConstantPoolCacheEntry::flags_offset());
  2642   // access constant pool cache fields
  2643   const int index_offset = in_bytes(ConstantPoolCache::base_offset() +
  2644                                     ConstantPoolCacheEntry::f2_offset());
  2646   size_t index_size = (is_invokedynamic ? sizeof(u4): sizeof(u2));
  2647   resolve_cache_and_index(byte_no, cache, index, index_size);
  2649   //assert(wordSize == 8, "adjust code below");
  2650   // note we shift by 4, not 2, because what we get is the true index
  2651   // of the ConstantPoolCacheEntry, not the 2-bit-shifted index the x86 version uses
  2652   __ dsll(AT, index, Address::times_ptr);
  2653   __ dadd(AT, cache, AT);
  2654   __ ld(method, AT, method_offset);
  2656   if (itable_index != NOREG) {
  2657     __ ld(itable_index, AT, index_offset);
  2658   }
  2659   __ ld(flags, AT, flags_offset);
  2660 }
  2662 // The registers cache and index expected to be set before call.
  2663 // Correct values of the cache and index registers are preserved.
  2664 void TemplateTable::jvmti_post_field_access(Register cache, Register index,
  2665                                             bool is_static, bool has_tos) {
  2666   // do the JVMTI work here to avoid disturbing the register state below
  2667   // We use c_rarg registers here because we want to use the register used in
  2668   // the call to the VM
  2669   if (JvmtiExport::can_post_field_access()) {
  2670     // Check to see if a field access watch has been set before we
  2671     // take the time to call into the VM.
  2672     Label L1;
  2673     assert_different_registers(cache, index, FSR);
  2674     __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr());
  2675     __ lw(FSR, AT, 0);
  2676     __ beq(FSR, R0, L1);
  2677     __ delayed()->nop();
  2679     // We rely on the bytecode being resolved and the cpCache entry filled in.
  2680     // cache entry pointer
  2681     __ daddi(cache, cache, in_bytes(ConstantPoolCache::base_offset()));
  2682     __ shl(index, 4);
  2683     __ dadd(cache, cache, index);
  2684     if (is_static) {
  2685       __ move(FSR, R0);
  2686     } else {
  2687       __ lw(FSR, SP, 0);
  2688       __ verify_oop(FSR);
  2690     // FSR: object pointer or NULL
  2691     // cache: cache entry pointer
  2692     __ call_VM(NOREG, CAST_FROM_FN_PTR(address,
  2693                                        InterpreterRuntime::post_field_access), FSR, cache);
  2694     __ get_cache_and_index_at_bcp(cache, index, 1);
  2695     __ bind(L1);
  2696   }
  2697 }
  2699 void TemplateTable::pop_and_check_object(Register r) {
  2700   __ pop_ptr(r);
  2701   __ null_check(r);  // for field access must check obj.
  2702   __ verify_oop(r);
  2703 }
  2705 // used registers : T1, T2, T3, T1
  2706 // T1 : flags
  2707 // T2 : off
  2708 // T3 : obj
  2709 // T1 : field address
  2710 // Flag bits 31, 30, 29, 28 together form a 4-bit number 0 to 8 with the
  2711 // following mapping to the TosState states:
  2712 // btos: 0
  2713 // ctos: 1
  2714 // stos: 2
  2715 // itos: 3
  2716 // ltos: 4
  2717 // ftos: 5
  2718 // dtos: 6
  2719 // atos: 7
  2720 // vtos: 8
  2721 // see ConstantPoolCacheEntry::set_field for more info
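       // The code below recovers that number with
       //   dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift);
       //   andi(flags, flags, 0xf);
       // and then compares it against btos/itos/atos/... in turn.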
  2722 void TemplateTable::getfield_or_static(int byte_no, bool is_static) {
  2723   transition(vtos, vtos);
  2725   const Register cache = T3;
  2726   const Register index = T0;
  2728   const Register obj   = T3;
  2729   const Register off   = T2;
  2730   const Register flags = T1;
  2731   resolve_cache_and_index(byte_no, cache, index, sizeof(u2));
  2732   //jvmti_post_field_access(cache, index, is_static, false);
  2733   load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
  2735   if (!is_static) pop_and_check_object(obj);
  2736   __ dadd(index, obj, off);
  2739   Label Done, notByte, notInt, notShort, notChar,
  2740               notLong, notFloat, notObj, notDouble;
  2742   assert(btos == 0, "change code, btos != 0");
  2743   __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift);
  2744   __ andi(flags, flags, 0xf);
  2745   __ bne(flags, R0, notByte);
  2746   __ delayed()->nop();
  2748   // btos
  2749   __ lb(FSR, index, 0);
  2750   __ sd(FSR, SP, - wordSize);
  2752   // Rewrite bytecode to be faster
  2753   if (!is_static) {
  2754     patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2);
  2756   __ b(Done);
  2757   __ delayed()->daddi(SP, SP, - wordSize);
  2759   __ bind(notByte);
  2760   __ move(AT, itos);
  2761   __ bne(flags, AT, notInt);
  2762   __ delayed()->nop();
  2764   // itos
  2765   __ lw(FSR, index, 0);
  2766   __ sd(FSR, SP, - wordSize);
  2768   // Rewrite bytecode to be faster
  2769   if (!is_static) {
  2770     // patch_bytecode(Bytecodes::_fast_igetfield, T3, T2);
  2771     patch_bytecode(Bytecodes::_fast_igetfield, T3, T2);
  2773   __ b(Done);
  2774   __ delayed()->daddi(SP, SP, - wordSize);
  2776   __ bind(notInt);
  2777   __ move(AT, atos);
  2778   __ bne(flags, AT, notObj);
  2779   __ delayed()->nop();
  2781   // atos
  2782   //add for compressedoops
  2783   __ load_heap_oop(FSR, Address(index, 0));
  2784   __ sd(FSR, SP, - wordSize);
  2786   if (!is_static) {
  2787     //patch_bytecode(Bytecodes::_fast_agetfield, T3, T2);
  2788     patch_bytecode(Bytecodes::_fast_agetfield, T3, T2);
  2790   __ b(Done);
  2791   __ delayed()->daddi(SP, SP, - wordSize);
  2793   __ bind(notObj);
  2794   __ move(AT, ctos);
  2795   __ bne(flags, AT, notChar);
  2796   __ delayed()->nop();
  2798   // ctos
  2799   __ lhu(FSR, index, 0);
  2800   __ sd(FSR, SP, - wordSize);
  2802   if (!is_static) {
  2803     patch_bytecode(Bytecodes::_fast_cgetfield, T3, T2);
  2805   __ b(Done);
  2806   __ delayed()->daddi(SP, SP, - wordSize);
  2808   __ bind(notChar);
  2809   __ move(AT, stos);
  2810   __ bne(flags, AT, notShort);
  2811   __ delayed()->nop();
  2813   // stos
  2814   __ lh(FSR, index, 0);
  2815   __ sd(FSR, SP, - wordSize);
  2817   if (!is_static) {
  2818     patch_bytecode(Bytecodes::_fast_sgetfield, T3, T2);
  2820   __ b(Done);
  2821   __ delayed()->daddi(SP, SP, - wordSize);
  2823   __ bind(notShort);
  2824   __ move(AT, ltos);
  2825   __ bne(flags, AT, notLong);
  2826   __ delayed()->nop();
  2828   // FIXME : the load/store should be atomic, we have no simple method to do this in mips32
  2829   // ltos
  2830   __ ld(FSR, index, 0 * wordSize);
  2831   __ sd(FSR, SP, -2 * wordSize);
  2832   __ sd(R0, SP, -1 * wordSize);
  2834   // Don't rewrite to _fast_lgetfield for potential volatile case.
  2835   __ b(Done);
  2836   __ delayed()->daddi(SP, SP, - 2 * wordSize);
  2838   __ bind(notLong);
  2839   __ move(AT, ftos);
  2840   __ bne(flags, AT, notFloat);
  2841   __ delayed()->nop();
  2843   // ftos
  2844   __ lwc1(FSF, index, 0);
  2845   __ sdc1(FSF, SP, - wordSize);
  2847   if (!is_static) {
  2848     patch_bytecode(Bytecodes::_fast_fgetfield, T3, T2);
  2850   __ b(Done);
  2851   __ delayed()->daddi(SP, SP, - wordSize);
  2853   __ bind(notFloat);
  2854   __ move(AT, dtos);
  2855   __ bne(flags, AT, notDouble);
  2856   __ delayed()->nop();
  2858   // dtos
  2859   __ ldc1(FSF, index, 0 * wordSize);
  2860   __ sdc1(FSF, SP, - 2 * wordSize);
  2861   __ sd(R0, SP, - 1 * wordSize);
  2863   if (!is_static) {
  2864     patch_bytecode(Bytecodes::_fast_dgetfield, T3, T2);
  2866   __ b(Done);
  2867   __ delayed()->daddi(SP, SP, - 2 * wordSize);
  2869   __ bind(notDouble);
  2871   __ stop("Bad state");
  2873   __ bind(Done);
  2874 }
  2877 void TemplateTable::getfield(int byte_no) {
  2878   getfield_or_static(byte_no, false);
  2879 }
  2881 void TemplateTable::getstatic(int byte_no) {
  2882   getfield_or_static(byte_no, true);
  2883 }
  2885 // The registers cache and index expected to be set before call.
  2886 // The function may destroy various registers, just not the cache and index registers.
  2887 void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) {
  2888   ByteSize cp_base_offset = ConstantPoolCache::base_offset();
  2890   if (JvmtiExport::can_post_field_modification()) {
  2891     // Check to see if a field modification watch has been set before we take
  2892     // the time to call into the VM.
  2893     Label L1;
  2894     assert_different_registers(cache, index, AT);
  2896     __ li(AT, JvmtiExport::get_field_modification_count_addr());
  2897     __ lw(FSR, AT, 0);
  2898     __ beq(FSR, R0, L1);
  2899     __ delayed()->nop();
  2901     /* // We rely on the bytecode being resolved and the cpCache entry filled in.
  2902        resolve_cache_and_index(byte_no, T1, T1);
  2903        */
  2904     // The cache and index registers have already been set.
  2905     // This allows us to eliminate this call, but the cache and index
  2906     // registers then have to be used consistently after this line.
  2907     __ get_cache_and_index_at_bcp(T1, T9, 1);
  2909     if (is_static) {
  2910       __ move(T2, R0);
  2911     } else {
  2912       // Life is harder. The stack holds the value on top,
  2913       // followed by the object.
  2914       // We don't know the size of the value, though;
  2915       // it could be one or two words
  2916       // depending on its type. As a result, we must find
  2917       // the type to determine where the object is.
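             // Concretely: an ltos/dtos value occupies two expression-stack
             // slots, so the object sits at expr_offset_in_bytes(2); for every
             // other type it sits at expr_offset_in_bytes(1), which is what the
             // two_word/valsize_known code below computes.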
  2918       Label two_word, valsize_known;
  2919       __ dsll(AT, T1, 4);
  2920       __ dadd(AT, T1, AT);
  2921       __ lw(T3, AT, in_bytes(cp_base_offset
  2922             + ConstantPoolCacheEntry::flags_offset()));
  2923       __ move(T2, SP);
  2924       __ shr(T3, ConstantPoolCacheEntry::tos_state_shift);
  2926       // Make sure we don't need to mask ecx for tos_state_shift
  2927       // after the above shift
  2928       ConstantPoolCacheEntry::verify_tos_state_shift();
  2929       __ move(AT, ltos);
  2930       __ beq(T3, AT, two_word);
  2931       __ delayed()->nop();
  2932       __ move(AT, dtos);
  2933       __ beq(T3, AT, two_word);
  2934       __ delayed()->nop();
  2935       __ b(valsize_known);
  2936       __ delayed()->daddi(T2, T2,Interpreter::expr_offset_in_bytes(1) );
  2938       __ bind(two_word);
  2939       __ daddi(T2, T2,Interpreter::expr_offset_in_bytes(2));
  2941       __ bind(valsize_known);
  2942       // setup object pointer
  2943       __ lw(T2, T2, 0*wordSize);
  2945     // cache entry pointer
  2946     __ daddi(T1, T1, in_bytes(cp_base_offset));
  2947     __ shl(T1, 4);
  2948     __ daddu(T1, T1, T1);
  2949     // object (tos)
  2950     __ move(T3, SP);
  2951     // T2: object pointer set up above (NULL if static)
  2952     // T1: cache entry pointer
  2953     // T3: jvalue object on the stack
  2954     __ call_VM(NOREG, CAST_FROM_FN_PTR(address,
  2955                InterpreterRuntime::post_field_modification), T2, T1, T3);
  2956     __ get_cache_and_index_at_bcp(cache, index, 1);
  2957     __ bind(L1);
  2958   }
  2959 }
  2961 // used registers : T0, T1, T2, T3, T8
  2962 // T1 : flags
  2963 // T2 : off
  2964 // T3 : obj
  2965 // T8 : volatile bit
  2966 // see ConstantPoolCacheEntry::set_field for more info
  2967 void TemplateTable::putfield_or_static(int byte_no, bool is_static) {
  2968   transition(vtos, vtos);
  2970   const Register cache = T3;
  2971   const Register index = T0;
  2972   const Register obj   = T3;
  2973   const Register off   = T2;
  2974   const Register flags = T1;
  2975   const Register bc    = T3;
  2977   resolve_cache_and_index(byte_no, cache, index, sizeof(u2));
  2978   //jvmti_post_field_mod(cache, index, is_static);
  2979   load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
  2981   Label notVolatile, Done;
  2982   __ move(AT, 1<<ConstantPoolCacheEntry::is_volatile_shift);
  2983   __ andr(T8, flags, AT);
  2985   Label notByte, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble;
  2987   assert(btos == 0, "change code, btos != 0");
  2988   // btos
  2989   __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift);
  2990   __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask);
  2991   __ bne(flags, R0, notByte);
  2992   __ delayed()->nop();
  2994   __ pop(btos);
  2995   if (!is_static) {
  2996     pop_and_check_object(obj);
  2998   __ dadd(AT, obj, off);
  2999   __ sb(FSR, AT, 0);
  3001   if (!is_static) {
  3002     patch_bytecode(Bytecodes::_fast_bputfield, bc, off, true, byte_no);
  3004   __ b(Done);
  3005   __ delayed()->nop();
  3007   __ bind(notByte);
  3008   // itos
  3009   __ move(AT, itos);
  3010   __ bne(flags, AT, notInt);
  3011   __ delayed()->nop();
  3013   __ pop(itos);
  3014   if (!is_static) {
  3015     pop_and_check_object(obj);
  3017   __ dadd(AT, obj, off);
  3018   __ sw(FSR, AT, 0);
  3020   if (!is_static) {
  3021     patch_bytecode(Bytecodes::_fast_iputfield, bc, off, true, byte_no);
  3023   __ b(Done);
  3024   __ delayed()->nop();
  3025   __ bind(notInt);
  3026   // atos
  3027   __ move(AT, atos);
  3028   __ bne(flags, AT, notObj);
  3029   __ delayed()->nop();
  3031   __ pop(atos);
  3032   if (!is_static) {
  3033     pop_and_check_object(obj);
  3036   __ dadd(AT, obj, off);
  3037   __ store_heap_oop(Address(AT, 0), FSR);
  3038   __ store_check(obj);
  3040   if (!is_static) {
  3041     patch_bytecode(Bytecodes::_fast_aputfield, bc, off, true, byte_no);
  3043   __ b(Done);
  3044   __ delayed()->nop();
  3045   __ bind(notObj);
  3046   // ctos
  3047   __ move(AT, ctos);
  3048   __ bne(flags, AT, notChar);
  3049   __ delayed()->nop();
  3051   __ pop(ctos);
  3052   if (!is_static) {
  3053     pop_and_check_object(obj);
  3055   __ dadd(AT, obj, off);
  3056   __ sh(FSR, AT, 0);
  3057   if (!is_static) {
  3058     patch_bytecode(Bytecodes::_fast_cputfield, bc, off, true, byte_no);
  3060   __ b(Done);
  3061   __ delayed()->nop();
  3062   __ bind(notChar);
  3063   // stos
  3064   __ move(AT, stos);
  3065   __ bne(flags, AT, notShort);
  3066   __ delayed()->nop();
  3068   __ pop(stos);
  3069   if (!is_static) {
  3070     pop_and_check_object(obj);
  3072   __ dadd(AT, obj, off);
  3073   __ sh(FSR, AT, 0);
  3074   if (!is_static) {
  3075     patch_bytecode(Bytecodes::_fast_sputfield, bc, off, true, byte_no);
  3077   __ b(Done);
  3078   __ delayed()->nop();
  3079   __ bind(notShort);
  3080   // ltos
  3081   __ move(AT, ltos);
  3082   __ bne(flags, AT, notLong);
  3083   __ delayed()->nop();
  3085   // FIXME: there is no simple way to load/store 64-bit data in an atomic operation,
  3086   // so we just ignore the volatile flag.
  3087   //Label notVolatileLong;
  3088   //__ beq(T1, R0, notVolatileLong);
  3089   //__ delayed()->nop();
  3091   //addent = 2 * wordSize;
  3092   // no need
  3093   //__ lw(FSR, SP, 0);
  3094   //__ lw(SSR, SP, 1 * wordSize);
  3095   //if (!is_static) {
  3096   //  __ lw(T3, SP, addent);
  3097   //  addent += 1 * wordSize;
  3098   //  __ verify_oop(T3);
  3099   //}
  3101   //__ daddu(AT, T3, T2);
  3103   // Replace with real volatile test
  3104   // NOTE : we assume that sdc1&ldc1 operate in 32-bit, this is true for Godson2 even in 64-bit kernel
  3105   // last modified by yjl 7/12/2005
  3106   //__ ldc1(FSF, SP, 0);
  3107   //__ sdc1(FSF, AT, 0);
  3108   //volatile_barrier();
  3110   // Don't rewrite volatile version
  3111   //__ b(notVolatile);
  3112   //__ delayed()->addiu(SP, SP, addent);
  3114   //__ bind(notVolatileLong);
  3116   //__ pop(ltos);  // overwrites edx
  3117   //  __ lw(FSR, SP, 0 * wordSize);
  3118   //  __ lw(SSR, SP, 1 * wordSize);
  3119   //  __ daddi(SP, SP, 2*wordSize);
  3120   __ pop(ltos);
  3121   if (!is_static) {
  3122     pop_and_check_object(obj);
  3124   __ dadd(AT, obj, off);
  3125   __ sd(FSR, AT, 0);
  3126   if (!is_static) {
  3127     patch_bytecode(Bytecodes::_fast_lputfield, bc, off, true, byte_no);
  3129   __ b(notVolatile);
  3130   __ delayed()->nop();
  3132   __ bind(notLong);
  3133   // ftos
  3134   __ move(AT, ftos);
  3135   __ bne(flags, AT, notFloat);
  3136   __ delayed()->nop();
  3138   __ pop(ftos);
  3139   if (!is_static) {
  3140     pop_and_check_object(obj);
  3142   __ dadd(AT, obj, off);
  3143   __ swc1(FSF, AT, 0);
  3144   if (!is_static) {
  3145     patch_bytecode(Bytecodes::_fast_fputfield, bc, off, true, byte_no);
  3147   __ b(Done);
  3148   __ delayed()->nop();
  3149   __ bind(notFloat);
  3150   // dtos
  3151   __ move(AT, dtos);
  3152   __ bne(flags, AT, notDouble);
  3153   __ delayed()->nop();
  3155   __ pop(dtos);
  3156   if (!is_static) {
  3157     pop_and_check_object(obj);
  3159   __ dadd(AT, obj, off);
  3160   __ sdc1(FSF, AT, 0);
  3161   if (!is_static) {
  3162     patch_bytecode(Bytecodes::_fast_dputfield, bc, off, true, byte_no);
  3165 #ifdef ASSERT
  3166   __ b(Done);
  3167   __ delayed()->nop();
  3169   __ bind(notDouble);
  3170   __ stop("Bad state");
  3171 #endif
  3173   __ bind(Done);
  3175   // Check for volatile store
  3176   __ beq(T8, R0, notVolatile);
  3177   __ delayed()->nop();
  3178   volatile_barrier( );
  3179   __ bind(notVolatile);
  3180 }
  3182 void TemplateTable::putfield(int byte_no) {
  3183   putfield_or_static(byte_no, false);
  3184 }
  3186 void TemplateTable::putstatic(int byte_no) {
  3187   putfield_or_static(byte_no, true);
  3188 }
  3190 // used registers : T1, T2, T3
  3191 // T1 : cp_entry
  3192 // T2 : obj
  3193 // T3 : value pointer
  3194 void TemplateTable::jvmti_post_fast_field_mod() {
  3195   if (JvmtiExport::can_post_field_modification()) {
  3196     // Check to see if a field modification watch has been set before
  3197     // we take the time to call into the VM.
  3198     Label L2;
  3199     __ li(AT, JvmtiExport::get_field_modification_count_addr());
  3200     __ lw(T3, AT, 0);
  3201     __ beq(T3, R0, L2);
  3202     __ delayed()->nop();
  3203     __ pop_ptr(T2);
  3204     __ verify_oop(T2);
  3205     __ push_ptr(T2);
  3206     __ li(AT, -sizeof(jvalue));
  3207     __ daddu(SP, SP, AT);
  3208     __ move(T3, SP);
  3210     switch (bytecode()) {          // load values into the jvalue object
  3211       case Bytecodes::_fast_bputfield:
  3212         __ sb(FSR, SP, 0);
  3213         break;
  3214       case Bytecodes::_fast_sputfield:
  3215         __ sh(FSR, SP, 0);
  3216         break;
  3217       case Bytecodes::_fast_cputfield:
  3218         __ sh(FSR, SP, 0);
  3219         break;
  3220       case Bytecodes::_fast_iputfield:
  3221         __ sw(FSR, SP, 0);
  3222         break;
  3223       case Bytecodes::_fast_lputfield:
  3224         __ sd(FSR, SP, 0);
  3225         break;
  3226       case Bytecodes::_fast_fputfield:
  3227         __ swc1(FSF, SP, 0);
  3228         break;
  3229       case Bytecodes::_fast_dputfield:
  3230         __ sdc1(FSF, SP, 0);
  3231         break;
  3232       case Bytecodes::_fast_aputfield:
  3233         __ sd(FSR, SP, 0);
  3234         break;
  3235       default:  ShouldNotReachHere();
  3238     // Save FSR and sometimes SSR because call_VM() will clobber them,
  3239     // then use them for JVM/DI purposes
  3240     __ push(FSR);
  3241     if (bytecode() == Bytecodes::_fast_lputfield) __ push(SSR);
  3242     // access constant pool cache entry
  3243     __ get_cache_entry_pointer_at_bcp(T1, T2, 1);
  3244     // no need, verified ahead
  3245     __ verify_oop(T2);
  3247     // T2: object pointer copied above
  3248     // T1: cache entry pointer
  3249     // T3: jvalue object on the stack
  3250     __ call_VM(NOREG, CAST_FROM_FN_PTR(address,
  3251                                        InterpreterRuntime::post_field_modification), T2, T1, T3);
  3252     if (bytecode() == Bytecodes::_fast_lputfield) __ pop(SSR);  // restore high value
  3253     __ lw(FSR, SP, 0);
  3254     __ daddiu(SP, SP, sizeof(jvalue) + 1 * wordSize);
  3255     __ bind(L2);
  3256   }
  3257 }
  3259 // used registers : T2, T3, T1
  3260 // T2 : index & off & field address
  3261 // T3 : cache & obj
  3262 // T1 : flags
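       // In outline: load the cache-entry flags and field offset, test the
       // is_volatile bit, store the TOS value into the field, and for a
       // volatile field finish with volatile_barrier() (a sync() on MP).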
  3263 void TemplateTable::fast_storefield(TosState state) {
  3264   transition(state, vtos);
  3266   ByteSize base = ConstantPoolCache::base_offset();
  3268   jvmti_post_fast_field_mod();
  3270   // access constant pool cache
  3271   __ get_cache_and_index_at_bcp(T3, T2, 1);
  3273   // test for volatile with edx but edx is tos register for lputfield.
  3274   __ dsll(AT, T2, Address::times_8);
  3275   __ dadd(AT, T3, AT);
  3276   __ ld(T1, AT, in_bytes(base + ConstantPoolCacheEntry::flags_offset()));
  3278   // replace index with field offset from cache entry
  3279   __ ld(T2, AT, in_bytes(base + ConstantPoolCacheEntry::f2_offset()));
  3281   // Doug Lea believes this is not needed with current Sparcs (TSO) and Intel (PSO).
  3282   // volatile_barrier( );
  3284   Label notVolatile, Done;
  3285   // Check for volatile store
  3286   __ move(AT, 1<<ConstantPoolCacheEntry::is_volatile_shift);
  3287   __ andr(AT, T1, AT);
  3288   __ beq(AT, R0, notVolatile);
  3289   __ delayed()->nop();
  3292   // Get object from stack
  3293   pop_and_check_object(T3);
  3295   // field address
  3296   __ dadd(T2, T3, T2);
  3298   // access field
  3299   switch (bytecode()) {
  3300     case Bytecodes::_fast_bputfield:
  3301       __ sb(FSR, T2, 0);
  3302       break;
  3303     case Bytecodes::_fast_sputfield: // fall through
  3304     case Bytecodes::_fast_cputfield:
  3305       __ sh(FSR, T2, 0);
  3306       break;
  3307     case Bytecodes::_fast_iputfield:
  3308       __ sw(FSR, T2, 0);
  3309       break;
  3310     case Bytecodes::_fast_lputfield:
  3311       __ sd(FSR, T2, 0 * wordSize);
  3312       break;
  3313     case Bytecodes::_fast_fputfield:
  3314       __ swc1(FSF, T2, 0);
  3315       break;
  3316     case Bytecodes::_fast_dputfield:
  3317       __ sdc1(FSF, T2, 0 * wordSize);
  3318       break;
  3319     case Bytecodes::_fast_aputfield:
  3320       __ store_heap_oop(Address(T2, 0), FSR);
  3321       __ store_check(T3);
  3322       break;
  3323     default:
  3324       ShouldNotReachHere();
  3327   Label done;
  3328   volatile_barrier( );
  3329   __ b(done);
  3330   __ delayed()->nop();
  3332   // Same code as above, but don't need edx to test for volatile.
  3333   __ bind(notVolatile);
  3334   pop_and_check_object(T3);
  3335   //get the field address
  3336   __ dadd(T2, T3, T2);
  3338   // access field
  3339   switch (bytecode()) {
  3340     case Bytecodes::_fast_bputfield:
  3341       __ sb(FSR, T2, 0);
  3342       break;
  3343     case Bytecodes::_fast_sputfield: // fall through
  3344     case Bytecodes::_fast_cputfield:
  3345       __ sh(FSR, T2, 0);
  3346       break;
  3347     case Bytecodes::_fast_iputfield:
  3348       __ sw(FSR, T2, 0);
  3349       break;
  3350     case Bytecodes::_fast_lputfield:
  3351       __ sd(FSR, T2, 0 * wordSize);
  3352       break;
  3353     case Bytecodes::_fast_fputfield:
  3354       __ swc1(FSF, T2, 0);
  3355       break;
  3356     case Bytecodes::_fast_dputfield:
  3357       __ sdc1(FSF, T2, 0 * wordSize);
  3358       break;
  3359     case Bytecodes::_fast_aputfield:
  3360       //add for compressedoops
  3361       __ store_heap_oop(Address(T2, 0), FSR);
  3362       __ store_check(T3);
  3363       break;
  3364     default:
  3365       ShouldNotReachHere();
  3367   __ bind(done);
  3368 }
  3370 // used registers : T2, T3, T1
  3371 // T3 : cp_entry & cache
  3372 // T2 : index & offset
  3373 void TemplateTable::fast_accessfield(TosState state) {
  3374   transition(atos, state);
  3376   // do the JVMTI work here to avoid disturbing the register state below
  3377   if (JvmtiExport::can_post_field_access()) {
  3378     // Check to see if a field access watch has been set before we take
  3379     // the time to call into the VM.
  3380     Label L1;
  3381     __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr());
  3382     __ lw(T3, AT, 0);
  3383     __ beq(T3, R0, L1);
  3384     __ delayed()->nop();
  3385     // access constant pool cache entry
  3386     __ get_cache_entry_pointer_at_bcp(T3, T1, 1);
  3387     __ move(TSR, FSR);
  3388     __ verify_oop(FSR);
  3389     // FSR: object pointer copied above
  3390     // T3: cache entry pointer
  3391     __ call_VM(NOREG,
  3392                CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access),
  3393                FSR, T3);
  3394     __ move(FSR, TSR);
  3395     __ bind(L1);
  3398   // access constant pool cache
  3399   __ get_cache_and_index_at_bcp(T3, T2, 1);
  3400   // replace index with field offset from cache entry
  3401   __ dsll(AT, T2, Address::times_8);
  3402   __ dadd(AT, T3, AT);
  3403   __ ld(T2, AT, in_bytes(ConstantPoolCache::base_offset()
  3404                          + ConstantPoolCacheEntry::f2_offset()));
  3406   // FSR: object
  3407   __ verify_oop(FSR);
  3408   __ null_check(FSR);
  3409   // field addresses
  3410   __ dadd(FSR, FSR, T2);
  3412   // access field
  3413   switch (bytecode()) {
  3414     case Bytecodes::_fast_bgetfield:
  3415       __ lb(FSR, FSR, 0);
  3416       break;
  3417     case Bytecodes::_fast_sgetfield:
  3418       __ lh(FSR, FSR, 0);
  3419       break;
  3420     case Bytecodes::_fast_cgetfield:
  3421       __ lhu(FSR, FSR, 0);
  3422       break;
  3423     case Bytecodes::_fast_igetfield:
  3424       __ lw(FSR, FSR, 0);
  3425       break;
  3426     case Bytecodes::_fast_lgetfield:
  3427       __ stop("should not be rewritten");
  3428       break;
  3429     case Bytecodes::_fast_fgetfield:
  3430       __ lwc1(FSF, FSR, 0);
  3431       break;
  3432     case Bytecodes::_fast_dgetfield:
  3433       __ ldc1(FSF, FSR, 0);
  3434       break;
  3435     case Bytecodes::_fast_agetfield:
  3436       //add for compressedoops
  3437       __ load_heap_oop(FSR, Address(FSR, 0));
  3438       __ verify_oop(FSR);
  3439       break;
  3440     default:
  3441       ShouldNotReachHere();
  3444   // Doug Lea believes this is not needed with current Sparcs(TSO) and Intel(PSO)
  3445   // volatile_barrier( );
  3446 }
  3448 // generator for _fast_iaccess_0, _fast_aaccess_0, _fast_faccess_0
  3449 // used registers : T1, T2, T3, T1
  3450 // T1 : obj & field address
  3451 // T2 : off
  3452 // T3 : cache
  3453 // T1 : index
  3454 void TemplateTable::fast_xaccess(TosState state) {
  3455   transition(vtos, state);
  3457   // get receiver
  3458   __ ld(T1, aaddress(0));
  3459   // access constant pool cache
  3460   __ get_cache_and_index_at_bcp(T3, T2, 2);
  3461   __ dsll(AT, T2, Address::times_8);
  3462   __ dadd(AT, T3, AT);
  3463   __ ld(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset()));
  3465   // make sure exception is reported in correct bcp range (getfield is
  3466   // next instruction)
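         // i.e. BCP is bumped by one so the implicit null check below is
         // attributed to the bcp of the embedded getfield, then restored
         // with daddi(BCP, BCP, -1) once the value has been loaded.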
  3467   __ daddi(BCP, BCP, 1);
  3468   __ null_check(T1);
  3469   __ dadd(T1, T1, T2);
  3471   if (state == itos) {
  3472     __ lw(FSR, T1, 0);
  3473   } else if (state == atos) {
  3474     __ load_heap_oop(FSR, Address(T1, 0));
  3475     __ verify_oop(FSR);
  3476   } else if (state == ftos) {
  3477     __ lwc1(FSF, T1, 0);
  3478   } else {
  3479     ShouldNotReachHere();
  3481   __ daddi(BCP, BCP, -1);
  3482 }
  3486 //-----------------------------------------------------------------------------
  3487 // Calls
  3489 void TemplateTable::count_calls(Register method, Register temp) {
  3490   // implemented elsewhere
  3491   ShouldNotReachHere();
  3492 }
  3494 // method, index, recv, flags: T1, T2, T3, T1
  3495 // byte_no = 2 for _invokevirtual, 1 else
  3496 // T0 : return address
  3497 // get the method & index of the invoke, and push the return address of
  3498 // the invoke(first word in the frame)
  3499 // this address is where the return code jmp to.
  3500 // NOTE : this method will set T3&T1 as recv&flags
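       // Roughly: save the bcp, resolve and load the ConstantPoolCache entry,
       // push the appendix for invokedynamic/invokehandle when one is present,
       // load the receiver from the expression stack when the caller asks for
       // it, stash the flags in BCP when save_flags is set, and finally extract
       // the return type from the flags (continued below).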
  3501 void TemplateTable::prepare_invoke(int byte_no,
  3502                                    Register method,  // linked method (or i-klass)
  3503                                    Register index,   // itable index, MethodType, etc.
  3504                                    Register recv,    // if caller wants to see it
  3505                                    Register flags    // if caller wants to test it
  3506                                    ) {
  3507   // determine flags
  3508   const Bytecodes::Code code = bytecode();
  3509   const bool is_invokeinterface  = code == Bytecodes::_invokeinterface;
  3510   const bool is_invokedynamic    = code == Bytecodes::_invokedynamic;
  3511   const bool is_invokehandle     = code == Bytecodes::_invokehandle;
  3512   const bool is_invokevirtual    = code == Bytecodes::_invokevirtual;
  3513   const bool is_invokespecial    = code == Bytecodes::_invokespecial;
  3514   const bool load_receiver       = (recv  != noreg);
  3515   const bool save_flags          = (flags != noreg);
  3516   assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic),"");
  3517   assert(save_flags    == (is_invokeinterface || is_invokevirtual), "need flags for vfinal");
  3518   assert(flags == noreg || flags == T1, "error flags reg.");
  3519   assert(recv  == noreg || recv  == T3, "error recv reg.");
  3521   // setup registers & access constant pool cache
  3522   if(recv == noreg) recv  = T3;
  3523   if(flags == noreg) flags  = T1;
  3524   assert_different_registers(method, index, recv, flags);
  3526   // save 'interpreter return address'
  3527   __ save_bcp();
  3529   load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic);
  3531   if (is_invokedynamic || is_invokehandle) {
  3532    Label L_no_push;
  3533      __ move(AT, (1 << ConstantPoolCacheEntry::has_appendix_shift));
  3534      __ andr(AT, AT, flags);
  3535      __ beq(AT, R0, L_no_push);
  3536      __ delayed()->nop();
  3537      // Push the appendix as a trailing parameter.
  3538      // This must be done before we get the receiver,
  3539      // since the parameter_size includes it.
  3540      Register tmp = SSR;
  3541      __ push(tmp);
  3542      __ move(tmp, index);
  3543      assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0");
  3544      __ load_resolved_reference_at_index(index, tmp);
  3545      __ pop(tmp);
  3546      __ push(index);  // push appendix (MethodType, CallSite, etc.)
  3547      __ bind(L_no_push);
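     // A minimal sketch (plain C++, illustrative names) of the flag test that
     // guards the appendix push above:
     //   bool has_appendix =
     //       (flags & (1 << ConstantPoolCacheEntry::has_appendix_shift)) != 0;
     //   if (has_appendix) {
     //     // push the resolved MethodType/CallSite as a hidden trailing argument;
     //     // parameter_size already accounts for it
     //   }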
  3550   // load receiver if needed (after appendix is pushed so parameter size is correct)
  3551   // Note: no return address pushed yet
  3552   if (load_receiver) {
  3553     __ move(AT, ConstantPoolCacheEntry::parameter_size_mask);
  3554     __ andr(recv, flags, AT);
  3555     // 2014/07/31 Fu: Since we won't push RA on stack, no_return_pc_pushed_yet should be 0.
  3556     const int no_return_pc_pushed_yet = 0;  // argument slot correction before we push return address
  3557     const int receiver_is_at_end      = -1;  // back off one slot to get receiver
  3558     Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end);
  3559     __ ld(recv, recv_addr);
  3560     __ verify_oop(recv);
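    // Sketch (plain C++, illustrative names) of the slot addressed by
    // argument_address above: with no return address pushed yet, the receiver
    // is the deepest argument slot on the expression stack:
    //   oop receiver = *(oop*)((intptr_t*)SP + (parameter_size - 1));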
  3562   if(save_flags) {
  3563     __ move(BCP, flags);
  3566   // compute return type
  3567   __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift);
  3568   __ andi(flags, flags, 0xf);
  3570   // Make sure we don't need to mask flags for tos_state_shift after the above shift
  3571   ConstantPoolCacheEntry::verify_tos_state_shift();
  3572   // load return address
  3574     const address table = (address) Interpreter::invoke_return_entry_table_for(code);
  3575     __ li(AT, (long)table);
  3576     __ dsll(flags, flags, LogBytesPerWord);
  3577     __ dadd(AT, AT, flags);
  3578     __ ld(RA, AT, 0);
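    // Equivalent lookup in plain C++ (illustrative only): the return-entry
    // table is indexed by the TosState decoded from the flags word:
    //   address* table = (address*)Interpreter::invoke_return_entry_table_for(code);
    //   address  ret   = table[tos_state];   // loaded into RA above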
  3581   if (save_flags) {
  3582     __ move(flags, BCP);
  3583     __ restore_bcp();
  3587 // used registers : T0, T3, T1, T2
// T3 : recv; the usage of these two registers follows the convention set by prepare_invoke
// T1 : flags, klass
// Rmethod : method; the index must be passed in Rmethod
  3591 void TemplateTable::invokevirtual_helper(Register index,
  3592                                          Register recv,
  3593                                          Register flags) {
  3595   assert_different_registers(index, recv, flags, T2);
  3597   // Test for an invoke of a final method
  3598   Label notFinal;
  3599   __ move(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift));
  3600   __ andr(AT, flags, AT);
  3601   __ beq(AT, R0, notFinal);
  3602   __ delayed()->nop();
  3604   Register method = index;  // method must be Rmethod
  3605   assert(method == Rmethod, "methodOop must be Rmethod for interpreter calling convention");
  3607   // do the call - the index is actually the method to call
  3608   // the index is indeed methodOop, for this is vfinal,
  3609   // see ConstantPoolCacheEntry::set_method for more info
  3611   __ verify_oop(method);
  3613   // It's final, need a null check here!
  3614   __ null_check(recv);
  3616   // profile this call
  3617   __ profile_final_call(T2);
  3619   // 2014/11/24 Fu
  3620   // T2: tmp, used for mdp
  3621   // method: callee
  3622   // T9: tmp
  3623   // is_virtual: true
  3624   __ profile_arguments_type(T2, method, T9, true);
  3626   __ jump_from_interpreted(method, T2);
  3628   __ bind(notFinal);
  3630   // get receiver klass
  3631   __ null_check(recv, oopDesc::klass_offset_in_bytes());
  3632   __ load_klass(T2, recv);
  3633   __ verify_oop(T2);
  3635   // profile this call
  3636   __ profile_virtual_call(T2, T0, T1);
  3638   // get target methodOop & entry point
  3639   const int base = InstanceKlass::vtable_start_offset() * wordSize;
  3640   assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
  3641   __ dsll(AT, index, Address::times_ptr);
  3642   // T2: receiver
  3643   __ dadd(AT, T2, AT);
  // this is an unaligned read
  3645   __ ld(method, AT, base + vtableEntry::method_offset_in_bytes());
  3646   __ profile_arguments_type(T2, method, T9, true);
  3647   __ jump_from_interpreted(method, T2);
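  // A rough sketch (plain C++, illustrative names) of the virtual dispatch above:
  // the vtable starts at a fixed offset inside the receiver's klass and each
  // entry is one word wide:
  //   intptr_t entry_off = base + index * wordSize
  //                        + vtableEntry::method_offset_in_bytes();
  //   Method*  callee    = *(Method**)((char*)receiver_klass + entry_off);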
  3651 void TemplateTable::invokevirtual(int byte_no) {
  3652   transition(vtos, vtos);
  3653   assert(byte_no == f2_byte, "use this argument");
  3654   prepare_invoke(byte_no, Rmethod, NOREG, T3, T1);
  3655   // now recv & flags in T3, T1
  3656   invokevirtual_helper(Rmethod, T3, T1);
  3659 // T9 : entry
  3660 // Rmethod : method
  3661 void TemplateTable::invokespecial(int byte_no) {
  3662   transition(vtos, vtos);
  3663   assert(byte_no == f1_byte, "use this argument");
  3664   prepare_invoke(byte_no, Rmethod, NOREG, T3);
  3665   // now recv & flags in T3, T1
  3666   __ verify_oop(T3);
  3667   __ null_check(T3);
  3668   __ profile_call(T9);
  3670   // 2014/11/24 Fu
  3671   // T8: tmp, used for mdp
  3672   // Rmethod: callee
  3673   // T9: tmp
  3674   // is_virtual: false
  3675   __ profile_arguments_type(T8, Rmethod, T9, false);
  3677   __ jump_from_interpreted(Rmethod, T9);
  3678   __ move(T0, T3);//aoqi ?
  3681 void TemplateTable::invokestatic(int byte_no) {
  3682   transition(vtos, vtos);
  3683   assert(byte_no == f1_byte, "use this argument");
  3684   prepare_invoke(byte_no, Rmethod, NOREG);
  3685   __ verify_oop(Rmethod);
  3687   __ profile_call(T9);
  3689   // 2014/11/24 Fu
  3690   // T8: tmp, used for mdp
  3691   // Rmethod: callee
  3692   // T9: tmp
  3693   // is_virtual: false
  3694   __ profile_arguments_type(T8, Rmethod, T9, false);
  3696   __ jump_from_interpreted(Rmethod, T9);
// I have no idea what to do here now; left for a future change. FIXME.
  3700 void TemplateTable::fast_invokevfinal(int byte_no) {
  3701   transition(vtos, vtos);
  3702   assert(byte_no == f2_byte, "use this argument");
  3703   __ stop("fast_invokevfinal not used on mips64");
  3706 // used registers : T0, T1, T2, T3, T1, A7
  3707 // T0 : itable, vtable, entry
  3708 // T1 : interface
  3709 // T3 : receiver
  3710 // T1 : flags, klass
  3711 // Rmethod : index, method, this is required by interpreter_entry
  3712 void TemplateTable::invokeinterface(int byte_no) {
  3713   transition(vtos, vtos);
  3714   //this method will use T1-T4 and T0
  3715   assert(byte_no == f1_byte, "use this argument");
  3716   prepare_invoke(byte_no, T2, Rmethod, T3, T1);
  3717   // T2: Interface
  3718   // Rmethod: index
  3719   // T3: receiver
  3720   // T1: flags
  3722   // Special case of invokeinterface called for virtual method of
  3723   // java.lang.Object.  See cpCacheOop.cpp for details.
  3724   // This code isn't produced by javac, but could be produced by
  3725   // another compliant java compiler.
  3726   Label notMethod;
  3727   __ move(AT, (1 << ConstantPoolCacheEntry::is_forced_virtual_shift));
  3728   __ andr(AT, T1, AT);
  3729   __ beq(AT, R0, notMethod);
  3730   __ delayed()->nop();
  3732   invokevirtual_helper(Rmethod, T3, T1);
  3733   __ bind(notMethod);
  3734   // Get receiver klass into T1 - also a null check
  3735   //add for compressedoops
  3736   __ load_klass(T1, T3);
  3737   __ verify_oop(T1);
  3739   // profile this call
  3740   __ profile_virtual_call(T1, T0, FSR);
  3742   // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  3743   // TODO: x86 add a new method lookup_interface_method  // LEE
  3744   const int base = InstanceKlass::vtable_start_offset() * wordSize;
  3745   assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below");
  3746   __ lw(AT, T1, InstanceKlass::vtable_length_offset() * wordSize);
  3747   __ dsll(AT, AT, Address::times_8);
  3748   __ dadd(T0, T1, AT);
  3749   __ daddi(T0, T0, base);
  3750   if (HeapWordsPerLong > 1) {
  3751     // Round up to align_object_offset boundary
  3752     __ round_to(T0, BytesPerLong);
  3754   // now T0 is the begin of the itable
  3756   Label entry, search, interface_ok;
  3758   ///__ jmp(entry);
  3759   __ b(entry);
  3760   __ delayed()->nop();
  3762   __ bind(search);
  3763   __ increment(T0, itableOffsetEntry::size() * wordSize);
  3765   __ bind(entry);
  3767   // Check that the entry is non-null.  A null entry means that the receiver
  3768   // class doesn't implement the interface, and wasn't the same as the
  3769   // receiver class checked when the interface was resolved.
  3770   __ ld(AT, T0, itableOffsetEntry::interface_offset_in_bytes());
  3771   __ bne(AT, R0, interface_ok);
  3772   __ delayed()->nop();
  3773   // throw exception
  3774   // the call_VM checks for exception, so we should never return here.
  3776   //__ pop();//FIXME here,
  3777   // pop return address (pushed by prepare_invoke).
  3778   // no need now, we just save the value in RA now
  3780   __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_IncompatibleClassChangeError));
  3781   __ should_not_reach_here();
  3783   __ bind(interface_ok);
  // NOTE: no pop here, unlike the x86 version
  3785   __ bne(AT, T2, search);
  3786   __ delayed()->nop();
  3788   // now we get vtable of the interface
  3789   __ ld(T0, T0, itableOffsetEntry::offset_offset_in_bytes());
  3790   __ daddu(T0, T1, T0);
  3791   assert(itableMethodEntry::size() * wordSize == 8, "adjust the scaling in the code below");
  3792   __ dsll(AT, Rmethod, Address::times_8);
  3793   __ daddu(AT, T0, AT);
  3794   // now we get the method
  3795   __ ld(Rmethod, AT, 0);
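  // A condensed sketch (plain C++, illustrative names) of the itable lookup
  // performed above: scan the itableOffsetEntry list that follows the vtable
  // until the resolved interface is found, then index its method table:
  //   char* ioe = (char*)recv_klass + base + vtable_length * wordSize;   // rounded up
  //   while (true) {
  //     Klass* itf = *(Klass**)(ioe + itableOffsetEntry::interface_offset_in_bytes());
  //     if (itf == NULL)         { /* receiver does not implement the interface */ }
  //     if (itf == resolved_itf) break;
  //     ioe += itableOffsetEntry::size() * wordSize;
  //   }
  //   intptr_t moff  = *(intptr_t*)(ioe + itableOffsetEntry::offset_offset_in_bytes());
  //   Method* callee = *(Method**)((char*)recv_klass + moff + itable_index * wordSize);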
  // Rmethod: methodOop to call
  3797   // T3: receiver
  3798   // Check for abstract method error
  3799   // Note: This should be done more efficiently via a throw_abstract_method_error
  3800   //       interpreter entry point and a conditional jump to it in case of a null
  3801   //       method.
  3803     Label L;
  3804     __ bne(Rmethod, R0, L);
  3805     __ delayed()->nop();
  3807     // throw exception
  3808     // note: must restore interpreter registers to canonical
  3809     //       state for exception handling to work correctly!
  3810     ///__ popl(ebx);          // pop return address (pushed by prepare_invoke)
  3811     //__ restore_bcp();      // esi must be correct for exception handler
  3812     //(was destroyed)
  3813     //__ restore_locals();   // make sure locals pointer
  3814     //is correct as well (was destroyed)
  3815     ///__ call_VM(noreg, CAST_FROM_FN_PTR(address,
  3816     //InterpreterRuntime::throw_AbstractMethodError));
  3817     __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError));
  3818     // the call_VM checks for exception, so we should never return here.
  3819     __ should_not_reach_here();
  3820     __ bind(L);
  3823   // 2014/11/24 Fu
  3824   // T8: tmp, used for mdp
  3825   // Rmethod: callee
  3826   // T9: tmp
  3827   // is_virtual: true
  3828   __ profile_arguments_type(T8, Rmethod, T9, true);
  3830   __ jump_from_interpreted(Rmethod, T9);
  3834 void TemplateTable::invokehandle(int byte_no) {
  3835   transition(vtos, vtos);
  3836   assert(byte_no == f1_byte, "use this argument");
  3837   const Register T2_method = Rmethod;
  3838   const Register FSR_mtype  = FSR;
  3839   const Register T3_recv   = T3;
  3841   if (!EnableInvokeDynamic) {
  3842      // rewriter does not generate this bytecode
  3843      __ should_not_reach_here();
  3844      return;
  3847    prepare_invoke(byte_no, T2_method, FSR_mtype, T3_recv);
  3848    //??__ verify_method_ptr(T2_method);
  3849    __ verify_oop(T3_recv);
  3850    __ null_check(T3_recv);
   // FSR_mtype:  MethodType object (from cpool->resolved_references[f1], if necessary)
   // T2_method:  MH.invokeExact_MT method (from f2)

   // Note: FSR_mtype is already pushed (if necessary) by prepare_invoke
  3857    // FIXME: profile the LambdaForm also
  3858    __ profile_final_call(T9);
  3860    // 2014/11/24 Fu
  3861    // T8: tmp, used for mdp
  3862    // T2_method: callee
  3863    // T9: tmp
  3864    // is_virtual: true
  3865    __ profile_arguments_type(T8, T2_method, T9, true);
  3867   __ jump_from_interpreted(T2_method, T9);
  3870  void TemplateTable::invokedynamic(int byte_no) {
  3871    transition(vtos, vtos);
  3872    assert(byte_no == f1_byte, "use this argument");
  3874    if (!EnableInvokeDynamic) {
  3875      // We should not encounter this bytecode if !EnableInvokeDynamic.
  3876      // The verifier will stop it.  However, if we get past the verifier,
  3877      // this will stop the thread in a reasonable way, without crashing the JVM.
  3878      __ call_VM(noreg, CAST_FROM_FN_PTR(address,
  3879                       InterpreterRuntime::throw_IncompatibleClassChangeError));
  3880      // the call_VM checks for exception, so we should never return here.
  3881      __ should_not_reach_here();
  3882      return;
  3885    //const Register Rmethod   = T2;
  3886    const Register T2_callsite = T2;
  3888    prepare_invoke(byte_no, Rmethod, T2_callsite);
   // T2_callsite: CallSite object (from cpool->resolved_references[f1])
   // Rmethod:     MH.linkToCallSite method (from f2)

   // Note: T2_callsite is already pushed by prepare_invoke
  3894    // %%% should make a type profile for any invokedynamic that takes a ref argument
  3895    // profile this call
  3896    __ profile_call(T9);
  3898    // 2014/11/24 Fu
  3899    // T8: tmp, used for mdp
  3900    // Rmethod: callee
  3901    // T9: tmp
  3902    // is_virtual: false
  3903    __ profile_arguments_type(T8, Rmethod, T9, false);
  3905    __ verify_oop(T2_callsite);
  3907    __ jump_from_interpreted(Rmethod, T9);
  3910 //-----------------------------------------------------------------------------
  3911 // Allocation
  3912 // T1 : tags & buffer end & thread
  3913 // T2 : object end
  3914 // T3 : klass
  3915 // T1 : object size
  3916 // A1 : cpool
  3917 // A2 : cp index
  3918 // return object in FSR
  3919 void TemplateTable::_new() {
  3920   transition(vtos, atos);
  3921   __ get_2_byte_integer_at_bcp(A2, AT, 1);
  3922   __ huswap(A2);
  3924   Label slow_case;
  3925   Label done;
  3926   Label initialize_header;
  3927   Label initialize_object; // including clearing the fields
  3928   Label allocate_shared;
  3930   // get InstanceKlass in T3
  3931   __ get_cpool_and_tags(A1, T1);
  3932   __ dsll(AT, A2, Address::times_8);
  3933   __ dadd(AT, A1, AT);
  3934   __ ld(T3, AT, sizeof(ConstantPool));
  3936   // make sure the class we're about to instantiate has been resolved.
  3937   // Note: slow_case does a pop of stack, which is why we loaded class/pushed above
  3938   const int tags_offset = Array<u1>::base_offset_in_bytes();
  3939   __ dadd(T1, T1, A2);
  3940   __ lb(AT, T1, tags_offset);
  3941   __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class);
  3942   __ bne(AT, R0, slow_case);
  3943   __ delayed()->nop();
  3946   // make sure klass is initialized & doesn't have finalizer
  3947   // make sure klass is fully initialized
  3948   __ lhu(T1, T3, in_bytes(InstanceKlass::init_state_offset()));
  3949   __ daddiu(AT, T1, - (int)InstanceKlass::fully_initialized);
  3950   __ bne(AT, R0, slow_case);
  3951   __ delayed()->nop();
  3953   // has_finalizer
  3954   //__ lw(T1, T3, Klass::access_flags_offset() + sizeof(oopDesc));
  3955   //__ move(AT, JVM_ACC_CAN_BE_FASTPATH_ALLOCATED);
  3956   //__ andr(AT, T1, AT);
  3957   __ lw(T1, T3, in_bytes(Klass::layout_helper_offset()) );
  3958   __ andi(AT, T1, Klass::_lh_instance_slow_path_bit);
  3959   __ bne(AT, R0, slow_case);
  3960   __ delayed()->nop();
  3962   // get instance_size in InstanceKlass (already aligned) in T0,
  3963   // be sure to preserve this value
  3964   __ lw(T0, T3, in_bytes(Klass::layout_helper_offset()) );
  3966   // Allocate the instance
  3967   // 1) Try to allocate in the TLAB
  3968   // 2) if fail and the object is large allocate in the shared Eden
  3969   // 3) if the above fails (or is not applicable), go to a slow case
  3970   // (creates a new TLAB, etc.)
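  // A minimal sketch (plain C++, illustrative field names) of the TLAB fast
  // path emitted below; the instance size comes from the layout helper:
  //   char* obj = thread->tlab_top;
  //   char* end = obj + instance_size_in_bytes;
  //   if (end > thread->tlab_end) goto shared_or_slow;  // TLAB exhausted
  //   thread->tlab_top = end;                           // bump-pointer allocation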
  3972   const bool allow_shared_alloc =
  3973     Universe::heap()->supports_inline_contig_alloc() && !CMSIncrementalMode;
  3975   if (UseTLAB) {
  3976 #ifndef OPT_THREAD
  3977     const Register thread = T8;
  3978     __ get_thread(thread);
  3979 #else
  3980     const Register thread = TREG;
  3981 #endif
  3982     // get tlab_top
  3983     __ ld(FSR, thread, in_bytes(JavaThread::tlab_top_offset()));
  3984     __ dadd(T2, FSR, T0);
  3985     // get tlab_end
  3986     __ ld(AT, thread, in_bytes(JavaThread::tlab_end_offset()));
  3987     __ slt(AT, AT, T2);
  3988     __ bne(AT, R0, allow_shared_alloc ? allocate_shared : slow_case);
  3989     __ delayed()->nop();
  3990     __ sd(T2, thread, in_bytes(JavaThread::tlab_top_offset()));
  3992     if (ZeroTLAB) {
  3993       // the fields have been already cleared
  3994       __ b_far(initialize_header);
  3995     } else {
  3996       // initialize both the header and fields
  3997       __ b_far(initialize_object);
  3999     __ delayed()->nop();
  // Allocation in the shared Eden, if allowed
  // T0 : instance size in bytes
  if (allow_shared_alloc) {
  4005     __ bind(allocate_shared);
  4007     Label retry;
  4008     Address heap_top(T1);
  4009     __ li(T1, (long)Universe::heap()->top_addr());
  4011     __ ld(FSR, heap_top);
  4012     __ bind(retry);
  4013     __ dadd(T2, FSR, T0);
  4014     __ li(AT, (long)Universe::heap()->end_addr());
  4015     __ ld(AT, AT, 0);
  4016     __ slt(AT, AT, T2);
  4017     __ bne(AT, R0, slow_case);
  4018     __ delayed()->nop();
    // Compare FSR with the current heap top and, if they are still equal,
    // store the new top. cmpxchg leaves a non-zero value in AT on success
    // and zero if another thread changed the top in the meantime.
    //
    // FSR: object begin
    // T2: object end
    // T0: instance size in bytes
  4028     // if someone beat us on the allocation, try again, otherwise continue
  4029     __ cmpxchg(T2, heap_top, FSR);
  4030     __ beq(AT, R0, retry);
  4031     __ delayed()->nop();
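    // Sketch (plain C++, illustrative names) of the CAS-retry loop above;
    // atomic_cmpxchg stands in for the ll/sc sequence emitted by cmpxchg:
    //   char** top_addr = (char**)Universe::heap()->top_addr();
    //   char*  obj, *new_top;
    //   do {
    //     obj     = *top_addr;
    //     new_top = obj + instance_size_in_bytes;
    //     if (new_top > *(char**)Universe::heap()->end_addr()) goto slow_case;
    //   } while (atomic_cmpxchg(top_addr, obj, new_top) != obj);  // lost the race: retry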
  4034   if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) {
  4035     // The object is initialized before the header.  If the object size is
  4036     // zero, go directly to the header initialization.
  4037     __ bind(initialize_object);
  4038     __ li(AT, - sizeof(oopDesc));
  4039     __ daddu(T0, T0, AT);
  4040     __ beq_far(T0, R0, initialize_header);
  4041     __ delayed()->nop();
    // T0 must be a multiple of 2
  4045 #ifdef ASSERT
  4046     // make sure T0 was multiple of 2
  4047     Label L;
  4048     __ andi(AT, T0, 1);
  4049     __ beq(AT, R0, L);
  4050     __ delayed()->nop();
  4051     __ stop("object size is not multiple of 2 - adjust this code");
  4052     __ bind(L);
    // T0 must be > 0, no extra check needed here
  4054 #endif
  4056     // initialize remaining object fields: T0 is a multiple of 2
  4058       Label loop;
  4059       __ dadd(T1, FSR, T0);
  4060       __ daddi(T1, T1, -oopSize);
  4062       __ bind(loop);
  4063       __ sd(R0, T1, sizeof(oopDesc) + 0 * oopSize);
      __ bne(T1, FSR, loop); // don't clear the header
  4065       __ delayed()->daddi(T1, T1, -oopSize);
  4066       // actually sizeof(oopDesc)==8, so we can move
  4067       // __ addiu(AT, AT, -8) to delay slot, and compare FSR with T1
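      // Equivalent loop in plain C++ (illustrative names): zero every field
      // word after the header, working backwards, and never touch the header:
      //   char*     base = obj + sizeof(oopDesc);
      //   intptr_t* p    = (intptr_t*)(base + size_without_header);
      //   while (p != (intptr_t*)base) *--p = 0;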
  4069     //klass in T3,
  4070     // initialize object header only.
  4071     __ bind(initialize_header);
  4072     if (UseBiasedLocking) {
  4073       __ ld(AT, T3, in_bytes(Klass::prototype_header_offset()));
  4074       __ sd(AT, FSR, oopDesc::mark_offset_in_bytes ());
  4075     } else {
  4076       __ li(AT, (long)markOopDesc::prototype());
  4077       __ sd(AT, FSR, oopDesc::mark_offset_in_bytes());
  4080     __ store_klass_gap(FSR, R0);
  4081     __ store_klass(FSR, T3);
  4084       SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0);
  4085       // Trigger dtrace event for fastpath
  4086       __ push(atos);
  4087       __ call_VM_leaf(
  4088            CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), FSR);
  4089       __ pop(atos);
  4092     __ b(done);
  4093     __ delayed()->nop();
  4096   // slow case
  4097   __ bind(slow_case);
  4098   call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), A1, A2);
  4100   // continue
  4101   __ bind(done);
  4102   __ sync();
  4105 void TemplateTable::newarray() {
  4106   transition(itos, atos);
  4107   __ lbu(A1, at_bcp(1));
  4108   //type, count
  4109   call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), A1, FSR);
  4110   __ sync();
  4113 void TemplateTable::anewarray() {
  4114   transition(itos, atos);
  4115   __ get_2_byte_integer_at_bcp(A2, AT, 1);
  4116   __ huswap(A2);
  4117   __ get_constant_pool(A1);
  4118   // cp, index, count
  4119   call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), A1, A2, FSR);
  4120   __ sync();
  4123 void TemplateTable::arraylength() {
  4124   transition(atos, itos);
  4125   __ null_check(FSR, arrayOopDesc::length_offset_in_bytes());
  4126   __ lw(FSR, FSR, arrayOopDesc::length_offset_in_bytes());
// T2, T3 and T1 play the roles of ebx, ecx and edx in the x86 version
// when gen_subtype_check is invoked: super klass in T3, sub klass in T2, and the object is always in FSR
// T2 : sub klass
// T3 : cpool
// T3 : super klass
  4134 void TemplateTable::checkcast() {
  4135   transition(atos, atos);
  4136   Label done, is_null, ok_is_subtype, quicked, resolved;
  4137   __ beq(FSR, R0, is_null);
  4138   __ delayed()->nop();
  4140   // Get cpool & tags index
  4141   __ get_cpool_and_tags(T3, T1);
  4142   __ get_2_byte_integer_at_bcp(T2, AT, 1);
  4143   __ huswap(T2);
  4145   // See if bytecode has already been quicked
  4146   __ dadd(AT, T1, T2);
  4147   __ lb(AT, AT, Array<u1>::base_offset_in_bytes());
  4148   __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class);
  4149   __ beq(AT, R0, quicked);
  4150   __ delayed()->nop();
  /* 2012/6/2 Jin: In InterpreterRuntime::quicken_io_cc, lots of new classes may be loaded.
   *  A GC triggered there may move the object currently held in V0 to another place in the heap.
   *  Therefore, we must never keep such an object only in a register across the call.
   *  Instead, we spill it to the expression stack, where the GC updates the slot automatically.
   *  After the GC, the object address reloaded into FSR points to the new location.
   */
  4158   __ push(atos);
  4159   const Register thread = TREG;
  4160 #ifndef OPT_THREAD
  4161   __ get_thread(thread);
  4162 #endif
  4163   call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
  4164   __ get_vm_result_2(T3, thread);
  4165   __ pop_ptr(FSR);
  4166   __ b(resolved);
  4167   __ delayed()->nop();
  4169   // klass already in cp, get superklass in T3
  4170   __ bind(quicked);
  4171   __ dsll(AT, T2, Address::times_8);
  4172   __ dadd(AT, T3, AT);
  4173   __ ld(T3, AT, sizeof(ConstantPool));
  4175   __ bind(resolved);
  4177   // get subklass in T2
  // added for compressed oops
  4179   __ load_klass(T2, FSR);
  4180   // Superklass in T3.  Subklass in T2.
  4181   __ gen_subtype_check(T3, T2, ok_is_subtype);
  4183   // Come here on failure
  4184   // object is at FSR
  4185   __ jmp(Interpreter::_throw_ClassCastException_entry);
  4186   __ delayed()->nop();
  4188   // Come here on success
  4189   __ bind(ok_is_subtype);
  4191   // Collect counts on whether this check-cast sees NULLs a lot or not.
  4192   if (ProfileInterpreter) {
  4193     __ b(done);
  4194     __ delayed()->nop();
  4195     __ bind(is_null);
  4196     __ profile_null_seen(T3);
  4197   } else {
  4198     __ bind(is_null);
  4200   __ bind(done);
// T3 is used as cpool, T1 as tags, T2 as index
// the object is always in FSR, superklass in T3, subklass in T2
  4205 void TemplateTable::instanceof() {
  4206   transition(atos, itos);
  4207   Label done, is_null, ok_is_subtype, quicked, resolved;
  4209   __ beq(FSR, R0, is_null);
  4210   __ delayed()->nop();
  4212   // Get cpool & tags index
  4213   __ get_cpool_and_tags(T3, T1);
  4214   // get index
  4215   __ get_2_byte_integer_at_bcp(T2, AT, 1);
  4216   __ hswap(T2);
  4218   // See if bytecode has already been quicked
  4219   // quicked
  4220   __ daddu(AT, T1, T2);
  4221   __ lb(AT, AT, Array<u1>::base_offset_in_bytes());
  4222   __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class);
  4223   __ beq(AT, R0, quicked);
  4224   __ delayed()->nop();
  4226   __ push(atos);
  4227   const Register thread = TREG;
  4228 #ifndef OPT_THREAD
  4229   __ get_thread(thread);
  4230 #endif
  4231   call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
  4232   __ get_vm_result_2(T3, thread);
  4233   __ pop_ptr(FSR);
  4234   __ b(resolved);
  4235   __ delayed()->nop();
  4237   // get superklass in T3, subklass in T2
  4238   __ bind(quicked);
  4239   __ dsll(AT, T2, Address::times_8);
  4240   __ daddu(AT, T3, AT);
  4241   __ ld(T3, AT, sizeof(ConstantPool));
  4243   __ bind(resolved);
  4244   // get subklass in T2
  // added for compressed oops
  4246   __ load_klass(T2, FSR);
  4248   // Superklass in T3.  Subklass in T2.
  4249   __ gen_subtype_check(T3, T2, ok_is_subtype);
  4250   // Come here on failure
  4251   __ b(done);
  4252   __ delayed(); __ move(FSR, R0);
  4254   // Come here on success
  4255   __ bind(ok_is_subtype);
  4256   __ move(FSR, 1);
  4258   // Collect counts on whether this test sees NULLs a lot or not.
  4259   if (ProfileInterpreter) {
  4260     __ beq(R0, R0, done);
  4261     __ nop();
  4262     __ bind(is_null);
  4263     __ profile_null_seen(T3);
  4264   } else {
  4265     __ bind(is_null);   // same as 'done'
  4267   __ bind(done);
  4268   // FSR = 0: obj == NULL or  obj is not an instanceof the specified klass
  4269   // FSR = 1: obj != NULL and obj is     an instanceof the specified klass
  4272 //--------------------------------------------------------
  4273 //--------------------------------------------
  4274 // Breakpoints
  4275 void TemplateTable::_breakpoint() {
  // Note: We get here even if we are single stepping...
  // jbug insists on setting breakpoints at every bytecode
  // even if we are in single step mode.
  4280   transition(vtos, vtos);
  4282   // get the unpatched byte code
  4283   __ get_method(A1);
  4284   __ call_VM(NOREG,
  4285              CAST_FROM_FN_PTR(address,
  4286                               InterpreterRuntime::get_original_bytecode_at),
  4287              A1, BCP);
  4288   __ move(Rnext, V0); // Jin: Rnext will be used in dispatch_only_normal
  4290   // post the breakpoint event
  4291   __ get_method(A1);
  4292   __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), A1, BCP);
  4294   // complete the execution of original bytecode
  4295   __ dispatch_only_normal(vtos);
  4298 //-----------------------------------------------------------------------------
  4299 // Exceptions
  4301 void TemplateTable::athrow() {
  4302   transition(atos, vtos);
  4303   __ null_check(FSR);
  4304   __ jmp(Interpreter::throw_exception_entry());
  4305   __ delayed()->nop();
  4308 //-----------------------------------------------------------------------------
  4309 // Synchronization
  4310 //
  4311 // Note: monitorenter & exit are symmetric routines; which is reflected
  4312 //       in the assembly code structure as well
  4313 //
  4314 // Stack layout:
  4315 //
  4316 // [expressions  ] <--- SP               = expression stack top
  4317 // ..
  4318 // [expressions  ]
  4319 // [monitor entry] <--- monitor block top = expression stack bot
  4320 // ..
  4321 // [monitor entry]
  4322 // [frame data   ] <--- monitor block bot
  4323 // ...
  4324 // [return addr  ] <--- FP
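// A sketch (plain C++, illustrative names) of the monitor-block bounds implied
// by the layout above, as monitorenter/monitorexit walk them below:
//   BasicObjectLock* top = *(BasicObjectLock**)
//       ((char*)FP + frame::interpreter_frame_monitor_block_top_offset * wordSize);
//   BasicObjectLock* bot = (BasicObjectLock*)
//       ((char*)FP + frame::interpreter_frame_initial_sp_offset * wordSize);
//   // entries occupy [top, bot); each one is
//   // frame::interpreter_frame_monitor_size() * wordSize bytes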
  4326 // we use T2 as monitor entry pointer, T3 as monitor top pointer, c_rarg0 as free slot pointer
  4327 // object always in FSR
  4328 void TemplateTable::monitorenter() {
  4329   transition(atos, vtos);
  4331   // check for NULL object
  4332   __ null_check(FSR);
  4334   const Address monitor_block_top(FP, frame::interpreter_frame_monitor_block_top_offset
  4335       * wordSize);
  4336   const int entry_size = (frame::interpreter_frame_monitor_size()* wordSize);
  4337   Label allocated;
  4339   // initialize entry pointer
  4340   __ move(c_rarg0, R0);
  // find a free slot in the monitor block (result in c_rarg0)
  4344     Label entry, loop, exit, next;
  4345     __ ld(T2, monitor_block_top);
  4346     __ b(entry);
  4347     __ delayed()->daddi(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize);
  4349     // free slot?
  4350     __ bind(loop);
  4351     __ ld(AT, T2, BasicObjectLock::obj_offset_in_bytes());
  4352     __ bne(AT, R0, next);
  4353     __ delayed()->nop();
  4354     __ move(c_rarg0, T2);
  4356     __ bind(next);
  4357     __ beq(FSR, AT, exit);
  4358     __ delayed()->nop();
  4359     __ daddi(T2, T2, entry_size);
  4361     __ bind(entry);
  4362     __ bne(T3, T2, loop);
  4363     __ delayed()->nop();
  4364     __ bind(exit);
  4367   __ bne(c_rarg0, R0, allocated);
  4368   __ delayed()->nop();
  4370   // allocate one if there's no free slot
  4372     Label entry, loop;
  4373     // 1. compute new pointers                   // SP: old expression stack top
  4374     __ ld(c_rarg0, monitor_block_top);
  4375     __ daddi(SP, SP, - entry_size);
  4376     __ daddi(c_rarg0, c_rarg0, - entry_size);
  4377     __ sd(c_rarg0, monitor_block_top);
  4378     __ b(entry);
  4379     __ delayed(); __ move(T3, SP);
  4381     // 2. move expression stack contents
  4382     __ bind(loop);
  4383     __ ld(AT, T3, entry_size);
  4384     __ sd(AT, T3, 0);
  4385     __ daddi(T3, T3, wordSize);
  4386     __ bind(entry);
  4387     __ bne(T3, c_rarg0, loop);
  4388     __ delayed()->nop();
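    // Rough sketch (plain C++, illustrative names) of the growth step above:
    // open a new monitor slot by sliding the expression stack down one entry:
    //   SP       -= entry_size;                       // new expression stack top
    //   new_top   = old_monitor_top - entry_size;     // new monitor block top
    //   for (char* p = SP; p != (char*)new_top; p += wordSize)
    //     *(intptr_t*)p = *(intptr_t*)(p + entry_size);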
  4391   __ bind(allocated);
  4392   // Increment bcp to point to the next bytecode,
  4393   // so exception handling for async. exceptions work correctly.
  // The object has already been popped from the stack, so the
  4395   // expression stack looks correct.
  4396   __ daddi(BCP, BCP, 1);
  4397   __ sd(FSR, c_rarg0, BasicObjectLock::obj_offset_in_bytes());
  4398   __ lock_object(c_rarg0);
  4399   // check to make sure this monitor doesn't cause stack overflow after locking
  4400   __ save_bcp();  // in case of exception
  4401   __ generate_stack_overflow_check(0);
  4402   // The bcp has already been incremented. Just need to dispatch to next instruction.
  4404   __ dispatch_next(vtos);
  4407 // T2 : top
  4408 // c_rarg0 : entry
  4409 void TemplateTable::monitorexit() {
  4410   transition(atos, vtos);
  4412   __ null_check(FSR);
  4414   const int entry_size =(frame::interpreter_frame_monitor_size()* wordSize);
  4415   Label found;
  4417   // find matching slot
  4419     Label entry, loop;
  4420     __ ld(c_rarg0, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize);
  4421     __ b(entry);
  4422     __ delayed()->daddiu(T2, FP, frame::interpreter_frame_initial_sp_offset * wordSize);
  4424     __ bind(loop);
  4425     __ ld(AT, c_rarg0, BasicObjectLock::obj_offset_in_bytes());
  4426     __ beq(FSR, AT, found);
  4427     __ delayed()->nop();
  4428     __ daddiu(c_rarg0, c_rarg0, entry_size);
  4429     __ bind(entry);
  4430     __ bne(T2, c_rarg0, loop);
  4431     __ delayed()->nop();
  4434   // error handling. Unlocking was not block-structured
  4435   Label end;
  4436   __ call_VM(NOREG, CAST_FROM_FN_PTR(address,
  4437   InterpreterRuntime::throw_illegal_monitor_state_exception));
  4438   __ should_not_reach_here();
  4440   // call run-time routine
  4441   // c_rarg0: points to monitor entry
  4442   __ bind(found);
  4443   __ move(TSR, FSR);
  4444   __ unlock_object(c_rarg0);
  4445   __ move(FSR, TSR);
  4446   __ bind(end);
  4450 // Wide instructions
  4451 void TemplateTable::wide() {
  4452   transition(vtos, vtos);
  // Note: the BCP increment step is part of the individual wide bytecode implementations
  4454   __ lbu(Rnext, at_bcp(1));
  4455   __ dsll(T9, Rnext, Address::times_8);
  4456   __ li(AT, (long)Interpreter::_wentry_point);
  4457   __ dadd(AT, T9, AT);
  4458   __ ld(T9, AT, 0);
  4459   __ jr(T9);
  4460   __ delayed()->nop();
  4464 void TemplateTable::multianewarray() {
  4465   transition(vtos, atos);
  4466   // last dim is on top of stack; we want address of first one:
  4467   // first_addr = last_addr + (ndims - 1) * wordSize
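  // e.g. (plain C++): with the 'ndims' dimension counts on top of the stack,
  // the address passed to InterpreterRuntime::multianewarray is
  //   intptr_t* first_dim_addr = (intptr_t*)SP + (ndims - 1);
  // and the count words are popped afterwards: SP += ndims * wordSize.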
  4468   __ lbu(A1, at_bcp(3));  // dimension
  4469   __ daddi(A1, A1, -1);
  4470   __ dsll(A1, A1, Address::times_8);
  __ dadd(A1, SP, A1);    // now A1 points to the count array on the stack
  4472   call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), A1);
  4473   __ lbu(AT, at_bcp(3));
  4474   __ dsll(AT, AT, Address::times_8);
  4475   __ dadd(SP, SP, AT);
  4476   __ sync();
  4478 #endif // !CC_INTERP
