src/cpu/mips/vm/macroAssembler_mips.cpp

author:     aoqi
date:       Thu, 24 May 2018 19:26:50 +0800
changeset:  8862:fd13a567f179
parent:     8019:3fb3ceb7398f
child:      8865:ffcdff41a92f

#7046 C2 supports long branch
Contributed-by: fujie

     1 /*
     2  * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
     3  * Copyright (c) 2017, Loongson Technology. All rights reserved.
     4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     5  *
     6  * This code is free software; you can redistribute it and/or modify it
     7  * under the terms of the GNU General Public License version 2 only, as
     8  * published by the Free Software Foundation.
     9  *
    10  * This code is distributed in the hope that it will be useful, but WITHOUT
    11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    13  * version 2 for more details (a copy is included in the LICENSE file that
    14  * accompanied this code).
    15  *
    16  * You should have received a copy of the GNU General Public License version
    17  * 2 along with this work; if not, write to the Free Software Foundation,
    18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    19  *
    20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    21  * or visit www.oracle.com if you need additional information or have any
    22  * questions.
    23  *
    24  */
    26 #include "precompiled.hpp"
    27 #include "asm/assembler.hpp"
    28 #include "asm/assembler.inline.hpp"
    29 #include "asm/macroAssembler.inline.hpp"
    30 #include "compiler/disassembler.hpp"
    31 #include "gc_interface/collectedHeap.inline.hpp"
    32 #include "interpreter/interpreter.hpp"
    33 #include "memory/cardTableModRefBS.hpp"
    34 #include "memory/resourceArea.hpp"
    35 #include "memory/universe.hpp"
    36 #include "prims/methodHandles.hpp"
    37 #include "runtime/biasedLocking.hpp"
    38 #include "runtime/interfaceSupport.hpp"
    39 #include "runtime/objectMonitor.hpp"
    40 #include "runtime/os.hpp"
    41 #include "runtime/sharedRuntime.hpp"
    42 #include "runtime/stubRoutines.hpp"
    43 #include "utilities/macros.hpp"
    44 #if INCLUDE_ALL_GCS
    45 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
    46 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
    47 #include "gc_implementation/g1/heapRegion.hpp"
    48 #endif // INCLUDE_ALL_GCS
    50 // Implementation of MacroAssembler
    52 intptr_t MacroAssembler::i[32] = {0};
    53 float MacroAssembler::f[32] = {0.0};
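       // Static scratch arrays used by the debugging helpers below: save_registers() /
       // restore_registers() spill and reload all 32 integer and 32 float registers,
       // and print() dumps the contents of these arrays.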
    55 void MacroAssembler::print(outputStream *s) {
    56   unsigned int k;
    57   for(k=0; k<sizeof(i)/sizeof(i[0]); k++) {
    58     s->print_cr("i%d = 0x%.16lx", k, i[k]);
    59   }
    60   s->cr();
    62   for(k=0; k<sizeof(f)/sizeof(f[0]); k++) {
    63     s->print_cr("f%d = %f", k, f[k]);
    64   }
    65   s->cr();
    66 }
    68 int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; }
    69 int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; }
    71 void MacroAssembler::save_registers(MacroAssembler *masm) {
    72 #define __ masm->
    73   for(int k=0; k<32; k++) {
    74     __ sw (as_Register(k), A0, i_offset(k));
    75   }
    77   for(int k=0; k<32; k++) {
    78     __ swc1 (as_FloatRegister(k), A0, f_offset(k));
    79   }
    80 #undef __
    81 }
    83 void MacroAssembler::restore_registers(MacroAssembler *masm) {
    84 #define __ masm->
    85   for(int k=0; k<32; k++) {
    86     __ lw (as_Register(k), A0, i_offset(k));
    87   }
    89   for(int k=0; k<32; k++) {
    90     __ lwc1 (as_FloatRegister(k), A0, f_offset(k));
    91   }
    92 #undef __
    93 }
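       // pd_patch_instruction() back-patches a forward branch once its label is bound.
       // It recognizes the long-branch stub emitted by b_far() (dadd/bgezal/lui/ori/daddu)
       // and the patchable_set48 + jr T9 form emitted by jmp_far(); anything else is
       // treated as an ordinary conditional branch whose 16-bit offset is patched.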
    96 void MacroAssembler::pd_patch_instruction(address branch, address target) {
    97   jint& stub_inst = *(jint*) branch;
    98   jint *pc = (jint *)branch;
   100 /* *
   101   move(AT, RA); // dadd
   102   emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
   103   nop();
   104         lui(T9, 0); // to be patched
   105         ori(T9, T9, 0);
   106   daddu(T9, T9, RA);
   107   move(RA, AT);
   108   jr(T9);
   109  */
   110   if((opcode(stub_inst) == special_op) && (special(stub_inst) == dadd_op)) {
   112     assert(opcode(pc[3]) == lui_op
   113           && opcode(pc[4]) == ori_op
   114           && special(pc[5]) == daddu_op, "Not a branch label patch");
   115     if(!(opcode(pc[3]) == lui_op
   116           && opcode(pc[4]) == ori_op
   117           && special(pc[5]) == daddu_op)) { tty->print_cr("Not a branch label patch"); }
   119     int offset = target - branch;
   120     if (!is_simm16(offset)) {
   121       pc[3] = (pc[3] & 0xffff0000) | high16(offset - 12);
   122       pc[4] = (pc[4] & 0xffff0000) | low16(offset - 12);
   123     } else {
   124       /* revert to "beq + nop" */
   125       CodeBuffer cb(branch, 4 * 10);
   126       MacroAssembler masm(&cb);
   127 #define __ masm.
   128       __ b(target);
   129       __ nop();
   130       __ nop();
   131       __ nop();
   132       __ nop();
   133       __ nop();
   134       __ nop();
   135       __ nop();
   136     }
   137     return;
   138   } else if (special(pc[4]) == jr_op
   139              && opcode(pc[4]) == special_op
   140              && (((opcode(pc[0]) == lui_op) || opcode(pc[0]) == daddiu_op) || (opcode(pc[0]) == ori_op))) {
   142     CodeBuffer cb(branch, 4 * 4);
   143     MacroAssembler masm(&cb);
   144     masm.patchable_set48(T9, (long)(target));
   145     return;
   146   }
   148 #ifndef PRODUCT
   149   if (!is_simm16((target - branch - 4) >> 2)) {
   150     tty->print_cr("Illegal patching: target=0x%lx", target);
   151     int *p = (int *)branch;
   152     for (int i = -10; i < 10; i++) {
   153        tty->print("0x%lx, ", p[i]);
   154     }
   155     tty->print_cr("");
   156   }
   157 #endif
   159   stub_inst = patched_branch(target - branch, stub_inst, 0);
   160 }
   162 static inline address first_cache_address() {
   163   return CodeCache::low_bound() + sizeof(HeapBlock::Header);
   164 }
   166 static inline address last_cache_address() {
   167   return CodeCache::high_bound() - Assembler::InstructionSize;
   168 }
   170 int MacroAssembler::call_size(address target, bool far, bool patchable) {
   171   if (patchable) return 6 << Assembler::LogInstructionSize;
   172   if (!far) return 2 << Assembler::LogInstructionSize; // jal + nop
   173   return (insts_for_set64((jlong)target) + 2) << Assembler::LogInstructionSize;
   174 }
   176 // Can we reach target using jal/j from anywhere
   177 // in the code cache (because code can be relocated)?
   178 bool MacroAssembler::reachable_from_cache(address target) {
   179   address cl = first_cache_address();
   180   address ch = last_cache_address();
   182   return fit_in_jal(target, cl) && fit_in_jal(target, ch);
   183 }
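       // MIPS j/jal encode only a 26-bit instruction index within the current 256MB
       // region, so a target counts as reachable only if a jal works from both ends of
       // the code cache (the code may later be relocated anywhere inside it).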
   185 void MacroAssembler::general_jump(address target) {
   186   if (reachable_from_cache(target)) {
   187     j(target);
   188     nop();
   189   } else {
   190     set64(T9, (long)target);
   191     jr(T9);
   192     nop();
   193   }
   194 }
   196 int MacroAssembler::insts_for_general_jump(address target) {
   197   if (reachable_from_cache(target)) {
   198     //j(target);
   199     //nop();
   200     return 2;
   201   } else {
   202     //set64(T9, (long)target);
   203     //jr(T9);
   204     //nop();
   205     return insts_for_set64((jlong)target) + 2;
   206   }
   207 }
   209 void MacroAssembler::patchable_jump(address target) {
   210   if (reachable_from_cache(target)) {
   211     nop();
   212     nop();
   213     nop();
   214     nop();
   215     j(target);
   216     nop();
   217   } else {
   218     patchable_set48(T9, (long)target);
   219     jr(T9);
   220     nop();
   221   }
   222 }
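       // patchable_jump()/patchable_call() always occupy 6 instruction slots (see
       // insts_for_patchable_jump/call), padding the short j/jal form with nops so the
       // site keeps a fixed size and can later be re-patched in place to the far form.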
   224 int MacroAssembler::insts_for_patchable_jump(address target) {
   225   return 6;
   226 }
   228 void MacroAssembler::general_call(address target) {
   229   if (reachable_from_cache(target)) {
   230     jal(target);
   231     nop();
   232   } else {
   233     set64(T9, (long)target);
   234     jalr(T9);
   235     nop();
   236   }
   237 }
   239 int MacroAssembler::insts_for_general_call(address target) {
   240   if (reachable_from_cache(target)) {
   241     //jal(target);
   242     //nop();
   243     return 2;
   244   } else {
   245     //set64(T9, (long)target);
   246     //jalr(T9);
   247     //nop();
   248     return insts_for_set64((jlong)target) + 2;
   249   }
   250 }
   252 void MacroAssembler::patchable_call(address target) {
   253   if (reachable_from_cache(target)) {
   254     nop();
   255     nop();
   256     nop();
   257     nop();
   258     jal(target);
   259     nop();
   260   } else {
   261     patchable_set48(T9, (long)target);
   262     jalr(T9);
   263     nop();
   264   }
   265 }
   267 int MacroAssembler::insts_for_patchable_call(address target) {
   268   return 6;
   269 }
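       // The *_far branches below test whether the signed 16-bit, word-scaled branch
       // displacement is sufficient; if not, they branch over an unconditional b_far()
       // using the inverted condition.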
   271 void MacroAssembler::beq_far(Register rs, Register rt, address entry) {
   272   u_char * cur_pc = pc();
   274   /* Jin: Near/Far jump */
   275   if(is_simm16((entry - pc() - 4) / 4)) {
   276     Assembler::beq(rs, rt, offset(entry));
   277   } else {
   278     Label not_jump;
   279     bne(rs, rt, not_jump);
   280     delayed()->nop();
   282     b_far(entry);
   283     delayed()->nop();
   285     bind(not_jump);
   286     has_delay_slot();
   287   }
   288 }
   290 void MacroAssembler::beq_far(Register rs, Register rt, Label& L) {
   291   if (L.is_bound()) {
   292     beq_far(rs, rt, target(L));
   293   } else {
   294     u_char * cur_pc = pc();
   295     Label not_jump;
   296     bne(rs, rt, not_jump);
   297     delayed()->nop();
   299     b_far(L);
   300     delayed()->nop();
   302     bind(not_jump);
   303     has_delay_slot();
   304   }
   305 }
   307 void MacroAssembler::bne_far(Register rs, Register rt, address entry) {
   308   u_char * cur_pc = pc();
   310   /* Jin: Near/Far jump */
   311   if(is_simm16((entry - pc() - 4) / 4)) {
   312     Assembler::bne(rs, rt, offset(entry));
   313   } else {
   314     Label not_jump;
   315     beq(rs, rt, not_jump);
   316     delayed()->nop();
   318     b_far(entry);
   319     delayed()->nop();
   321     bind(not_jump);
   322     has_delay_slot();
   323   }
   324 }
   326 void MacroAssembler::bne_far(Register rs, Register rt, Label& L) {
   327   if (L.is_bound()) {
   328     bne_far(rs, rt, target(L));
   329   } else {
   330     u_char * cur_pc = pc();
   331     Label not_jump;
   332     beq(rs, rt, not_jump);
   333     delayed()->nop();
   335     b_far(L);
   336     delayed()->nop();
   338     bind(not_jump);
   339     has_delay_slot();
   340   }
   341 }
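       // The *_long variants branch over a jmp_far() with the inverted condition; they
       // place no range limit on the target and back the C2 long-branch support (#7046).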
   343 void MacroAssembler::beq_long(Register rs, Register rt, Label& L) {
   344   Label not_taken;
   346   bne(rs, rt, not_taken);
   347   nop();
   349   jmp_far(L);
   351   bind(not_taken);
   352 }
   354 void MacroAssembler::bne_long(Register rs, Register rt, Label& L) {
   355   Label not_taken;
   357   beq(rs, rt, not_taken);
   358   nop();
   360   jmp_far(L);
   362   bind(not_taken);
   363 }
   365 void MacroAssembler::bc1t_long(Label& L) {
   366   Label not_taken;
   368   bc1f(not_taken);
   369   nop();
   371   jmp_far(L);
   373   bind(not_taken);
   374 }
   376 void MacroAssembler::bc1f_long(Label& L) {
   377   Label not_taken;
   379   bc1t(not_taken);
   380   nop();
   382   jmp_far(L);
   384   bind(not_taken);
   385 }
   387 void MacroAssembler::b_far(Label& L) {
   388   if (L.is_bound()) {
   389     b_far(target(L));
   390   } else {
   391     volatile address dest = target(L);
   392 /*
   393 MacroAssembler::pd_patch_instruction branch=55651ed514, target=55651ef6d8
   394    0x00000055651ed514: dadd at, ra, zero
   395    0x00000055651ed518: [4110001]bgezal zero, 0x00000055651ed520
   397    0x00000055651ed51c: sll zero, zero, 0
   398    0x00000055651ed520: lui t9, 0x0
   399    0x00000055651ed524: ori t9, t9, 0x21b8
   400    0x00000055651ed528: daddu t9, t9, ra
   401    0x00000055651ed52c: dadd ra, at, zero
   402    0x00000055651ed530: jr t9
   403    0x00000055651ed534: sll zero, zero, 0
   404 */
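       // The bgezal with rs = zero is taken unconditionally and only serves to load the
       // address of the instruction after its delay slot into RA; the patched lui/ori
       // pair then supplies a 32-bit displacement that is added to RA to form the
       // absolute target, while AT preserves the caller's RA across the sequence.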
   405     move(AT, RA);
   406     emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
   407     nop();
   408     lui(T9, 0); // to be patched
   409     ori(T9, T9, 0);
   410     daddu(T9, T9, RA);
   411     move(RA, AT);
   412     jr(T9);
   413   }
   414 }
   416 void MacroAssembler::b_far(address entry) {
   417   u_char * cur_pc = pc();
   419   /* Jin: Near/Far jump */
   420   if(is_simm16((entry - pc() - 4) / 4)) {
   421     b(offset(entry));
   422   } else {
   423     /* address must be bounded */
   424     move(AT, RA);
   425     emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
   426     nop();
   427     li32(T9, entry - pc());
   428     daddu(T9, T9, RA);
   429     move(RA, AT);
   430     jr(T9);
   431   }
   432 }
   434 void MacroAssembler::ld_ptr(Register rt, Register offset, Register base) {
   435   addu_long(AT, base, offset);
   436   ld_ptr(rt, 0, AT);
   437 }
   439 void MacroAssembler::st_ptr(Register rt, Register offset, Register base) {
   440   addu_long(AT, base, offset);
   441   st_ptr(rt, 0, AT);
   442 }
   444 void MacroAssembler::ld_long(Register rt, Register offset, Register base) {
   445   addu_long(AT, base, offset);
   446   ld_long(rt, 0, AT);
   447 }
   449 void MacroAssembler::st_long(Register rt, Register offset, Register base) {
   450   addu_long(AT, base, offset);
   451   st_long(rt, 0, AT);
   452 }
   454 Address MacroAssembler::as_Address(AddressLiteral adr) {
   455   return Address(adr.target(), adr.rspec());
   456 }
   458 Address MacroAssembler::as_Address(ArrayAddress adr) {
   459   return Address::make_array(adr);
   460 }
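       // atomic_inc32() adds 'inc' to a 32-bit counter with an ll/sc retry loop; the
       // store-conditional leaves 0 in tmp_reg2 on failure, in which case we retry.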
   462 // tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved).
   463 void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) {
   464   Label again;
   466   li(tmp_reg1, counter_addr);
   467   bind(again);
   468   if(UseSyncLevel >= 3000 || UseSyncLevel < 2000) sync();
   469   ll(tmp_reg2, tmp_reg1, 0);
   470   addi(tmp_reg2, tmp_reg2, inc);
   471   sc(tmp_reg2, tmp_reg1, 0);
   472   beq(tmp_reg2, R0, again);
   473   delayed()->nop();
   474 }
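       // biased_locking_enter() emits the biased-locking fast path: it branches to 'done'
       // when the lock is (or becomes) biased toward the current thread, to '*slow_case'
       // when a revocation/rebias CAS fails, and falls through to 'cas_label' otherwise.
       // The return value is the code offset to use for the implicit null check.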
   476 int MacroAssembler::biased_locking_enter(Register lock_reg,
   477                                          Register obj_reg,
   478                                          Register swap_reg,
   479                                          Register tmp_reg,
   480                                          bool swap_reg_contains_mark,
   481                                          Label& done,
   482                                          Label* slow_case,
   483                                          BiasedLockingCounters* counters) {
   484   assert(UseBiasedLocking, "why call this otherwise?");
   485   bool need_tmp_reg = false;
   486   if (tmp_reg == noreg) {
   487     need_tmp_reg = true;
   488     tmp_reg = T9;
   489   }
   490   assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT);
   491   assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
   492   Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
   493   Address saved_mark_addr(lock_reg, 0);
   495   // Biased locking
   496   // See whether the lock is currently biased toward our thread and
   497   // whether the epoch is still valid
   498   // Note that the runtime guarantees sufficient alignment of JavaThread
   499   // pointers to allow age to be placed into low bits
   500   // First check to see whether biasing is even enabled for this object
   501   Label cas_label;
   502   int null_check_offset = -1;
   503   if (!swap_reg_contains_mark) {
   504     null_check_offset = offset();
   505     ld_ptr(swap_reg, mark_addr);
   506   }
   508   if (need_tmp_reg) {
   509     push(tmp_reg);
   510   }
   511   move(tmp_reg, swap_reg);
   512   andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place);
   513 #ifdef _LP64
   514   daddi(AT, R0, markOopDesc::biased_lock_pattern);
   515   dsub(AT, AT, tmp_reg);
   516 #else
   517   addi(AT, R0, markOopDesc::biased_lock_pattern);
   518   sub(AT, AT, tmp_reg);
   519 #endif
   520   if (need_tmp_reg) {
   521     pop(tmp_reg);
   522   }
   524   bne(AT, R0, cas_label);
   525   delayed()->nop();
   528   // The bias pattern is present in the object's header. Need to check
   529   // whether the bias owner and the epoch are both still current.
   530   // Note that because there is no current thread register on MIPS we
   531   // need to store off the mark word we read out of the object to
   532   // avoid reloading it and needing to recheck invariants below. This
   533   // store is unfortunate but it makes the overall code shorter and
   534   // simpler.
   535   st_ptr(swap_reg, saved_mark_addr);
   536   if (need_tmp_reg) {
   537     push(tmp_reg);
   538   }
   539   if (swap_reg_contains_mark) {
   540     null_check_offset = offset();
   541   }
   542   load_prototype_header(tmp_reg, obj_reg);
   543   xorr(tmp_reg, tmp_reg, swap_reg);
   544   get_thread(swap_reg);
   545   xorr(swap_reg, swap_reg, tmp_reg);
   547   move(AT, ~((int) markOopDesc::age_mask_in_place));
   548   andr(swap_reg, swap_reg, AT);
   550   if (PrintBiasedLockingStatistics) {
   551     Label L;
   552     bne(swap_reg, R0, L);
   553     delayed()->nop();
   554     push(tmp_reg);
   555     push(A0);
   556     atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg);
   557     pop(A0);
   558     pop(tmp_reg);
   559     bind(L);
   560   }
   561   if (need_tmp_reg) {
   562     pop(tmp_reg);
   563   }
   564   beq(swap_reg, R0, done);
   565   delayed()->nop();
   566   Label try_revoke_bias;
   567   Label try_rebias;
   569   // At this point we know that the header has the bias pattern and
   570   // that we are not the bias owner in the current epoch. We need to
   571   // figure out more details about the state of the header in order to
   572   // know what operations can be legally performed on the object's
   573   // header.
   575   // If the low three bits in the xor result aren't clear, that means
   576   // the prototype header is no longer biased and we have to revoke
   577   // the bias on this object.
   579   move(AT, markOopDesc::biased_lock_mask_in_place);
   580   andr(AT, swap_reg, AT);
   581   bne(AT, R0, try_revoke_bias);
   582   delayed()->nop();
   583   // Biasing is still enabled for this data type. See whether the
   584   // epoch of the current bias is still valid, meaning that the epoch
   585   // bits of the mark word are equal to the epoch bits of the
   586   // prototype header. (Note that the prototype header's epoch bits
   587   // only change at a safepoint.) If not, attempt to rebias the object
   588   // toward the current thread. Note that we must be absolutely sure
   589   // that the current epoch is invalid in order to do this because
   590   // otherwise the manipulations it performs on the mark word are
   591   // illegal.
   593   move(AT, markOopDesc::epoch_mask_in_place);
   594   andr(AT,swap_reg, AT);
   595   bne(AT, R0, try_rebias);
   596   delayed()->nop();
   597   // The epoch of the current bias is still valid but we know nothing
   598   // about the owner; it might be set or it might be clear. Try to
   599   // acquire the bias of the object using an atomic operation. If this
   600   // fails we will go in to the runtime to revoke the object's bias.
   601   // Note that we first construct the presumed unbiased header so we
   602   // don't accidentally blow away another thread's valid bias.
   604   ld_ptr(swap_reg, saved_mark_addr);
   606   move(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
   607   andr(swap_reg, swap_reg, AT);
   609   if (need_tmp_reg) {
   610     push(tmp_reg);
   611   }
   612   get_thread(tmp_reg);
   613   orr(tmp_reg, tmp_reg, swap_reg);
   614   //if (os::is_MP()) {
   615   //  sync();
   616   //}
   617   cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
   618   if (need_tmp_reg) {
   619     pop(tmp_reg);
   620   }
   621   // If the biasing toward our thread failed, this means that
   622   // another thread succeeded in biasing it toward itself and we
   623   // need to revoke that bias. The revocation will occur in the
   624   // interpreter runtime in the slow case.
   625   if (PrintBiasedLockingStatistics) {
   626     Label L;
   627     bne(AT, R0, L);
   628     delayed()->nop();
   629     push(tmp_reg);
   630     push(A0);
   631     atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg);
   632     pop(A0);
   633     pop(tmp_reg);
   634     bind(L);
   635   }
   636   if (slow_case != NULL) {
   637     beq_far(AT, R0, *slow_case);
   638     delayed()->nop();
   639   }
   640   b(done);
   641   delayed()->nop();
   643   bind(try_rebias);
   644   // At this point we know the epoch has expired, meaning that the
   645   // current "bias owner", if any, is actually invalid. Under these
   646   // circumstances _only_, we are allowed to use the current header's
   647   // value as the comparison value when doing the cas to acquire the
   648   // bias in the current epoch. In other words, we allow transfer of
   649   // the bias from one thread to another directly in this situation.
   650   //
   651   // FIXME: due to a lack of registers we currently blow away the age
   652   // bits in this situation. Should attempt to preserve them.
   653   if (need_tmp_reg) {
   654     push(tmp_reg);
   655   }
   656   load_prototype_header(tmp_reg, obj_reg);
   657   get_thread(swap_reg);
   658   orr(tmp_reg, tmp_reg, swap_reg);
   659   ld_ptr(swap_reg, saved_mark_addr);
   661   //if (os::is_MP()) {
   662   //  sync();
   663   //}
   664   cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
   665   if (need_tmp_reg) {
   666     pop(tmp_reg);
   667   }
   668   // If the biasing toward our thread failed, then another thread
   669   // succeeded in biasing it toward itself and we need to revoke that
   670   // bias. The revocation will occur in the runtime in the slow case.
   671   if (PrintBiasedLockingStatistics) {
   672     Label L;
   673     bne(AT, R0, L);
   674     delayed()->nop();
   675     push(AT);
   676     push(tmp_reg);
   677     atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg);
   678     pop(tmp_reg);
   679     pop(AT);
   680     bind(L);
   681   }
   682   if (slow_case != NULL) {
   683     beq_far(AT, R0, *slow_case);
   684     delayed()->nop();
   685   }
   687   b(done);
   688   delayed()->nop();
   689   bind(try_revoke_bias);
   690   // The prototype mark in the klass doesn't have the bias bit set any
   691   // more, indicating that objects of this data type are not supposed
   692   // to be biased any more. We are going to try to reset the mark of
   693   // this object to the prototype value and fall through to the
   694   // CAS-based locking scheme. Note that if our CAS fails, it means
   695   // that another thread raced us for the privilege of revoking the
   696   // bias of this particular object, so it's okay to continue in the
   697   // normal locking code.
   698   //
   699   // FIXME: due to a lack of registers we currently blow away the age
   700   // bits in this situation. Should attempt to preserve them.
   701   ld_ptr(swap_reg, saved_mark_addr);
   703   if (need_tmp_reg) {
   704     push(tmp_reg);
   705   }
   706   load_prototype_header(tmp_reg, obj_reg);
   707   //if (os::is_MP()) {
   708   // lock();
   709   //}
   710   cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
   711   if (need_tmp_reg) {
   712     pop(tmp_reg);
   713   }
   714   // Fall through to the normal CAS-based lock, because no matter what
   715   // the result of the above CAS, some thread must have succeeded in
   716   // removing the bias bit from the object's header.
   717   if (PrintBiasedLockingStatistics) {
   718     Label L;
   719     bne(AT, R0, L);
   720     delayed()->nop();
   721     push(AT);
   722     push(tmp_reg);
   723     atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg);
   724     pop(tmp_reg);
   725     pop(AT);
   726     bind(L);
   727   }
   729   bind(cas_label);
   730   return null_check_offset;
   731 }
   733 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
   734   assert(UseBiasedLocking, "why call this otherwise?");
   736   // Check for biased locking unlock case, which is a no-op
   737   // Note: we do not have to check the thread ID for two reasons.
   738   // First, the interpreter checks for IllegalMonitorStateException at
   739   // a higher level. Second, if the bias was revoked while we held the
   740   // lock, the object could not be rebiased toward another thread, so
   741   // the bias bit would be clear.
   742 #ifdef _LP64
   743   ld(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
   744   andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
   745   daddi(AT, R0, markOopDesc::biased_lock_pattern);
   746 #else
   747   lw(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
   748   andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
   749   addi(AT, R0, markOopDesc::biased_lock_pattern);
   750 #endif
   752   beq(AT, temp_reg, done);
   753   delayed()->nop();
   754 }
   756 // The stack pointer adjustment is needed; see InterpreterMacroAssembler::super_call_VM_leaf.
   757 // This method handles the stack alignment itself, so the caller need not reserve stack space for the arguments.
   758 void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments) {
   759   Label L, E;
   761   assert(number_of_arguments <= 4, "just check");
   763   andi(AT, SP, 0xf);
   764   beq(AT, R0, L);
   765   delayed()->nop();
   766   daddi(SP, SP, -8);
   767   call(entry_point, relocInfo::runtime_call_type);
   768   delayed()->nop();
   769   daddi(SP, SP, 8);
   770   b(E);
   771   delayed()->nop();
   773   bind(L);
   774   call(entry_point, relocInfo::runtime_call_type);
   775   delayed()->nop();
   776   bind(E);
   777 }
   780 void MacroAssembler::jmp(address entry) {
   781   patchable_set48(T9, (long)entry);
   782   jr(T9);
   783 }
   785 void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) {
   786   switch (rtype) {
   787     case relocInfo::runtime_call_type:
   788     case relocInfo::none:
   789       jmp(entry);
   790       break;
   791     default:
   792       {
   793       InstructionMark im(this);
   794       relocate(rtype);
   795       patchable_set48(T9, (long)entry);
   796       jr(T9);
   797       }
   798       break;
   799   }
   800 }
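       // jmp_far() loads the target with a patchable 48-bit sequence and jumps through T9,
       // so the destination can be bound later via pd_patch_instruction(); the *_long
       // branch helpers above rely on this.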
   802 void MacroAssembler::jmp_far(Label& L) {
   803   if (L.is_bound()) {
   804     address entry = target(L);
   805     assert(entry != NULL, "jmp most probably wrong");
   806     InstructionMark im(this);
   808     relocate(relocInfo::internal_word_type);
   809     patchable_set48(T9, (long)entry);
   810   } else {
   811     InstructionMark im(this);
   812     L.add_patch_at(code(), locator());
   814     relocate(relocInfo::internal_word_type);
   815     patchable_set48(T9, (long)pc());
   816   }
   818   jr(T9);
   819   nop();
   820 }
   822 void MacroAssembler::call(address entry) {
   823 // C/C++ code assumes T9 holds the entry point, so we always move the entry into T9.
   824 // Maybe there is a more graceful way to handle this. FIXME
   825 // For more info, see class NativeCall.
   826 #ifndef _LP64
   827   move(T9, (int)entry);
   828 #else
   829   patchable_set48(T9, (long)entry);
   830 #endif
   831   jalr(T9);
   832 }
   834 void MacroAssembler::call(address entry, relocInfo::relocType rtype) {
   835   switch (rtype) {
   836     case relocInfo::runtime_call_type:
   837     case relocInfo::none:
   838       call(entry);
   839       break;
   840     default:
   841       {
   842   InstructionMark im(this);
   843   relocate(rtype);
   844   call(entry);
   845       }
   846       break;
   847   }
   848 }
   850 void MacroAssembler::call(address entry, RelocationHolder& rh)
   851 {
   852   switch (rh.type()) {
   853     case relocInfo::runtime_call_type:
   854     case relocInfo::none:
   855       call(entry);
   856       break;
   857     default:
   858       {
   859   InstructionMark im(this);
   860   relocate(rh);
   861   call(entry);
   862       }
   863       break;
   864   }
   865 }
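       // ic_call() emits an inline-cache call: IC_Klass is pre-loaded with
       // Universe::non_oop_word() as the initial cached-klass placeholder, and the call
       // itself uses the fixed-size patchable form so it can be rewritten later.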
   867 void MacroAssembler::ic_call(address entry) {
   868   RelocationHolder rh = virtual_call_Relocation::spec(pc());
   869   patchable_set48(IC_Klass, (long)Universe::non_oop_word());
   870   assert(entry != NULL, "call most probably wrong");
   871   InstructionMark im(this);
   872   relocate(rh);
   873         patchable_call(entry);
   874 }
   876 void MacroAssembler::c2bool(Register r) {
   877   Label L;
   878   Assembler::beq(r, R0, L);
   879   delayed()->nop();
   880   move(r, 1);
   881   bind(L);
   882 }
   884 #ifndef PRODUCT
   885 extern "C" void findpc(intptr_t x);
   886 #endif
   888 void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
   889   // In order to get locks to work, we need to fake an in_VM state
   890   JavaThread* thread = JavaThread::current();
   891   JavaThreadState saved_state = thread->thread_state();
   892   thread->set_thread_state(_thread_in_vm);
   893   if (ShowMessageBoxOnError) {
   894     JavaThread* thread = JavaThread::current();
   895     JavaThreadState saved_state = thread->thread_state();
   896     thread->set_thread_state(_thread_in_vm);
   897     if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
   898       ttyLocker ttyl;
   899       BytecodeCounter::print();
   900     }
   901     // To see where a verify_oop failed, get $ebx+40/X for this frame.
   902     // This is the value of eip which points to where verify_oop will return.
   903     if (os::message_box(msg, "Execution stopped, print registers?")) {
   904       ttyLocker ttyl;
   905       tty->print_cr("eip = 0x%08x", eip);
   906 #ifndef PRODUCT
   907       tty->cr();
   908       findpc(eip);
   909       tty->cr();
   910 #endif
   911       tty->print_cr("rax, = 0x%08x", rax);
   912       tty->print_cr("rbx, = 0x%08x", rbx);
   913       tty->print_cr("rcx = 0x%08x", rcx);
   914       tty->print_cr("rdx = 0x%08x", rdx);
   915       tty->print_cr("rdi = 0x%08x", rdi);
   916       tty->print_cr("rsi = 0x%08x", rsi);
   917       tty->print_cr("rbp, = 0x%08x", rbp);
   918       tty->print_cr("rsp = 0x%08x", rsp);
   919       BREAKPOINT;
   920     }
   921   } else {
   922     ttyLocker ttyl;
   923     ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
   924     assert(false, "DEBUG MESSAGE");
   925   }
   926   ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
   927 }
   929 void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) {
   930   if ( ShowMessageBoxOnError ) {
   931     JavaThreadState saved_state = JavaThread::current()->thread_state();
   932     JavaThread::current()->set_thread_state(_thread_in_vm);
   933     {
   934       // In order to get locks to work, we need to fake an in_VM state
   935       ttyLocker ttyl;
   936       ::tty->print_cr("EXECUTION STOPPED: %s\n", msg);
   937       if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
   938   BytecodeCounter::print();
   939       }
   941       //      if (os::message_box(msg, "Execution stopped, print registers?"))
   942       //        regs->print(::tty);
   943     }
   944     ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state);
   945   }
   946   else
   947     ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
   948 }
   951 void MacroAssembler::stop(const char* msg) {
   952   li(A0, (long)msg);
   953 #ifndef _LP64
   954   // reserve space for the argument. added by yjl 7/10/2005
   955   addiu(SP, SP, - 1 * wordSize);
   956 #endif
   957   call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
   958   delayed()->nop();
   959 #ifndef _LP64
   960   //restore space for argument
   961   addiu(SP, SP, 1 * wordSize);
   962 #endif
   963   brk(17);
   964 }
   966 void MacroAssembler::warn(const char* msg) {
   967 #ifdef _LP64
   968   pushad();
   969   li(A0, (long)msg);
   970   push(S2);
   971   move(AT, -(StackAlignmentInBytes));
   972   move(S2, SP);     // use S2 as a sender SP holder
   973   andr(SP, SP, AT); // align stack as required by ABI
   974   call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
   975   delayed()->nop();
   976   move(SP, S2);     // use S2 as a sender SP holder
   977   pop(S2);
   978   popad();
   979 #else
   980   pushad();
   981   addi(SP, SP, -4);
   982   sw(A0, SP, -1 * wordSize);
   983   li(A0, (long)msg);
   984   addi(SP, SP, -1 * wordSize);
   985   call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
   986   delayed()->nop();
   987   addi(SP, SP, 1 * wordSize);
   988   lw(A0, SP, -1 * wordSize);
   989   addi(SP, SP, 4);
   990   popad();
   991 #endif
   992 }
   994 void MacroAssembler::print_reg(Register reg) {
   995 /*
   996 char *s = getenv("PRINT_REG");
   997 if (s == NULL)
   998   return;
   999 if (strcmp(s, "1") != 0)
  1000   return;
  1001 */
  1002   void * cur_pc = pc();
  1003   pushad();
  1004   NOT_LP64(push(FP);)
  1006   li(A0, (long)reg->name());
  1007   if (reg == SP)
  1008     addiu(A1, SP, wordSize * 23); //23 registers saved in pushad()
  1009   else if (reg == A0)
  1010     ld(A1, SP, wordSize * 19); //A0 has been modified by li(A0, (long)reg->name()). Ugly Code!
  1011   else
  1012     move(A1, reg);
  1013   li(A2, (long)cur_pc);
  1014   push(S2);
  1015   move(AT, -(StackAlignmentInBytes));
  1016   move(S2, SP);     // use S2 as a sender SP holder
  1017   andr(SP, SP, AT); // align stack as required by ABI
  1018   call(CAST_FROM_FN_PTR(address, SharedRuntime::print_reg_with_pc),relocInfo::runtime_call_type);
  1019   delayed()->nop();
  1020   move(SP, S2);     // use S2 as a sender SP holder
  1021   pop(S2);
  1022   NOT_LP64(pop(FP);)
  1023   popad();
  1025 /*
  1026   pushad();
  1027 #ifdef _LP64
  1028   if (reg == SP)
  1029     addiu(A0, SP, wordSize * 23); //23 registers saved in pushad()
  1030   else
  1031     move(A0, reg);
  1032   call(CAST_FROM_FN_PTR(address, SharedRuntime::print_long),relocInfo::runtime_call_type);
  1033   delayed()->nop();
  1034 #else
  1035   push(FP);
  1036   move(A0, reg);
  1037   dsrl32(A1, reg, 0);
  1038   //call(CAST_FROM_FN_PTR(address, SharedRuntime::print_int),relocInfo::runtime_call_type);
  1039   call(CAST_FROM_FN_PTR(address, SharedRuntime::print_long),relocInfo::runtime_call_type);
  1040   delayed()->nop();
  1041   pop(FP);
  1042 #endif
  1043   popad();
  1044   pushad();
  1045   NOT_LP64(push(FP);)
  1046   char b[50];
  1047   sprintf((char *)b, " pc: %p\n",cur_pc);
  1048   li(A0, (long)(char *)b);
  1049   call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str),relocInfo::runtime_call_type);
  1050   delayed()->nop();
  1051   NOT_LP64(pop(FP);)
  1052   popad();
  1053 */
  1054 }
  1056 void MacroAssembler::print_reg(FloatRegister reg) {
  1057   void * cur_pc = pc();
  1058   pushad();
  1059   NOT_LP64(push(FP);)
  1060   li(A0, (long)reg->name());
  1061   push(S2);
  1062   move(AT, -(StackAlignmentInBytes));
  1063   move(S2, SP);     // use S2 as a sender SP holder
  1064   andr(SP, SP, AT); // align stack as required by ABI
  1065   call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str),relocInfo::runtime_call_type);
  1066   delayed()->nop();
  1067   move(SP, S2);     // use S2 as a sender SP holder
  1068   pop(S2);
  1069   NOT_LP64(pop(FP);)
  1070   popad();
  1072   pushad();
  1073   NOT_LP64(push(FP);)
  1074 #if 1
  1075   move(FP, SP);
  1076   move(AT, -(StackAlignmentInBytes));
  1077   andr(SP , SP , AT);
  1078   mov_d(F12, reg);
  1079   call(CAST_FROM_FN_PTR(address, SharedRuntime::print_double),relocInfo::runtime_call_type);
  1080   delayed()->nop();
  1081   move(SP, FP);
  1082 #else
  1083   mov_s(F12, reg);
  1084   //call(CAST_FROM_FN_PTR(address, SharedRuntime::print_float),relocInfo::runtime_call_type);
  1085   //delayed()->nop();
  1086 #endif
  1087   NOT_LP64(pop(FP);)
  1088   popad();
  1090 #if 0
  1091   pushad();
  1092   NOT_LP64(push(FP);)
  1093   char* b = new char[50];
  1094   sprintf(b, " pc: %p\n", cur_pc);
  1095   li(A0, (long)b);
  1096   call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str),relocInfo::runtime_call_type);
  1097   delayed()->nop();
  1098   NOT_LP64(pop(FP);)
  1099   popad();
  1100 #endif
  1101 }
  1103 void MacroAssembler::increment(Register reg, int imm) {
  1104   if (!imm) return;
  1105   if (is_simm16(imm)) {
  1106 #ifdef _LP64
  1107     daddiu(reg, reg, imm);
  1108 #else
  1109     addiu(reg, reg, imm);
  1110 #endif
  1111   } else {
  1112     move(AT, imm);
  1113 #ifdef _LP64
  1114     daddu(reg, reg, AT);
  1115 #else
  1116     addu(reg, reg, AT);
  1117 #endif
  1118   }
  1119 }
  1121 void MacroAssembler::decrement(Register reg, int imm) {
  1122   increment(reg, -imm);
  1123 }
  1126 void MacroAssembler::call_VM(Register oop_result,
  1127                              address entry_point,
  1128                              bool check_exceptions) {
  1129   call_VM_helper(oop_result, entry_point, 0, check_exceptions);
  1130 }
  1132 void MacroAssembler::call_VM(Register oop_result,
  1133                              address entry_point,
  1134                              Register arg_1,
  1135                              bool check_exceptions) {
  1136   if (arg_1!=A1) move(A1, arg_1);
  1137   call_VM_helper(oop_result, entry_point, 1, check_exceptions);
  1138 }
  1140 void MacroAssembler::call_VM(Register oop_result,
  1141                              address entry_point,
  1142                              Register arg_1,
  1143                              Register arg_2,
  1144                              bool check_exceptions) {
  1145   if (arg_1!=A1) move(A1, arg_1);
  1146   if (arg_2!=A2) move(A2, arg_2);
  1147   assert(arg_2 != A1, "smashed argument");
  1148   call_VM_helper(oop_result, entry_point, 2, check_exceptions);
  1149 }
  1151 void MacroAssembler::call_VM(Register oop_result,
  1152                              address entry_point,
  1153                              Register arg_1,
  1154                              Register arg_2,
  1155                              Register arg_3,
  1156                              bool check_exceptions) {
  1157   if (arg_1!=A1) move(A1, arg_1);
  1158   if (arg_2!=A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  1159   if (arg_3!=A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
  1160   call_VM_helper(oop_result, entry_point, 3, check_exceptions);
  1161 }
  1163 void MacroAssembler::call_VM(Register oop_result,
  1164                              Register last_java_sp,
  1165                              address entry_point,
  1166                              int number_of_arguments,
  1167                              bool check_exceptions) {
  1168   call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions);
  1169 }
  1171 void MacroAssembler::call_VM(Register oop_result,
  1172                              Register last_java_sp,
  1173                              address entry_point,
  1174                              Register arg_1,
  1175                              bool check_exceptions) {
  1176   if (arg_1 != A1) move(A1, arg_1);
  1177   call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
  1178 }
  1180 void MacroAssembler::call_VM(Register oop_result,
  1181                              Register last_java_sp,
  1182                              address entry_point,
  1183                              Register arg_1,
  1184                              Register arg_2,
  1185                              bool check_exceptions) {
  1186   if (arg_1 != A1) move(A1, arg_1);
  1187   if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  1188   call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
  1189 }
  1191 void MacroAssembler::call_VM(Register oop_result,
  1192                              Register last_java_sp,
  1193                              address entry_point,
  1194                              Register arg_1,
  1195                              Register arg_2,
  1196                              Register arg_3,
  1197                              bool check_exceptions) {
  1198   if (arg_1 != A1) move(A1, arg_1);
  1199   if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  1200   if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
  1201   call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
  1202 }
  1204 void MacroAssembler::call_VM_base(Register oop_result,
  1205                                   Register java_thread,
  1206                                   Register last_java_sp,
  1207                                   address  entry_point,
  1208                                   int      number_of_arguments,
  1209                                   bool     check_exceptions) {
  1211   address before_call_pc;
  1212   // determine java_thread register
  1213   if (!java_thread->is_valid()) {
  1214 #ifndef OPT_THREAD
  1215     java_thread = T2;
  1216     get_thread(java_thread);
  1217 #else
  1218     java_thread = TREG;
  1219 #endif
  1220   }
  1221   // determine last_java_sp register
  1222   if (!last_java_sp->is_valid()) {
  1223     last_java_sp = SP;
  1224   }
  1225   // debugging support
  1226   assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
  1227   assert(number_of_arguments <= 4   , "cannot have more than 4 arguments");
  1228   assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
  1229   assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");
  1231   assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save ebp");
  1233   // set last Java frame before call
  1234   before_call_pc = (address)pc();
  1235   set_last_Java_frame(java_thread, last_java_sp, FP, before_call_pc);
  1237   // do the call
  1238   move(A0, java_thread);
  1239   call(entry_point, relocInfo::runtime_call_type);
  1240   delayed()->nop();
  1241   //MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);
  1243   // restore the thread (cannot use the pushed argument since arguments
  1244   // may be overwritten by C code generated by an optimizing compiler);
  1245   // however can use the register value directly if it is callee saved.
  1246 #ifndef OPT_THREAD
  1247   get_thread(java_thread);
  1248 #else
  1249 #ifdef ASSERT
  1250   {
  1251     Label L;
  1252     get_thread(AT);
  1253     beq(java_thread, AT, L);
  1254     delayed()->nop();
  1255     stop("MacroAssembler::call_VM_base: TREG not callee saved?");
  1256     bind(L);
  1257   }
  1258 #endif
  1259 #endif
  1261   // discard thread and arguments
  1262   ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
  1263   // reset last Java frame
  1264   reset_last_Java_frame(java_thread, false, true);
  1266   check_and_handle_popframe(java_thread);
  1267   check_and_handle_earlyret(java_thread);
  1268   if (check_exceptions) {
  1269     // check for pending exceptions (java_thread is set upon return)
  1270     Label L;
  1271 #ifdef _LP64
  1272     ld(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
  1273 #else
  1274     lw(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
  1275 #endif
  1276     beq(AT, R0, L);
  1277     delayed()->nop();
  1278     li(AT, before_call_pc);
  1279     push(AT);
  1280     jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
  1281     delayed()->nop();
  1282     bind(L);
  1283   }
  1285   // get oop result if there is one and reset the value in the thread
  1286   if (oop_result->is_valid()) {
  1287 #ifdef _LP64
  1288     ld(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset()));
  1289     sd(R0, java_thread, in_bytes(JavaThread::vm_result_offset()));
  1290 #else
  1291     lw(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset()));
  1292     sw(R0, java_thread, in_bytes(JavaThread::vm_result_offset()));
  1293 #endif
  1294     verify_oop(oop_result);
  1295   }
  1296 }
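       // call_VM_helper() snapshots the caller's SP in V0 (which becomes last_java_sp),
       // realigns SP to StackAlignmentInBytes for the C ABI, and then delegates to
       // call_VM_base().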
  1298 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
  1300   move(V0, SP);
  1301   //we also reserve space for java_thread here
  1302 #ifndef _LP64
  1303   daddi(SP, SP, (1 + number_of_arguments) * (- wordSize));
  1304 #endif
  1305   move(AT, -(StackAlignmentInBytes));
  1306   andr(SP, SP, AT);
  1307   call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions);
  1308 }
  1311 void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  1312   call_VM_leaf_base(entry_point, number_of_arguments);
  1313 }
  1315 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  1316   if (arg_0 != A0) move(A0, arg_0);
  1317   call_VM_leaf(entry_point, 1);
  1318 }
  1320 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
  1321   if (arg_0 != A0) move(A0, arg_0);
  1322   if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument");
  1323   call_VM_leaf(entry_point, 2);
  1324 }
  1326 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  1327   if (arg_0 != A0) move(A0, arg_0);
  1328   if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument");
  1329   if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument");
  1330   call_VM_leaf(entry_point, 3);
  1331 }
  1332 void MacroAssembler::super_call_VM_leaf(address entry_point) {
  1333   MacroAssembler::call_VM_leaf_base(entry_point, 0);
  1334 }
  1337 void MacroAssembler::super_call_VM_leaf(address entry_point,
  1338                                                    Register arg_1) {
  1339   if (arg_1 != A0) move(A0, arg_1);
  1340   MacroAssembler::call_VM_leaf_base(entry_point, 1);
  1341 }
  1344 void MacroAssembler::super_call_VM_leaf(address entry_point,
  1345                                                    Register arg_1,
  1346                                                    Register arg_2) {
  1347   if (arg_1 != A0) move(A0, arg_1);
  1348   if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument");
  1349   MacroAssembler::call_VM_leaf_base(entry_point, 2);
  1350 }
  1351 void MacroAssembler::super_call_VM_leaf(address entry_point,
  1352                                                    Register arg_1,
  1353                                                    Register arg_2,
  1354                                                    Register arg_3) {
  1355   if (arg_1 != A0) move(A0, arg_1);
  1356   if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument");
  1357   if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument");
  1358   MacroAssembler::call_VM_leaf_base(entry_point, 3);
  1359 }
  1361 void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
  1362 }
  1364 void MacroAssembler::check_and_handle_popframe(Register java_thread) {
  1365 }
  1367 void MacroAssembler::null_check(Register reg, int offset) {
  1368   if (needs_explicit_null_check(offset)) {
  1369     // provoke OS NULL exception if reg = NULL by
  1370     // accessing M[reg] w/o changing any (non-CC) registers
  1371     // NOTE: cmpl is plenty here to provoke a segv
  1372     lw(AT, reg, 0);
  1373     // Note: should probably use testl(rax, Address(reg, 0));
  1374     //       may be shorter code (however, this version of
  1375     //       testl needs to be implemented first)
  1376   } else {
  1377     // nothing to do, (later) access of M[reg + offset]
  1378     // will provoke OS NULL exception if reg = NULL
  1379   }
  1380 }
  1382 void MacroAssembler::enter() {
  1383   push2(RA, FP);
  1384   move(FP, SP);
  1385 }
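       // leave() undoes enter(): SP is rewound to FP + 2 words and RA/FP are reloaded
       // from the two slots just below the restored SP.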
  1387 void MacroAssembler::leave() {
  1388 #ifndef _LP64
  1389   //move(SP, FP);
  1390   //pop2(FP, RA);
  1391   addi(SP, FP, 2 * wordSize);
  1392   lw(RA, SP, - 1 * wordSize);
  1393   lw(FP, SP, - 2 * wordSize);
  1394 #else
  1395   daddi(SP, FP, 2 * wordSize);
  1396   ld(RA, SP, - 1 * wordSize);
  1397   ld(FP, SP, - 2 * wordSize);
  1398 #endif
  1399 }
  1400 /*
  1401 void MacroAssembler::os_breakpoint() {
  1402   // instead of directly emitting a breakpoint, call os:breakpoint for better debugability
  1403   // (e.g., MSVC can't call ps() otherwise)
  1404   call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
  1406 */
  1407 void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
  1408   // determine java_thread register
  1409   if (!java_thread->is_valid()) {
  1410 #ifndef OPT_THREAD
  1411     java_thread = T1;
  1412     get_thread(java_thread);
  1413 #else
  1414     java_thread = TREG;
  1415 #endif
  1416   }
  1417   // we must set sp to zero to clear frame
  1418   st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
  1419   // must clear fp, so that compiled frames are not confused; it is possible
  1420   // that we need it only for debugging
  1421   if(clear_fp)
  1422     st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));
  1424   if (clear_pc)
  1425     st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
  1426 }
  1428 void MacroAssembler::reset_last_Java_frame(bool clear_fp,
  1429                                            bool clear_pc) {
  1430   Register thread = TREG;
  1431 #ifndef OPT_THREAD
  1432   get_thread(thread);
  1433 #endif
  1434   // we must set sp to zero to clear frame
  1435   sd(R0, Address(thread, JavaThread::last_Java_sp_offset()));
  1436   // must clear fp, so that compiled frames are not confused; it is
  1437   // possible that we need it only for debugging
  1438   if (clear_fp) {
  1439     sd(R0, Address(thread, JavaThread::last_Java_fp_offset()));
  1440   }
  1442   if (clear_pc) {
  1443     sd(R0, Address(thread, JavaThread::last_Java_pc_offset()));
  1444   }
  1445 }
  1447 // Write serialization page so VM thread can do a pseudo remote membar.
  1448 // We use the current thread pointer to calculate a thread specific
  1449 // offset to write to within the page. This minimizes bus traffic
  1450 // due to cache line collision.
  1451 void MacroAssembler::serialize_memory(Register thread, Register tmp) {
  1452   move(tmp, thread);
  1453   srl(tmp, tmp,os::get_serialize_page_shift_count());
  1454   move(AT, (os::vm_page_size() - sizeof(int)));
  1455   andr(tmp, tmp,AT);
  1456   sw(tmp,Address(tmp, (intptr_t)os::get_memory_serialize_page()));
  1457 }
  1459 // Calls to C land
  1460 //
  1461 // When entering C land, the rbp, & rsp of the last Java frame have to be recorded
  1462 // in the (thread-local) JavaThread object. When leaving C land, the last Java fp
  1463 // has to be reset to 0. This is required to allow proper stack traversal.
  1464 void MacroAssembler::set_last_Java_frame(Register java_thread,
  1465                                          Register last_java_sp,
  1466                                          Register last_java_fp,
  1467                                          address  last_java_pc) {
  1468   // determine java_thread register
  1469   if (!java_thread->is_valid()) {
  1470 #ifndef OPT_THREAD
  1471     java_thread = T2;
  1472     get_thread(java_thread);
  1473 #else
  1474     java_thread = TREG;
  1475 #endif
  1476   }
  1477   // determine last_java_sp register
  1478   if (!last_java_sp->is_valid()) {
  1479     last_java_sp = SP;
  1480   }
  1482   // last_java_fp is optional
  1484   if (last_java_fp->is_valid()) {
  1485     st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));
  1486   }
  1488   // last_java_pc is optional
  1490   if (last_java_pc != NULL) {
  1491     relocate(relocInfo::internal_pc_type);
  1492     patchable_set48(AT, (long)last_java_pc);
  1493     st_ptr(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
  1494   }
  1495   st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
  1496 }
  1498 void MacroAssembler::set_last_Java_frame(Register last_java_sp,
  1499                                          Register last_java_fp,
  1500                                          address  last_java_pc) {
  1501   // determine last_java_sp register
  1502   if (!last_java_sp->is_valid()) {
  1503     last_java_sp = SP;
  1504   }
  1506   Register thread = TREG;
  1507 #ifndef OPT_THREAD
  1508   get_thread(thread);
  1509 #endif
  1510   // last_java_fp is optional
  1511   if (last_java_fp->is_valid()) {
  1512     sd(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset()));
  1513   }
  1515   // last_java_pc is optional
  1516   if (last_java_pc != NULL) {
  1517     Address java_pc(thread,
  1518                     JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
  1519     li(AT, (intptr_t)(last_java_pc));
  1520     sd(AT, java_pc);
  1521   }
  1523   sd(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset()));
  1524 }
  1526 //////////////////////////////////////////////////////////////////////////////////
  1527 #if INCLUDE_ALL_GCS
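       // g1_write_barrier_pre() implements the G1 SATB pre-barrier: when concurrent
       // marking is active it records the field's previous value in the thread's SATB
       // queue, calling into the runtime (SharedRuntime::g1_wb_pre) when the queue is full.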
  1529 void MacroAssembler::g1_write_barrier_pre(Register obj,
  1530                                           Register pre_val,
  1531                                           Register thread,
  1532                                           Register tmp,
  1533                                           bool tosca_live,
  1534                                           bool expand_call) {
  1536   // If expand_call is true then we expand the call_VM_leaf macro
  1537   // directly to skip generating the check by
  1538   // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
  1540 #ifdef _LP64
  1541   assert(thread == TREG, "must be");
  1542 #endif // _LP64
  1544   Label done;
  1545   Label runtime;
  1547   assert(pre_val != noreg, "check this code");
  1549   if (obj != noreg) {
  1550     assert_different_registers(obj, pre_val, tmp);
  1551     assert(pre_val != V0, "check this code");
  1552   }
  1554   Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
  1555                                        PtrQueue::byte_offset_of_active()));
  1556   Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
  1557                                        PtrQueue::byte_offset_of_index()));
  1558   Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
  1559                                        PtrQueue::byte_offset_of_buf()));
  1562   // Is marking active?
  1563   if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
  1564     lw(AT, in_progress);
  1565   } else {
  1566     assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
  1567     lb(AT, in_progress);
  1568   }
  1569   beq(AT, R0, done);
  1570   nop();
  1572   // Do we need to load the previous value?
  1573   if (obj != noreg) {
  1574     load_heap_oop(pre_val, Address(obj, 0));
  1575   }
  1577   // Is the previous value null?
  1578   beq(pre_val, R0, done);
  1579   nop();
  1581   // Can we store original value in the thread's buffer?
  1582   // Is index == 0?
  1583   // (The index field is typed as size_t.)
  1585   ld(tmp, index);
  1586   beq(tmp, R0, runtime);
  1587   nop();
  1589   daddiu(tmp, tmp, -1 * wordSize);
  1590   sd(tmp, index);
  1591   ld(AT, buffer);
  1592   daddu(tmp, tmp, AT);
  1594   // Record the previous value
  1595   sd(pre_val, tmp, 0);
  1596   beq(R0, R0, done);
  1597   nop();
  1599   bind(runtime);
  1600   // save the live input values
  1601   if (tosca_live) push(V0);
  1603   if (obj != noreg && obj != V0) push(obj);
  1605   if (pre_val != V0) push(pre_val);
  1607   // Calling the runtime using the regular call_VM_leaf mechanism generates
  1608 //   code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
  1609   // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.
  1610   //
  1611   // If we are generating the pre-barrier without a frame (e.g. in the
  1612   // intrinsified Reference.get() routine) then ebp might be pointing to
  1613   // the caller frame and so this check will most likely fail at runtime.
  1614   //
  1615   // Expanding the call directly bypasses the generation of the check.
  1616   // So when we do not have a full interpreter frame on the stack
  1617   // expand_call should be passed true.
  1619   NOT_LP64( push(thread); )
  1621   if (expand_call) {
  1622     LP64_ONLY( assert(pre_val != A1, "smashed arg"); )
  1623     if (thread != A1) move(A1, thread);
  1624     if (pre_val != A0) move(A0, pre_val);
  1625     MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
  1626   } else {
  1627     call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
  1630   NOT_LP64( pop(thread); )
  1632   // restore the live input values
  1633   if (pre_val != V0)
  1634     pop(pre_val);
  1636   if (obj != noreg && obj != V0)
  1637     pop(obj);
  1639   if(tosca_live) pop(V0);
  1641   bind(done);
  1644 void MacroAssembler::g1_write_barrier_post(Register store_addr,
  1645                                            Register new_val,
  1646                                            Register thread,
  1647                                            Register tmp,
  1648                                            Register tmp2) {
  1649   assert(tmp  != AT, "must be");
  1650   assert(tmp2 != AT, "must be");
  1651 #ifdef _LP64
  1652   assert(thread == TREG, "must be");
  1653 #endif // _LP64
  1655   Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
  1656                                        PtrQueue::byte_offset_of_index()));
  1657   Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
  1658                                        PtrQueue::byte_offset_of_buf()));
  1660   BarrierSet* bs = Universe::heap()->barrier_set();
  1661   CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  1662   assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
  1664   Label done;
  1665   Label runtime;
  1667   // Does store cross heap regions?
  1668   xorr(AT, store_addr, new_val);
  1669   dsrl(AT, AT, HeapRegion::LogOfHRGrainBytes);
  1670   beq(AT, R0, done);
  1671   nop();
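  // Illustrative check: region sizes are a power of two (2^LogOfHRGrainBytes),
  // so two addresses in the same region differ only in their low
  // LogOfHRGrainBytes bits and the shifted xor above is zero, letting us skip
  // the rest of the post barrier.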
  1674   // crosses regions, storing NULL?
  1675   beq(new_val, R0, done);
  1676   nop();
  1678   // storing region crossing non-NULL, is card already dirty?
  1679   const Register card_addr = tmp;
  1680   const Register cardtable = tmp2;
  1682   move(card_addr, store_addr);
  1683   dsrl(card_addr, card_addr, CardTableModRefBS::card_shift);
  1684   // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT
  1685   // a valid address and therefore is not properly handled by the relocation code.
  1686   set64(cardtable, (intptr_t)ct->byte_map_base);
  1687   daddu(card_addr, card_addr, cardtable);
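  // card_addr now holds &byte_map_base[store_addr >> card_shift], the card
  // table byte covering the updated location.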
  1689   lb(AT, card_addr, 0);
  1690   daddiu(AT, AT, -1 * (int)G1SATBCardTableModRefBS::g1_young_card_val());
  1691   beq(AT, R0, done);
  1692   nop();
  1694   sync();
  1695   lb(AT, card_addr, 0);
  1696   daddiu(AT, AT, -1 * (int)CardTableModRefBS::dirty_card_val());
  1697   beq(AT, R0, done);
  1698   nop();
  1701   // storing a region crossing, non-NULL oop, card is clean.
  1702   // dirty card and log.
  1703   move(AT, (int)CardTableModRefBS::dirty_card_val());
  1704   sb(AT, card_addr, 0);
  1706   lw(AT, queue_index);
  1707   beq(AT, R0, runtime);
  1708   nop();
  1709   daddiu(AT, AT, -1 * wordSize);
  1710   sw(AT, queue_index);
  1711   ld(tmp2, buffer);
  1712 #ifdef _LP64
  1713   ld(AT, queue_index);
  1714   daddu(tmp2, tmp2, AT);
  1715   sd(card_addr, tmp2, 0);
  1716 #else
  1717   lw(AT, queue_index);
  1718   addu32(tmp2, tmp2, AT);
  1719   sw(card_addr, tmp2, 0);
  1720 #endif
  1721   beq(R0, R0, done);
  1722   nop();
  1724   bind(runtime);
  1725   // save the live input values
  1726   push(store_addr);
  1727   push(new_val);
  1728 #ifdef _LP64
  1729   call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, TREG);
  1730 #else
  1731   push(thread);
  1732   call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
  1733   pop(thread);
  1734 #endif
  1735   pop(new_val);
  1736   pop(store_addr);
  1738   bind(done);
  1741 #endif // INCLUDE_ALL_GCS
  1742 //////////////////////////////////////////////////////////////////////////////////
  1745 void MacroAssembler::store_check(Register obj) {
  1746   // Does a store check for the oop in register obj. The content of
  1747   // register obj is destroyed afterwards.
  1748   store_check_part_1(obj);
  1749   store_check_part_2(obj);
  1752 void MacroAssembler::store_check(Register obj, Address dst) {
  1753   store_check(obj);
  1757 // split the store check operation so that other instructions can be scheduled in between
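// A caller could, for example, emit (sketch only):
//   store_check_part_1(obj);    // obj now holds the card index
//   ... unrelated instructions ...
//   store_check_part_2(obj);    // add byte_map_base and mark the card dirty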
  1758 void MacroAssembler::store_check_part_1(Register obj) {
  1759   BarrierSet* bs = Universe::heap()->barrier_set();
  1760   assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  1761 #ifdef _LP64
  1762   dsrl(obj, obj, CardTableModRefBS::card_shift);
  1763 #else
  1764   shr(obj, CardTableModRefBS::card_shift);
  1765 #endif
  1768 void MacroAssembler::store_check_part_2(Register obj) {
  1769   BarrierSet* bs = Universe::heap()->barrier_set();
  1770   assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  1771   CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  1772   assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
  1774   set64(AT, (long)ct->byte_map_base);
  1775 #ifdef _LP64
  1776   dadd(AT, AT, obj);
  1777 #else
  1778   add(AT, AT, obj);
  1779 #endif
  1780   if (UseConcMarkSweepGC) sync();
  1781   sb(R0, AT, 0);
  1784 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
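// TLAB allocation is a simple bump-the-pointer scheme: obj is loaded from
// tlab_top, end = obj + size, and if end would pass tlab_end we branch to
// slow_case; otherwise tlab_top is advanced to end.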
  1785 void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
  1786                                    Register t1, Register t2, Label& slow_case) {
  1787   assert_different_registers(obj, var_size_in_bytes, t1, t2, AT);
  1789   Register end = t2;
  1790 #ifndef OPT_THREAD
  1791   Register thread = t1;
  1792   get_thread(thread);
  1793 #else
  1794   Register thread = TREG;
  1795 #endif
  1796   verify_tlab(t1, t2); // blows t1 & t2
  1798   ld_ptr(obj, thread, in_bytes(JavaThread::tlab_top_offset()));
  1800   if (var_size_in_bytes == NOREG) {
  1801     // I don't think we need to move con_size_in_bytes to a register first.
  1802     // by yjl 8/17/2005
  1803     assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first");
  1804     addi(end, obj, con_size_in_bytes);
  1805   } else {
  1806     add(end, obj, var_size_in_bytes);
  1809   ld_ptr(AT, thread, in_bytes(JavaThread::tlab_end_offset()));
  1810   sltu(AT, AT, end);
  1811   bne_far(AT, R0, slow_case);
  1812   delayed()->nop();
  1815   // update the tlab top pointer
  1816   st_ptr(end, thread, in_bytes(JavaThread::tlab_top_offset()));
  1818   // recover var_size_in_bytes if necessary
  1819   /*if (var_size_in_bytes == end) {
  1820     sub(var_size_in_bytes, end, obj);
  1821     }*/
  1823   verify_tlab(t1, t2);
  1826 // Defines obj, preserves var_size_in_bytes
  1827 void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
  1828                                    Register t1, Register t2, Label& slow_case) {
  1829   assert_different_registers(obj, var_size_in_bytes, t1, AT);
  1830   if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { //by yyq
  1831     // No allocation in the shared eden.
  1832     b_far(slow_case);
  1833     delayed()->nop();
  1834   } else {
  1836 #ifndef _LP64
  1837     Address heap_top(t1, Assembler::split_low((intptr_t)Universe::heap()->top_addr()));
  1838     lui(t1, split_high((intptr_t)Universe::heap()->top_addr()));
  1839 #else
  1840     Address heap_top(t1);
  1841     li(t1, (long)Universe::heap()->top_addr());
  1842 #endif
  1843     ld_ptr(obj, heap_top);
  1845     Register end = t2;
  1846     Label retry;
  1848     bind(retry);
  1849     if (var_size_in_bytes == NOREG) {
  1850       // I don't think we need to move con_size_in_bytes to a register first.
  1851       assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first");
  1852       addi(end, obj, con_size_in_bytes);
  1853     } else {
  1854       add(end, obj, var_size_in_bytes);
  1856     // if end < obj then we wrapped around => object too long => slow case
  1857     sltu(AT, end, obj);
  1858     bne_far(AT, R0, slow_case);
  1859     delayed()->nop();
  1861     li(AT, (long)Universe::heap()->end_addr());
  1862     sltu(AT, AT, end);
  1863     bne_far(AT, R0, slow_case);
  1864     delayed()->nop();
  1865     // Compare obj with the current top address; if they are still equal, store end as
  1866     // the new top at the top address pointer.  On MIPS this is done with cmpxchg (ll/sc),
  1867     // which sets AT to 1 on success and clears it if another thread got there first.
  1868     //if (os::is_MP()) {
  1869     //  sync();
  1870     //}
  1872     // if someone beat us on the allocation, try again, otherwise continue
  1873     cmpxchg(end, heap_top, obj);
  1874     beq_far(AT, R0, retry);    //by yyq
  1875     delayed()->nop();
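    // On failure cmpxchg leaves AT == 0 and loads the freshly observed top into
    // obj, so the retry loop recomputes end from the new value.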
  1880 // C2 doesn't invoke this one.
  1881 void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case) {
  1882   Register top = T0;
  1883   Register t1  = T1;
  1884 /* Jin: tlab_refill() is called from
  1886      [c1_Runtime1_mips.cpp] Runtime1::generate_code_for(new_type_array_id);
  1888    In generate_code_for(), T2 has already been assigned as the length register and is
  1889    used after the call to tlab_refill();
  1890    therefore tlab_refill() must not clobber T2.
  1892  Source:
  1894 Exception in thread "main" java.lang.ArrayIndexOutOfBoundsException
  1895         at java.lang.System.arraycopy(Native Method)
  1896         at java.util.Arrays.copyOf(Arrays.java:2799)  <-- alloc_array
  1897         at sun.misc.Resource.getBytes(Resource.java:117)
  1898         at java.net.URLClassLoader.defineClass(URLClassLoader.java:273)
  1899         at java.net.URLClassLoader.findClass(URLClassLoader.java:205)
  1900         at java.lang.ClassLoader.loadClass(ClassLoader.java:321)
  1901  */
  1902   Register t2  = T9;
  1903   Register t3  = T3;
  1904   Register thread_reg = T8;
  1905   Label do_refill, discard_tlab;
  1906   if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { //by yyq
  1907     // No allocation in the shared eden.
  1908     b(slow_case);
  1909     delayed()->nop();
  1912   get_thread(thread_reg);
  1914   ld_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
  1915   ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_end_offset()));
  1917   // calculate amount of free space
  1918   sub(t1, t1, top);
  1919   shr(t1, LogHeapWordSize);
  1921   // Retain tlab and allocate object in shared space if
  1922   // the amount free in the tlab is too large to discard.
  1923   ld_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
  1924   slt(AT, t2, t1);
  1925   beq(AT, R0, discard_tlab);
  1926   delayed()->nop();
  1928   // Retain
  1930 #ifndef _LP64
  1931   move(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
  1932 #else
  1933   li(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
  1934 #endif
  1935   add(t2, t2, AT);
  1936   st_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
  1938   if (TLABStats) {
  1939     // increment number of slow_allocations
  1940     lw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
  1941     addiu(AT, AT, 1);
  1942     sw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
  1944   b(try_eden);
  1945   delayed()->nop();
  1947   bind(discard_tlab);
  1948   if (TLABStats) {
  1949     // increment number of refills
  1950     lw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
  1951     addi(AT, AT, 1);
  1952     sw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
  1953     // accumulate wastage -- t1 is amount free in tlab
  1954     lw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
  1955     add(AT, AT, t1);
  1956     sw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
  1959   // if tlab is currently allocated (top or end != null) then
  1960   // fill [top, end + alignment_reserve) with array object
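  // The leftover space is formatted as a dummy int[] (mark word, length and
  // klass are set below) so that heap walkers still see a well-formed object
  // where the discarded TLAB used to be.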
  1961   beq(top, R0, do_refill);
  1962   delayed()->nop();
  1964   // set up the mark word
  1965   li(AT, (long)markOopDesc::prototype()->copy_set_hash(0x2));
  1966   st_ptr(AT, top, oopDesc::mark_offset_in_bytes());
  1968   // set the length to the remaining space
  1969   addi(t1, t1, - typeArrayOopDesc::header_size(T_INT));
  1970   addi(t1, t1, ThreadLocalAllocBuffer::alignment_reserve());
  1971   shl(t1, log2_intptr(HeapWordSize/sizeof(jint)));
  1972   sw(t1, top, arrayOopDesc::length_offset_in_bytes());
  1974   // set klass to intArrayKlass
  1975 #ifndef _LP64
  1976   lui(AT, split_high((intptr_t)Universe::intArrayKlassObj_addr()));
  1977   lw(t1, AT, split_low((intptr_t)Universe::intArrayKlassObj_addr()));
  1978 #else
  1979   li(AT, (intptr_t)Universe::intArrayKlassObj_addr());
  1980   ld_ptr(t1, AT, 0);
  1981 #endif
  1982   //st_ptr(t1, top, oopDesc::klass_offset_in_bytes());
  1983   store_klass(top, t1);
  1985   // refill the tlab with an eden allocation
  1986   bind(do_refill);
  1987   ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
  1988   shl(t1, LogHeapWordSize);
  1989   // add object_size ??
  1990   eden_allocate(top, t1, 0, t2, t3, slow_case);
  1992   // Check that t1 was preserved in eden_allocate.
  1993 #ifdef ASSERT
  1994   if (UseTLAB) {
  1995     Label ok;
  1996     assert_different_registers(thread_reg, t1);
  1997     ld_ptr(AT, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
  1998     shl(AT, LogHeapWordSize);
  1999     beq(AT, t1, ok);
  2000     delayed()->nop();
  2001     stop("assert(t1 != tlab size)");
  2002     should_not_reach_here();
  2004     bind(ok);
  2006 #endif
  2007   st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_start_offset()));
  2008   st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
  2009   add(top, top, t1);
  2010   addi(top, top, - ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
  2011   st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_end_offset()));
  2012   verify_tlab(t1, t2);
  2013   b(retry);
  2014   delayed()->nop();
  2017 static const double     pi_4 =  0.7853981633974483;
  2019 // the x86 version is too clumsy, I don't think we need that fuss. Maybe I'm wrong, FIXME
  2020 // must get argument(a double) in F12/F13
  2021 //void MacroAssembler::trigfunc(char trig, bool preserve_cpu_regs, int num_fpu_regs_in_use) {
  2022 // We need to preserve the registers which may be modified during the call. @Jerome
  2023 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
  2024 // save all modified registers here
  2025 //  if (preserve_cpu_regs) {
  2026 //  }
  2027 // FIXME: the disassembly of trigfunc only uses V0, V1, T9, SP and RA, so we only need to save V0, V1 and T9
  2028   pushad();
  2029 // we should reserve stack space before the call
  2030   addi(SP, SP, -wordSize * 2);
  2031   switch (trig) {
  2032     case 's':
  2033       call( CAST_FROM_FN_PTR(address, SharedRuntime::dsin), relocInfo::runtime_call_type );
  2034       delayed()->nop();
  2035       break;
  2036     case 'c':
  2037       call( CAST_FROM_FN_PTR(address, SharedRuntime::dcos), relocInfo::runtime_call_type );
  2038       delayed()->nop();
  2039       break;
  2040     case 't':
  2041       call( CAST_FROM_FN_PTR(address, SharedRuntime::dtan), relocInfo::runtime_call_type );
  2042       delayed()->nop();
  2043       break;
  2044     default: assert(false, "bad intrinsic");
  2045       break;
  2049   addi(SP, SP, wordSize * 2);
  2050   popad();
  2051 //  if (preserve_cpu_regs) {
  2052 //  }
  2055 #ifdef _LP64
  2056 void MacroAssembler::li(Register rd, long imm) {
  2057   if (imm <= max_jint && imm >= min_jint) {
  2058     li32(rd, (int)imm);
  2059   } else if (julong(imm) <= 0xFFFFFFFF) {
  2060     assert_not_delayed();
  2061     // lui sign-extends, so we can't use that.
  2062     ori(rd, R0, julong(imm) >> 16);
  2063     dsll(rd, rd, 16);
  2064     ori(rd, rd, split_low(imm));
  2065   //aoqi_test
  2066   //} else if ((imm > 0) && ((imm >> 48) == 0)) {
  2067   } else if ((imm > 0) && is_simm16(imm >> 32)) {
  2068     /* A 48-bit address */
  2069     li48(rd, imm);
  2070   } else {
  2071     li64(rd, imm);
  2074 #else
  2075 void MacroAssembler::li(Register rd, long imm) {
  2076   li32(rd, (int)imm);
  2078 #endif
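// Rough instruction cost of li (mirrored by set64/insts_for_set64 below):
// 1 instruction for a simm16, up to 2 for other signed 32-bit values, 3 for
// values that only need zero-extension to 64 bits, 4 for a 48-bit value (li48)
// and 6 otherwise (li64).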
  2080 void MacroAssembler::li32(Register reg, int imm) {
  2081   if (is_simm16(imm)) {
  2082     /* Jin: for imm < 0, we should use addi instead of addiu.
  2084      *  java.lang.StringCoding$StringDecoder.decode(jobject, jint, jint)
  2086      *  78 move [int:-1|I] [a0|I]
  2087      *    : daddi a0, zero, 0xffffffff  (correct)
  2088      *    : daddiu a0, zero, 0xffffffff (incorrect)
  2089      */
  2090     if (imm >= 0)
  2091       addiu(reg, R0, imm);
  2092     else
  2093       addi(reg, R0, imm);
  2094   } else {
  2095     lui(reg, split_low(imm >> 16));
  2096     if (split_low(imm))
  2097       ori(reg, reg, split_low(imm));
  2101 #ifdef _LP64
  2102 void MacroAssembler::set64(Register d, jlong value) {
  2103   assert_not_delayed();
  2105   int hi = (int)(value >> 32);
  2106   int lo = (int)(value & ~0);
  2108   if (value == lo) {  // 32-bit integer
  2109     if (is_simm16(value)) {
  2110       daddiu(d, R0, value);
  2111     } else {
  2112       lui(d, split_low(value >> 16));
  2113       if (split_low(value)) {
  2114         ori(d, d, split_low(value));
  2117   } else if (hi == 0) {  // hardware zero-extends to upper 32
  2118       ori(d, R0, julong(value) >> 16);
  2119       dsll(d, d, 16);
  2120       if (split_low(value)) {
  2121         ori(d, d, split_low(value));
  2123   } else if ((value> 0) && is_simm16(value >> 32)) {  // li48
  2124     // 4 insts
  2125     li48(d, value);
  2126   } else {  // li64
  2127     // 6 insts
  2128     li64(d, value);
  2133 int MacroAssembler::insts_for_set64(jlong value) {
  2134   int hi = (int)(value >> 32);
  2135   int lo = (int)(value & ~0);
  2137   int count = 0;
  2139   if (value == lo) {  // 32-bit integer
  2140     if (is_simm16(value)) {
  2141       //daddiu(d, R0, value);
  2142       count++;
  2143     } else {
  2144       //lui(d, split_low(value >> 16));
  2145       count++;
  2146       if (split_low(value)) {
  2147         //ori(d, d, split_low(value));
  2148         count++;
  2151   } else if (hi == 0) {  // hardware zero-extends to upper 32
  2152       //ori(d, R0, julong(value) >> 16);
  2153       //dsll(d, d, 16);
  2154       count += 2;
  2155       if (split_low(value)) {
  2156         //ori(d, d, split_low(value));
  2157         count++;
  2159   } else if ((value> 0) && is_simm16(value >> 32)) {  // li48
  2160     // 4 insts
  2161     //li48(d, value);
  2162     count += 4;
  2163   } else {  // li64
  2164     // 6 insts
  2165     //li64(d, value);
  2166     count += 6;
  2169   return count;
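  // Note: the case analysis above must be kept in sync with set64() so the
  // returned count matches the number of instructions set64() actually emits.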
  2172 void MacroAssembler::patchable_set48(Register d, jlong value) {
  2173   assert_not_delayed();
  2175   int hi = (int)(value >> 32);
  2176   int lo = (int)(value & ~0);
  2178   int count = 0;
  2180   if (value == lo) {  // 32-bit integer
  2181     if (is_simm16(value)) {
  2182       daddiu(d, R0, value);
  2183       count += 1;
  2184     } else {
  2185       lui(d, split_low(value >> 16));
  2186       count += 1;
  2187       if (split_low(value)) {
  2188         ori(d, d, split_low(value));
  2189         count += 1;
  2192   } else if (hi == 0) {  // hardware zero-extends to upper 32
  2193       ori(d, R0, julong(value) >> 16);
  2194       dsll(d, d, 16);
  2195       count += 2;
  2196       if (split_low(value)) {
  2197         ori(d, d, split_low(value));
  2198         count += 1;
  2200   } else if ((value> 0) && is_simm16(value >> 32)) {  // li48
  2201     // 4 insts
  2202     li48(d, value);
  2203     count += 4;
  2204   } else {  // li64
  2205     tty->print_cr("value = 0x%lx", value);
  2206     guarantee(false, "Not supported yet !");
  2209   for (; count < 4; count++) {
  2210     nop();
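  // The trailing nops pad every form out to a fixed four-instruction sequence,
  // so the constant can later be patched in place regardless of which encoding
  // was originally emitted.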
  2214 void MacroAssembler::patchable_set32(Register d, jlong value) {
  2215   assert_not_delayed();
  2217   int hi = (int)(value >> 32);
  2218   int lo = (int)(value & ~0);
  2220   int count = 0;
  2222   if (value == lo) {  // 32-bit integer
  2223     if (is_simm16(value)) {
  2224       daddiu(d, R0, value);
  2225       count += 1;
  2226     } else {
  2227       lui(d, split_low(value >> 16));
  2228       count += 1;
  2229       if (split_low(value)) {
  2230         ori(d, d, split_low(value));
  2231         count += 1;
  2234   } else if (hi == 0) {  // hardware zero-extends to upper 32
  2235       ori(d, R0, julong(value) >> 16);
  2236       dsll(d, d, 16);
  2237       count += 2;
  2238       if (split_low(value)) {
  2239         ori(d, d, split_low(value));
  2240         count += 1;
  2242   } else {
  2243     tty->print_cr("value = 0x%lx", value);
  2244     guarantee(false, "Not supported yet !");
  2247   for (; count < 3; count++) {
  2248     nop();
  2252 void MacroAssembler::patchable_call32(Register d, jlong value) {
  2253   assert_not_delayed();
  2255   int hi = (int)(value >> 32);
  2256   int lo = (int)(value & ~0);
  2258   int count = 0;
  2260   if (value == lo) {  // 32-bit integer
  2261     if (is_simm16(value)) {
  2262       daddiu(d, R0, value);
  2263       count += 1;
  2264     } else {
  2265       lui(d, split_low(value >> 16));
  2266       count += 1;
  2267       if (split_low(value)) {
  2268         ori(d, d, split_low(value));
  2269         count += 1;
  2272   } else {
  2273     tty->print_cr("value = 0x%lx", value);
  2274     guarantee(false, "Not supported yet !");
  2277   for (; count < 2; count++) {
  2278     nop();
  2282 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
  2283   assert(UseCompressedClassPointers, "should only be used for compressed header");
  2284   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
  2286   int klass_index = oop_recorder()->find_index(k);
  2287   RelocationHolder rspec = metadata_Relocation::spec(klass_index);
  2288   long narrowKlass = (long)Klass::encode_klass(k);
  2290   relocate(rspec, Assembler::narrow_oop_operand);
  2291   patchable_set48(dst, narrowKlass);
  2295 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
  2296   assert(UseCompressedOops, "should only be used for compressed header");
  2297   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
  2299   int oop_index = oop_recorder()->find_index(obj);
  2300   RelocationHolder rspec = oop_Relocation::spec(oop_index);
  2302   relocate(rspec, Assembler::narrow_oop_operand);
  2303   patchable_set48(dst, oop_index);
  2306 void MacroAssembler::li64(Register rd, long imm) {
  2307   assert_not_delayed();
  2308   lui(rd, imm >> 48);
  2309   ori(rd, rd, split_low(imm >> 32));
  2310   dsll(rd, rd, 16);
  2311   ori(rd, rd, split_low(imm >> 16));
  2312   dsll(rd, rd, 16);
  2313   ori(rd, rd, split_low(imm));
  2316 void MacroAssembler::li48(Register rd, long imm) {
  2317   assert_not_delayed();
  2318   assert(is_simm16(imm >> 32), "Not a 48-bit address");
  2319   lui(rd, imm >> 32);
  2320   ori(rd, rd, split_low(imm >> 16));
  2321   dsll(rd, rd, 16);
  2322   ori(rd, rd, split_low(imm));
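  // Illustrative decomposition for a hypothetical imm == 0x123456789abc:
  // lui rd, 0x1234; ori rd, rd, 0x5678; dsll rd, rd, 16; ori rd, rd, 0x9abc.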
  2324 #endif
  2325 // NOTE: unlike i486, we do not push eax here.
  2326 // The x86 version saves eax because it uses eax as the jump register.
  2327 void MacroAssembler::verify_oop(Register reg, const char* s) {
  2328   /*
  2329      if (!VerifyOops) return;
  2331   // Pass register number to verify_oop_subroutine
  2332   char* b = new char[strlen(s) + 50];
  2333   sprintf(b, "verify_oop: %s: %s", reg->name(), s);
  2334   push(rax);                          // save rax,
  2335   push(reg);                          // pass register argument
  2336   ExternalAddress buffer((address) b);
  2337   // avoid using pushptr, as it modifies scratch registers
  2338   // and our contract is not to modify anything
  2339   movptr(rax, buffer.addr());
  2340   push(rax);
  2341   // call indirectly to solve generation ordering problem
  2342   movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  2343   call(rax);
  2344    */
  2345   if (!VerifyOops) return;
  2346   const char * b = NULL;
  2347   stringStream ss;
  2348   ss.print("verify_oop: %s: %s", reg->name(), s);
  2349   b = code_string(ss.as_string());
  2350 #ifdef _LP64
  2351   pushad();
  2352   move(A1, reg);
  2353   li(A0, (long)b);
  2354   li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
  2355   ld(T9, AT, 0);
  2356   jalr(T9);
  2357   delayed()->nop();
  2358   popad();
  2359 #else
  2360   // Pass register number to verify_oop_subroutine
  2361   sw(T0, SP, - wordSize);
  2362   sw(T1, SP, - 2*wordSize);
  2363   sw(RA, SP, - 3*wordSize);
  2364   sw(A0, SP ,- 4*wordSize);
  2365   sw(A1, SP ,- 5*wordSize);
  2366   sw(AT, SP ,- 6*wordSize);
  2367   sw(T9, SP ,- 7*wordSize);
  2368   addiu(SP, SP, - 7 * wordSize);
  2369   move(A1, reg);
  2370   li(A0, (long)b);
  2371   // call indirectly to solve generation ordering problem
  2372   li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
  2373   lw(T9, AT, 0);
  2374   jalr(T9);
  2375   delayed()->nop();
  2376   lw(T0, SP, 6* wordSize);
  2377   lw(T1, SP, 5* wordSize);
  2378   lw(RA, SP, 4* wordSize);
  2379   lw(A0, SP, 3* wordSize);
  2380   lw(A1, SP, 2* wordSize);
  2381   lw(AT, SP, 1* wordSize);
  2382   lw(T9, SP, 0* wordSize);
  2383   addiu(SP, SP, 7 * wordSize);
  2384 #endif
  2388 void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
  2389   if (!VerifyOops) {
  2390     nop();
  2391     return;
  2393   // Pass register number to verify_oop_subroutine
  2394   const char * b = NULL;
  2395   stringStream ss;
  2396   ss.print("verify_oop_addr: %s",  s);
  2397   b = code_string(ss.as_string());
  2399   st_ptr(T0, SP, - wordSize);
  2400   st_ptr(T1, SP, - 2*wordSize);
  2401   st_ptr(RA, SP, - 3*wordSize);
  2402   st_ptr(A0, SP, - 4*wordSize);
  2403   st_ptr(A1, SP, - 5*wordSize);
  2404   st_ptr(AT, SP, - 6*wordSize);
  2405   st_ptr(T9, SP, - 7*wordSize);
  2406   ld_ptr(A1, addr);   // addr may use SP, so load from it before changing SP
  2407   addiu(SP, SP, - 7 * wordSize);
  2409   li(A0, (long)b);
  2410   // call indirectly to solve generation ordering problem
  2411   li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
  2412   ld_ptr(T9, AT, 0);
  2413   jalr(T9);
  2414   delayed()->nop();
  2415   ld_ptr(T0, SP, 6* wordSize);
  2416   ld_ptr(T1, SP, 5* wordSize);
  2417   ld_ptr(RA, SP, 4* wordSize);
  2418   ld_ptr(A0, SP, 3* wordSize);
  2419   ld_ptr(A1, SP, 2* wordSize);
  2420   ld_ptr(AT, SP, 1* wordSize);
  2421   ld_ptr(T9, SP, 0* wordSize);
  2422   addiu(SP, SP, 7 * wordSize);
  2425 // used registers :  T0, T1
  2426 void MacroAssembler::verify_oop_subroutine() {
  2427   // RA: ra
  2428   // A0: char* error message
  2429   // A1: oop   object to verify
  2431   Label exit, error;
  2432   // increment counter
  2433   li(T0, (long)StubRoutines::verify_oop_count_addr());
  2434   lw(AT, T0, 0);
  2435 #ifdef _LP64
  2436   daddi(AT, AT, 1);
  2437 #else
  2438   addi(AT, AT, 1);
  2439 #endif
  2440   sw(AT, T0, 0);
  2442   // make sure object is 'reasonable'
  2443   beq(A1, R0, exit);         // if obj is NULL it is ok
  2444   delayed()->nop();
  2446   // Check if the oop is in the right area of memory
  2447   //const int oop_mask = Universe::verify_oop_mask();
  2448   //const int oop_bits = Universe::verify_oop_bits();
  2449   const uintptr_t oop_mask = Universe::verify_oop_mask();
  2450   const uintptr_t oop_bits = Universe::verify_oop_bits();
  2451   li(AT, oop_mask);
  2452   andr(T0, A1, AT);
  2453   li(AT, oop_bits);
  2454   bne(T0, AT, error);
  2455   delayed()->nop();
  2457   // make sure klass is 'reasonable'
  2458   //add for compressedoops
  2459   reinit_heapbase();
  2460   //add for compressedoops
  2461   load_klass(T0, A1);
  2462   beq(T0, R0, error);                        // if klass is NULL it is broken
  2463   delayed()->nop();
  2464   #if 0
  2465   //FIXME:wuhui.
  2466   // Check if the klass is in the right area of memory
  2467   //const int klass_mask = Universe::verify_klass_mask();
  2468   //const int klass_bits = Universe::verify_klass_bits();
  2469   const uintptr_t klass_mask = Universe::verify_klass_mask();
  2470   const uintptr_t klass_bits = Universe::verify_klass_bits();
  2472   li(AT, klass_mask);
  2473   andr(T1, T0, AT);
  2474   li(AT, klass_bits);
  2475   bne(T1, AT, error);
  2476   delayed()->nop();
  2477   // make sure klass' klass is 'reasonable'
  2478   //add for compressedoops
  2479   load_klass(T0, T0);
  2480   beq(T0, R0, error);  // if klass' klass is NULL it is broken
  2481   delayed()->nop();
  2483   li(AT, klass_mask);
  2484   andr(T1, T0, AT);
  2485   li(AT, klass_bits);
  2486   bne(T1, AT, error);
  2487   delayed()->nop();     // if klass not in right area of memory it is broken too.
  2488 #endif
  2489   // return if everything seems ok
  2490   bind(exit);
  2492   jr(RA);
  2493   delayed()->nop();
  2495   // handle errors
  2496   bind(error);
  2497   pushad();
  2498 #ifndef _LP64
  2499   addi(SP, SP, (-1) * wordSize);
  2500 #endif
  2501   call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  2502   delayed()->nop();
  2503 #ifndef _LP64
  2504   addiu(SP, SP, 1 * wordSize);
  2505 #endif
  2506   popad();
  2507   jr(RA);
  2508   delayed()->nop();
  2511 void MacroAssembler::verify_tlab(Register t1, Register t2) {
  2512 #ifdef ASSERT
  2513   assert_different_registers(t1, t2, AT);
  2514   if (UseTLAB && VerifyOops) {
  2515     Label next, ok;
  2517     get_thread(t1);
  2519     ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset()));
  2520     ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset()));
  2521     sltu(AT, t2, AT);
  2522     beq(AT, R0, next);
  2523     delayed()->nop();
  2525     stop("assert(top >= start)");
  2527     bind(next);
  2528     ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset()));
  2529     sltu(AT, AT, t2);
  2530     beq(AT, R0, ok);
  2531     delayed()->nop();
  2533     stop("assert(top <= end)");
  2535     bind(ok);
  2538 #endif
  2540  RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
  2541                                                        Register tmp,
  2542                                                        int offset) {
  2543    intptr_t value = *delayed_value_addr;
  2544    if (value != 0)
  2545    return RegisterOrConstant(value + offset);
  2546    AddressLiteral a(delayed_value_addr);
  2547    // load indirectly to solve generation ordering problem
  2548    //movptr(tmp, ExternalAddress((address) delayed_value_addr));
  2549    //ld(tmp, a);
  2550    if (offset != 0)
  2551      daddi(tmp, tmp, offset);
  2553    return RegisterOrConstant(tmp);
  2556 void MacroAssembler::hswap(Register reg) {
  2557   //short
  2558   //andi(reg, reg, 0xffff);
  2559   srl(AT, reg, 8);
  2560   sll(reg, reg, 24);
  2561   sra(reg, reg, 16);
  2562   orr(reg, reg, AT);
  2565 void MacroAssembler::huswap(Register reg) {
  2566 #ifdef _LP64
  2567   dsrl(AT, reg, 8);
  2568   dsll(reg, reg, 24);
  2569   dsrl(reg, reg, 16);
  2570   orr(reg, reg, AT);
  2571   andi(reg, reg, 0xffff);
  2572 #else
  2573   //andi(reg, reg, 0xffff);
  2574   srl(AT, reg, 8);
  2575   sll(reg, reg, 24);
  2576   srl(reg, reg, 16);
  2577   orr(reg, reg, AT);
  2578 #endif
  2581 // something funny: do the 32-bit byte swap with only one more register, AT
  2583 void MacroAssembler::swap(Register reg) {
  2584   srl(AT, reg, 8);
  2585   sll(reg, reg, 24);
  2586   orr(reg, reg, AT);
  2587   //reg : 4 1 2 3
  2588   srl(AT, AT, 16);
  2589   xorr(AT, AT, reg);
  2590   andi(AT, AT, 0xff);
  2591   //AT : 0 0 0 1^3);
  2592   xorr(reg, reg, AT);
  2593   //reg : 4 1 2 1
  2594   sll(AT, AT, 16);
  2595   xorr(reg, reg, AT);
  2596   //reg : 4 3 2 1
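  // Worked example: 0x11223344 becomes 0x44112233 after the first or; the xor
  // trick then exchanges the 0x11 and 0x33 bytes, giving 0x44332211.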
  2599 #ifdef _LP64
  2601 /* do 32-bit CAS using MIPS64 lld/scd
  2603   Jin: cas_int should only compare 32-bits of the memory value.
  2604        However, lld/scd will do 64-bit operation, which violates the intention of cas_int.
  2605        To simulate a 32-bit atomic operation, the value loaded with LLD should be split into
  2606        two halves, and only the low 32 bits are compared. If they are equal, the low 32 bits of newval,
  2607        plus the high 32 bits of the memory value, are stored together with SCD.
  2609 Example:
  2611       double d = 3.1415926;
  2612       System.err.println("hello" + d);
  2614   sun.misc.FloatingDecimal$1.<init>()
  2616    `- java.util.concurrent.atomic.AtomicInteger::compareAndSet()
  2618   38 cas_int [a7a7|J] [a0|I] [a6|I]
  2619 // a0: 0xffffffffe8ea9f63 pc: 0x55647f3354
  2620 // a6: 0x4ab325aa
  2622 again:
  2623    0x00000055647f3c5c: lld at, 0x0(a7)                          ; 64-bit load, "0xe8ea9f63"
  2625    0x00000055647f3c60: sll t9, at, 0                            ; t9: low-32 bits (sign extended)
  2626    0x00000055647f3c64: dsrl32 t8, at, 0                         ; t8: high-32 bits
  2627    0x00000055647f3c68: dsll32 t8, t8, 0
  2628    0x00000055647f3c6c: bne t9, a0, 0x00000055647f3c9c           ; goto nequal
  2629    0x00000055647f3c70: sll zero, zero, 0
  2631    0x00000055647f3c74: ori v1, zero, 0xffffffff                 ; v1: low-32 bits of newval (sign unextended)
  2632    0x00000055647f3c78: dsll v1, v1, 16                          ; v1 = a6 & 0xFFFFFFFF;
  2633    0x00000055647f3c7c: ori v1, v1, 0xffffffff
  2634    0x00000055647f3c80: and v1, a6, v1
  2635    0x00000055647f3c84: or at, t8, v1
  2636    0x00000055647f3c88: scd at, 0x0(a7)
  2637    0x00000055647f3c8c: beq at, zero, 0x00000055647f3c5c         ; goto again
  2638    0x00000055647f3c90: sll zero, zero, 0
  2639    0x00000055647f3c94: beq zero, zero, 0x00000055647f45ac       ; goto done
  2640    0x00000055647f3c98: sll zero, zero, 0
  2641 nequal:
  2642    0x00000055647f45a4: dadd a0, t9, zero
  2643    0x00000055647f45a8: dadd at, zero, zero
  2644 done:
  2645 */
  2647 void MacroAssembler::cmpxchg32(Register x_reg, Address dest, Register c_reg) {
  2648   /* 2012/11/11 Jin: MIPS64 can use ll/sc for 32-bit atomic memory access */
  2649   Label done, again, nequal;
  2651   bind(again);
  2653   if(UseSyncLevel >= 3000 || UseSyncLevel < 2000) sync();
  2654   ll(AT, dest);
  2655   bne(AT, c_reg, nequal);
  2656   delayed()->nop();
  2658   move(AT, x_reg);
  2659   sc(AT, dest);
  2660   beq(AT, R0, again);
  2661   delayed()->nop();
  2662   b(done);
  2663   delayed()->nop();
  2665   // not xchged
  2666   bind(nequal);
  2667   sync();
  2668   move(c_reg, AT);
  2669   move(AT, R0);
  2671   bind(done);
  2673 #endif  // cmpxchg32
  2675 void MacroAssembler::cmpxchg(Register x_reg, Address dest, Register c_reg) {
  2676   Label done, again, nequal;
  2678   bind(again);
  2679   if(UseSyncLevel >= 3000 || UseSyncLevel < 2000) sync();
  2680 #ifdef _LP64
  2681   lld(AT, dest);
  2682 #else
  2683   ll(AT, dest);
  2684 #endif
  2685   bne(AT, c_reg, nequal);
  2686   delayed()->nop();
  2688   move(AT, x_reg);
  2689 #ifdef _LP64
  2690   scd(AT, dest);
  2691 #else
  2692   sc(AT, dest);
  2693 #endif
  2694   beq(AT, R0, again);
  2695   delayed()->nop();
  2696   b(done);
  2697   delayed()->nop();
  2699   // not xchged
  2700   bind(nequal);
  2701   sync();
  2702   move(c_reg, AT);
  2703   move(AT, R0);
  2705   bind(done);
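  // On exit AT == 1 if the exchange succeeded and 0 otherwise; on failure c_reg
  // is updated with the value observed in memory.  Callers such as fast_lock()
  // rely on this AT convention.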
  2708 void MacroAssembler::cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi) {
  2709   Label done, again, nequal;
  2711   Register x_reg = x_regLo;
  2712   dsll32(x_regHi, x_regHi, 0);
  2713   dsll32(x_regLo, x_regLo, 0);
  2714   dsrl32(x_regLo, x_regLo, 0);
  2715   orr(x_reg, x_regLo, x_regHi);
  2717   Register c_reg = c_regLo;
  2718   dsll32(c_regHi, c_regHi, 0);
  2719   dsll32(c_regLo, c_regLo, 0);
  2720   dsrl32(c_regLo, c_regLo, 0);
  2721   orr(c_reg, c_regLo, c_regHi);
  2723   bind(again);
  2725   if(UseSyncLevel >= 3000 || UseSyncLevel < 2000) sync();
  2726   lld(AT, dest);
  2727   bne(AT, c_reg, nequal);
  2728   delayed()->nop();
  2730   //move(AT, x_reg);
  2731   dadd(AT, x_reg, R0);
  2732   scd(AT, dest);
  2733   beq(AT, R0, again);
  2734   delayed()->nop();
  2735   b(done);
  2736   delayed()->nop();
  2738   // not xchged
  2739   bind(nequal);
  2740   sync();
  2741   //move(c_reg, AT);
  2742   //move(AT, R0);
  2743   dadd(c_reg, AT, R0);
  2744   dadd(AT, R0, R0);
  2745   bind(done);
  2749 // be sure the three registers are different
  2749 void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) {
  2750   assert_different_registers(tmp, fs, ft);
  2751   div_s(tmp, fs, ft);
  2752   trunc_l_s(tmp, tmp);
  2753   cvt_s_l(tmp, tmp);
  2754   mul_s(tmp, tmp, ft);
  2755   sub_s(fd, fs, tmp);
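  // i.e. fd = fs - trunc(fs / ft) * ft, a remainder truncated toward zero;
  // rem_d below is the double-precision analogue.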
  2758 // be sure the three registers are different
  2759 void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) {
  2760   assert_different_registers(tmp, fs, ft);
  2761   div_d(tmp, fs, ft);
  2762   trunc_l_d(tmp, tmp);
  2763   cvt_d_l(tmp, tmp);
  2764   mul_d(tmp, tmp, ft);
  2765   sub_d(fd, fs, tmp);
  2768 // Fast_Lock and Fast_Unlock used by C2
  2770 // Because the transitions from emitted code to the runtime
  2771 // monitorenter/exit helper stubs are so slow it's critical that
  2772 // we inline both the stack-locking fast-path and the inflated fast path.
  2773 //
  2774 // See also: cmpFastLock and cmpFastUnlock.
  2775 //
  2776 // What follows is a specialized inline transliteration of the code
  2777 // in slow_enter() and slow_exit().  If we're concerned about I$ bloat
  2778 // another option would be to emit TrySlowEnter and TrySlowExit methods
  2779 // at startup-time.  These methods would accept arguments as
  2780 //    (rax=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
  2781 // indications in the icc.ZFlag.  Fast_Lock and Fast_Unlock would simply
  2782 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
  2783 // In practice, however, the # of lock sites is bounded and is usually small.
  2784 // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
  2785 // if the processor uses simple bimodal branch predictors keyed by EIP
  2786 // Since the helper routines would be called from multiple synchronization
  2787 // sites.
  2788 //
  2789 // An even better approach would be write "MonitorEnter()" and "MonitorExit()"
  2790 // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
  2791 // to those specialized methods.  That'd give us a mostly platform-independent
  2792 // implementation that the JITs could optimize and inline at their pleasure.
  2793 //    Done correctly, the only time we'd need to cross to native code would be
  2794 // to park() or unpark() threads.  We'd also need a few more unsafe operators
  2795 // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
  2796 // (b) explicit barriers or fence operations.
  2797 //
  2798 // TODO:
  2799 //
  2800 // *  Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
  2801 //    This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
  2802 //    Given TLAB allocation, Self is usually manifested in a register, so passing it into
  2803 //    the lock operators would typically be faster than reifying Self.
  2804 //
  2805 // *  Ideally I'd define the primitives as:
  2806 //       fast_lock   (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
  2807 //       fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
  2808 //    Unfortunately ADLC bugs prevent us from expressing the ideal form.
  2809 //    Instead, we're stuck with a rather awkward and brittle register assignments below.
  2810 //    Furthermore the register assignments are overconstrained, possibly resulting in
  2811 //    sub-optimal code near the synchronization site.
  2812 //
  2813 // *  Eliminate the sp-proximity tests and just use "== Self" tests instead.
  2814 //    Alternately, use a better sp-proximity test.
  2815 //
  2816 // *  Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
  2817 //    Either one is sufficient to uniquely identify a thread.
  2818 //    TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
  2819 //
  2820 // *  Intrinsify notify() and notifyAll() for the common cases where the
  2821 //    object is locked by the calling thread but the waitlist is empty.
  2822 //    avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
  2823 //
  2824 // *  use jccb and jmpb instead of jcc and jmp to improve code density.
  2825 //    But beware of excessive branch density on AMD Opterons.
  2826 //
  2827 // *  Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
  2828 //    or failure of the fast-path.  If the fast-path fails then we pass
  2829 //    control to the slow-path, typically in C.  In Fast_Lock and
  2830 //    Fast_Unlock we often branch to DONE_LABEL, just to find that C2
  2831 //    will emit a conditional branch immediately after the node.
  2832 //    So we have branches to branches and lots of ICC.ZF games.
  2833 //    Instead, it might be better to have C2 pass a "FailureLabel"
  2834 //    into Fast_Lock and Fast_Unlock.  In the case of success, control
  2835 //    will drop through the node.  ICC.ZF is undefined at exit.
  2836 //    In the case of failure, the node will branch directly to the
  2837 //    FailureLabel
  2840 // obj: object to lock
  2841 // box: on-stack box address (displaced header location) - KILLED
  2842 // rax: tmp -- KILLED
  2843 // scr: tmp -- KILLED
  2844 void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, Register scrReg) {
  2846   // Ensure the register assignments are disjoint
  2847   guarantee (objReg != boxReg, "") ;
  2848   guarantee (objReg != tmpReg, "") ;
  2849   guarantee (objReg != scrReg, "") ;
  2850   guarantee (boxReg != tmpReg, "") ;
  2851   guarantee (boxReg != scrReg, "") ;
  2854   block_comment("FastLock");
  2855   /*
  2856      move(AT, 0x0);
  2857      return;
  2858      */
  2859   if (PrintBiasedLockingStatistics) {
  2860     push(tmpReg);
  2861     atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, AT, tmpReg);
  2862     pop(tmpReg);
  2865   if (EmitSync & 1) {
  2866     move(AT, 0x0);
  2867     return;
  2868   } else
  2869     if (EmitSync & 2) {
  2870       Label DONE_LABEL ;
  2871       if (UseBiasedLocking) {
  2872         // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
  2873         biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
  2876       ld(tmpReg, Address(objReg, 0)) ;          // fetch markword
  2877       ori(tmpReg, tmpReg, 0x1);
  2878       sd(tmpReg, Address(boxReg, 0));           // Anticipate successful CAS
  2880       cmpxchg(boxReg, Address(objReg, 0), tmpReg);          // Updates tmpReg
  2881       bne(AT, R0, DONE_LABEL);
  2882       delayed()->nop();
  2884       // Recursive locking
  2885       dsubu(tmpReg, tmpReg, SP);
  2886       li(AT, (7 - os::vm_page_size() ));
  2887       andr(tmpReg, tmpReg, AT);
  2888       sd(tmpReg, Address(boxReg, 0));
  2889       bind(DONE_LABEL) ;
  2890     } else {
  2891       // Possible cases that we'll encounter in fast_lock
  2892       // ------------------------------------------------
  2893       // * Inflated
  2894       //    -- unlocked
  2895       //    -- Locked
  2896       //       = by self
  2897       //       = by other
  2898       // * biased
  2899       //    -- by Self
  2900       //    -- by other
  2901       // * neutral
  2902       // * stack-locked
  2903       //    -- by self
  2904       //       = sp-proximity test hits
  2905       //       = sp-proximity test generates false-negative
  2906       //    -- by other
  2907       //
  2909       Label IsInflated, DONE_LABEL, PopDone ;
  2911       // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
  2912       // order to reduce the number of conditional branches in the most common cases.
  2913       // Beware -- there's a subtle invariant that fetch of the markword
  2914       // at [FETCH], below, will never observe a biased encoding (*101b).
  2915       // If this invariant is not held we risk exclusion (safety) failure.
  2916       if (UseBiasedLocking && !UseOptoBiasInlining) {
  2917         biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
  2920       ld(tmpReg, Address(objReg, 0)) ;         //Fetch the markword of the object.
  2921       andi(AT, tmpReg, markOopDesc::monitor_value);
  2922       bne(AT, R0, IsInflated);                      // inflated vs stack-locked|neutral|bias
  2923       delayed()->nop();
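      // The low two bits of the mark word select the locking state: 01 means
      // neutral (unlocked), 00 stack-locked, 10 an inflated monitor and 11 a
      // GC-marked object; biased locking additionally uses the 101 pattern
      // handled by biased_locking_enter() above.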
  2925       // Attempt stack-locking ...
  2926       ori (tmpReg, tmpReg, markOopDesc::unlocked_value);
  2927       sd(tmpReg, Address(boxReg, 0));          // Anticipate successful CAS
  2928       //if (os::is_MP()) {
  2929       //  sync();
  2930       //}
  2932       cmpxchg(boxReg, Address(objReg, 0), tmpReg);           // Updates tmpReg
  2933       //AT == 1: unlocked
  2935       if (PrintBiasedLockingStatistics) {
  2936         Label L;
  2937         beq(AT, R0, L);
  2938         delayed()->nop();
  2939         push(T0);
  2940         push(T1);
  2941         atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
  2942         pop(T1);
  2943         pop(T0);
  2944         bind(L);
  2946       bne(AT, R0, DONE_LABEL);
  2947       delayed()->nop();
  2949       // Recursive locking
  2950       // The object is stack-locked: markword contains stack pointer to BasicLock.
  2951       // Locked by current thread if difference with current SP is less than one page.
  2952       dsubu(tmpReg, tmpReg, SP);
  2953       li(AT, 7 - os::vm_page_size() );
  2954       andr(tmpReg, tmpReg, AT);
  2955       sd(tmpReg, Address(boxReg, 0));
  2956       if (PrintBiasedLockingStatistics) {
  2957         Label L;
  2958         // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++
  2959         bne(tmpReg, R0, L);
  2960         delayed()->nop();
  2961         push(T0);
  2962         push(T1);
  2963         atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
  2964         pop(T1);
  2965         pop(T0);
  2966         bind(L);
  2968       sltiu(AT, tmpReg, 1); /* AT = (tmpReg == 0) ? 1 : 0 */
  2970       b(DONE_LABEL) ;
  2971       delayed()->nop();
  2973       bind(IsInflated) ;
  2974       // The object's monitor m is unlocked iff m->owner == NULL,
  2975       // otherwise m->owner may contain a thread or a stack address.
  2977       // TODO: someday avoid the ST-before-CAS penalty by
  2978       // relocating (deferring) the following ST.
  2979       // We should also think about trying a CAS without having
  2980       // fetched _owner.  If the CAS is successful we may
  2981       // avoid an RTO->RTS upgrade on the $line.
  2982       // Without cast to int32_t a movptr will destroy r10 which is typically obj
  2983       li(AT, (int32_t)intptr_t(markOopDesc::unused_mark()));
  2984       sd(AT, Address(boxReg, 0));
  2986       move(boxReg, tmpReg) ;
  2987       ld(tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
  2988       // if (m->owner != 0) => AT = 0, goto slow path.
  2989       move(AT, R0);
  2990       bne(tmpReg, R0, DONE_LABEL);
  2991       delayed()->nop();
  2993 #ifndef OPT_THREAD
  2994       get_thread (TREG) ;
  2995 #endif
  2996       // It's inflated and appears unlocked
  2997       //if (os::is_MP()) {
  2998       //  sync();
  2999       //}
  3000       cmpxchg(TREG, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), tmpReg) ;
  3001       // Intentional fall-through into DONE_LABEL ...
  3004       // DONE_LABEL is a hot target - we'd really like to place it at the
  3005       // start of cache line by padding with NOPs.
  3006       // See the AMD and Intel software optimization manuals for the
  3007       // most efficient "long" NOP encodings.
  3008       // Unfortunately none of our alignment mechanisms suffice.
  3009       bind(DONE_LABEL);
  3011       // At DONE_LABEL the AT is set as follows ...
  3012       // Fast_Unlock uses the same protocol.
  3013       // AT == 1 -> Success
  3014       // AT == 0 -> Failure - force control through the slow-path
  3016       // Avoid branch-to-branch on AMD processors
  3017       // This appears to be superstition.
  3018       if (EmitSync & 32) nop() ;
  3023 // obj: object to unlock
  3024 // box: box address (displaced header location), killed.  Must be EAX.
  3025 // rbx: killed tmp; cannot be obj nor box.
  3026 //
  3027 // Some commentary on balanced locking:
  3028 //
  3029 // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
  3030 // Methods that don't have provably balanced locking are forced to run in the
  3031 // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
  3032 // The interpreter provides two properties:
  3033 // I1:  At return-time the interpreter automatically and quietly unlocks any
  3034 //      objects acquired by the current activation (frame).  Recall that the
  3035 //      interpreter maintains an on-stack list of locks currently held by
  3036 //      a frame.
  3037 // I2:  If a method attempts to unlock an object that is not held by
  3038 //      the frame the interpreter throws IMSX.
  3039 //
  3040 // Lets say A(), which has provably balanced locking, acquires O and then calls B().
  3041 // B() doesn't have provably balanced locking so it runs in the interpreter.
  3042 // Control returns to A() and A() unlocks O.  By I1 and I2, above, we know that O
  3043 // is still locked by A().
  3044 //
  3045 // The only other source of unbalanced locking would be JNI.  The "Java Native Interface:
  3046 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
  3047 // should not be unlocked by "normal" java-level locking and vice-versa.  The specification
  3048 // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
  3050 void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg) {
  3052   guarantee (objReg != boxReg, "") ;
  3053   guarantee (objReg != tmpReg, "") ;
  3054   guarantee (boxReg != tmpReg, "") ;
  3058   block_comment("FastUnlock");
  3061   if (EmitSync & 4) {
  3062     // Disable - inhibit all inlining.  Force control through the slow-path
  3063     move(AT, 0x0);
  3064     return;
  3065   } else
  3066     if (EmitSync & 8) {
  3067       Label DONE_LABEL ;
  3068       if (UseBiasedLocking) {
  3069         biased_locking_exit(objReg, tmpReg, DONE_LABEL);
  3071       // classic stack-locking code ...
  3072       ld(tmpReg, Address(boxReg, 0)) ;
  3073       beq(tmpReg, R0, DONE_LABEL) ;
  3074       move(AT, 0x1);  // delay slot
  3076       cmpxchg(tmpReg, Address(objReg, 0), boxReg);          // Uses EAX which is box
  3077       bind(DONE_LABEL);
  3078     } else {
  3079       Label DONE_LABEL, Stacked, CheckSucc, Inflated ;
  3081       // Critically, the biased locking test must have precedence over
  3082       // and appear before the (box->dhw == 0) recursive stack-lock test.
  3083       if (UseBiasedLocking && !UseOptoBiasInlining) {
  3084         biased_locking_exit(objReg, tmpReg, DONE_LABEL);
  3087       ld(AT, Address(boxReg, 0)) ;            // Examine the displaced header
  3088       beq(AT, R0, DONE_LABEL) ;      // 0 indicates recursive stack-lock
  3089       delayed()->daddiu(AT, R0, 0x1);
  3091       ld(tmpReg, Address(objReg, 0)) ;       // Examine the object's markword
  3092       andi(AT, tmpReg, markOopDesc::monitor_value) ;                     // Inflated?
  3093       beq(AT, R0, Stacked) ;                     // not inflated -> stack-locked
  3094       delayed()->nop();
  3096       bind(Inflated) ;
  3097       // It's inflated.
  3098       // Despite our balanced locking property we still check that m->_owner == Self
  3099       // as java routines or native JNI code called by this thread might
  3100       // have released the lock.
  3101       // Refer to the comments in synchronizer.cpp for how we might encode extra
  3102       // state in _succ so we can avoid fetching EntryList|cxq.
  3103       //
  3104       // I'd like to add more cases in fast_lock() and fast_unlock() --
  3105       // such as recursive enter and exit -- but we have to be wary of
  3106       // I$ bloat, T$ effects and BP$ effects.
  3107       //
  3108       // If there's no contention try a 1-0 exit.  That is, exit without
  3109       // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
  3110       // we detect and recover from the race that the 1-0 exit admits.
  3111       //
  3112       // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
  3113       // before it STs null into _owner, releasing the lock.  Updates
  3114       // to data protected by the critical section must be visible before
  3115       // we drop the lock (and thus before any other thread could acquire
  3116       // the lock and observe the fields protected by the lock).
  3117       // IA32's memory-model is SPO, so STs are ordered with respect to
  3118       // each other and there's no need for an explicit barrier (fence).
  3119       // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
  3120 #ifndef OPT_THREAD
  3121       get_thread (TREG) ;
  3122 #endif
  3124       // It's inflated
  3125       ld(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
  3126       xorr(boxReg, boxReg, TREG);
  3128       ld(AT, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
  3129       orr(boxReg, boxReg, AT);
  3131       move(AT, R0);
  3132       bne(boxReg, R0, DONE_LABEL);
  3133       delayed()->nop();
  3135       ld(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
  3136       ld(AT, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
  3137       orr(boxReg, boxReg, AT);
  3139       move(AT, R0);
  3140       bne(boxReg, R0, DONE_LABEL);
  3141       delayed()->nop();
  3143       sync();
  3144       sd(R0, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
  3145       move(AT, 0x1);
  3146       b(DONE_LABEL);
  3147       delayed()->nop();
  3149       bind  (Stacked);
  3150       ld(tmpReg, Address(boxReg, 0)) ;
  3151       //if (os::is_MP()) { sync(); }
  3152       cmpxchg(tmpReg, Address(objReg, 0), boxReg);
  3154       if (EmitSync & 65536) {
  3155         bind (CheckSucc);
  3158       bind(DONE_LABEL);
  3160       // Avoid branch to branch on AMD processors
  3161       if (EmitSync & 32768) { nop() ; }
  3165 void MacroAssembler::align(int modulus) {
  3166   while (offset() % modulus != 0) nop();
  3170 void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
  3171   //Unimplemented();
  3174 #ifdef _LP64
  3175 Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
  3177 /* FIXME: Jin: In MIPS64, F0~23 are all caller-saved registers */
  3178 FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13};
  3179 #else
  3180 Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, T4, T5, T6, T7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
  3180 FloatRegister caller_saved_fpu_registers[] = {};
  3183 #endif
  3185 // We preserve all caller-saved registers.
  3186 void  MacroAssembler::pushad(){
  3187   int i;
  3189   /* Fixed-point registers */
  3190   int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
  3191   daddi(SP, SP, -1 * len * wordSize);
  3192   for (i = 0; i < len; i++)
  3194 #ifdef _LP64
  3195     sd(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
  3196 #else
  3197     sw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
  3198 #endif
  3201   /* Floating-point registers */
  3202   len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
  3203   daddi(SP, SP, -1 * len * wordSize);
  3204   for (i = 0; i < len; i++)
  3206 #ifdef _LP64
  3207     sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
  3208 #else
  3209     swc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
  3210 #endif
  3212 };
  3214 void  MacroAssembler::popad(){
  3215   int i;
  3217   /* Floating-point registers */
  3218   int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
  3219   for (i = 0; i < len; i++)
  3221 #ifdef _LP64
  3222     ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
  3223 #else
  3224     lwc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
  3225 #endif
  3227   daddi(SP, SP, len * wordSize);
  3229   /* Fixed-point registers */
  3230   len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
  3231   for (i = 0; i < len; i++)
  3233 #ifdef _LP64
  3234     ld(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
  3235 #else
  3236     lw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
  3237 #endif
  3239   daddi(SP, SP, len * wordSize);
  3240 };
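       // Illustrative pairing (hypothetical call site): pushad() spills the fixed-point
       // caller-saved set and then the FPU set onto the stack, and popad() restores them
       // in the reverse order, so the two are always used as a bracketed pair:
       //   pushad();
       //   ... code that may clobber caller-saved registers (e.g. a runtime call) ...
       //   popad();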
  3242 void MacroAssembler::push2(Register reg1, Register reg2) {
  3243 #ifdef _LP64
  3244   daddi(SP, SP, -16);
  3245   sd(reg2, SP, 0);
  3246   sd(reg1, SP, 8);
  3247 #else
  3248   addi(SP, SP, -8);
  3249   sw(reg2, SP, 0);
  3250   sw(reg1, SP, 4);
  3251 #endif
  3254 void MacroAssembler::pop2(Register reg1, Register reg2) {
  3255 #ifdef _LP64
  3256   ld(reg1, SP, 0);
  3257   ld(reg2, SP, 8);
  3258   daddi(SP, SP, 16);
  3259 #else
  3260   lw(reg1, SP, 0);
  3261   lw(reg2, SP, 4);
  3262   addi(SP, SP, 8);
  3263 #endif
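       // Note on the slot layout above (descriptive only): push2(reg1, reg2) stores reg2
       // at SP+0 and reg1 at SP+wordSize, while pop2(reg1, reg2) loads its first argument
       // from SP+0 and its second from SP+wordSize, i.e. pop2's first argument receives
       // the value that the matching push2 was given as its second argument.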
  3266 // For the UseCompressedOops option.
  3267 void MacroAssembler::load_klass(Register dst, Register src) {
  3268 #ifdef _LP64
  3269   if(UseCompressedClassPointers){
  3270     lwu(dst, Address(src, oopDesc::klass_offset_in_bytes()));
  3271     decode_klass_not_null(dst);
  3272   } else
  3273 #endif
  3274   ld(dst, src, oopDesc::klass_offset_in_bytes());
  3277 void MacroAssembler::store_klass(Register dst, Register src) {
  3278 #ifdef _LP64
  3279   if(UseCompressedClassPointers){
  3280     encode_klass_not_null(src);
  3281     sw(src, dst, oopDesc::klass_offset_in_bytes());
  3282   } else {
  3283 #endif
  3284     sd(src, dst, oopDesc::klass_offset_in_bytes());
  3288 void MacroAssembler::load_prototype_header(Register dst, Register src) {
  3289   load_klass(dst, src);
  3290   ld(dst, Address(dst, Klass::prototype_header_offset()));
  3293 #ifdef _LP64
  3294 void MacroAssembler::store_klass_gap(Register dst, Register src) {
  3295   if (UseCompressedClassPointers) {
  3296     sw(src, dst, oopDesc::klass_gap_offset_in_bytes());
  3300 void MacroAssembler::load_heap_oop(Register dst, Address src) {
  3301   if(UseCompressedOops){
  3302     lwu(dst, src);
  3303     decode_heap_oop(dst);
  3304   } else {
  3305     ld(dst, src);
  3309 void MacroAssembler::store_heap_oop(Address dst, Register src){
  3310   if(UseCompressedOops){
  3311     assert(!dst.uses(src), "not enough registers");
  3312     encode_heap_oop(src);
  3313     sw(src, dst);
  3314   } else {
  3315     sd(src, dst);
  3319 void MacroAssembler::store_heap_oop_null(Address dst){
  3320   if(UseCompressedOops){
  3321     sw(R0, dst);
  3322   } else {
  3323     sd(R0, dst);
  3327 #ifdef ASSERT
  3328 void MacroAssembler::verify_heapbase(const char* msg) {
  3329   assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed");
  3330   assert (Universe::heap() != NULL, "java heap should be initialized");
  3332 #endif
  3335 // Algorithm must match oop.inline.hpp encode_heap_oop.
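       // In short (illustrative formula; base and shift come from Universe):
       //   narrow = (oop == NULL) ? 0 : (oop - narrow_oop_base) >> narrow_oop_shift
       // The movz below keeps a NULL oop mapped to 0 when a non-zero heap base is in use.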
  3336 void MacroAssembler::encode_heap_oop(Register r) {
  3337 #ifdef ASSERT
  3338   verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
  3339 #endif
  3340   verify_oop(r, "broken oop in encode_heap_oop");
  3341   if (Universe::narrow_oop_base() == NULL) {
  3342     if (Universe::narrow_oop_shift() != 0) {
  3343       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3344       shr(r, LogMinObjAlignmentInBytes);
  3346     return;
  3349   movz(r, S5_heapbase, r);
  3350   dsub(r, r, S5_heapbase);
  3351   if (Universe::narrow_oop_shift() != 0) {
  3352     assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3353     shr(r, LogMinObjAlignmentInBytes);
  3357 void MacroAssembler::encode_heap_oop(Register dst, Register src) {
  3358 #ifdef ASSERT
  3359   verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
  3360 #endif
  3361   verify_oop(src, "broken oop in encode_heap_oop");
  3362   if (Universe::narrow_oop_base() == NULL) {
  3363     if (Universe::narrow_oop_shift() != 0) {
  3364       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3365       dsrl(dst, src, LogMinObjAlignmentInBytes);
  3366     } else {
  3367       if (dst != src) move(dst, src);
  3369   } else {
  3370     if (dst == src) {
  3371       movz(dst, S5_heapbase, dst);
  3372       dsub(dst, dst, S5_heapbase);
  3373       if (Universe::narrow_oop_shift() != 0) {
  3374         assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3375         shr(dst, LogMinObjAlignmentInBytes);
  3377     } else {
  3378       dsub(dst, src, S5_heapbase);
  3379       if (Universe::narrow_oop_shift() != 0) {
  3380         assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3381         shr(dst, LogMinObjAlignmentInBytes);
  3383       movz(dst, R0, src);
  3388 void MacroAssembler::encode_heap_oop_not_null(Register r) {
  3389   assert (UseCompressedOops, "should be compressed");
  3390 #ifdef ASSERT
  3391   if (CheckCompressedOops) {
  3392     Label ok;
  3393     bne(r, R0, ok);
  3394     delayed()->nop();
  3395     stop("null oop passed to encode_heap_oop_not_null");
  3396     bind(ok);
  3398 #endif
  3399   verify_oop(r, "broken oop in encode_heap_oop_not_null");
  3400   if (Universe::narrow_oop_base() != NULL) {
  3401     dsub(r, r, S5_heapbase);
  3403   if (Universe::narrow_oop_shift() != 0) {
  3404     assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3405     shr(r, LogMinObjAlignmentInBytes);
  3410 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
  3411   assert (UseCompressedOops, "should be compressed");
  3412 #ifdef ASSERT
  3413   if (CheckCompressedOops) {
  3414     Label ok;
  3415     bne(src, R0, ok);
  3416     delayed()->nop();
  3417     stop("null oop passed to encode_heap_oop_not_null2");
  3418     bind(ok);
  3420 #endif
  3421   verify_oop(src, "broken oop in encode_heap_oop_not_null2");
  3423   if (Universe::narrow_oop_base() != NULL) {
  3424     dsub(dst, src, S5_heapbase);
  3425     if (Universe::narrow_oop_shift() != 0) {
  3426       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3427       shr(dst, LogMinObjAlignmentInBytes);
  3429   } else {
  3430     if (Universe::narrow_oop_shift() != 0) {
  3431       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3432       dsrl(dst, src, LogMinObjAlignmentInBytes);
  3433     } else {
  3434       if (dst != src) move(dst, src);
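       // Decoding is the inverse (illustrative formula):
       //   oop = (narrow == 0) ? NULL : narrow_oop_base + ((uintptr_t)narrow << narrow_oop_shift)
       // As with encoding, movz keeps a 0 narrow oop decoded as NULL.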
  3439 void  MacroAssembler::decode_heap_oop(Register r) {
  3440 #ifdef ASSERT
  3441   verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
  3442 #endif
  3443   if (Universe::narrow_oop_base() == NULL) {
  3444     if (Universe::narrow_oop_shift() != 0) {
  3445       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3446       shl(r, LogMinObjAlignmentInBytes);
  3448   } else {
  3449     move(AT, r);
  3450     if (Universe::narrow_oop_shift() != 0) {
  3451       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3452       shl(r, LogMinObjAlignmentInBytes);
  3454     dadd(r, r, S5_heapbase);
  3455     movz(r, R0, AT);
  3457   verify_oop(r, "broken oop in decode_heap_oop");
  3460 void  MacroAssembler::decode_heap_oop(Register dst, Register src) {
  3461 #ifdef ASSERT
  3462   verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
  3463 #endif
  3464   if (Universe::narrow_oop_base() == NULL) {
  3465     if (Universe::narrow_oop_shift() != 0) {
  3466       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3467       if (dst != src) nop(); // DON'T DELETE THIS GUY.
  3468       dsll(dst, src, LogMinObjAlignmentInBytes);
  3469     } else {
  3470       if (dst != src) move(dst, src);
  3472   } else {
  3473     if (dst == src) {
  3474       move(AT, dst);
  3475       if (Universe::narrow_oop_shift() != 0) {
  3476         assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3477         shl(dst, LogMinObjAlignmentInBytes);
  3479       dadd(dst, dst, S5_heapbase);
  3480       movz(dst, R0, AT);
  3481     } else {
  3482       if (Universe::narrow_oop_shift() != 0) {
  3483         assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3484         dsll(dst, src, LogMinObjAlignmentInBytes);
  3485         daddu(dst, dst, S5_heapbase);
  3486       } else {
  3487         daddu(dst, src, S5_heapbase);
  3489       movz(dst, R0, src);
  3492   verify_oop(dst, "broken oop in decode_heap_oop");
  3495 void  MacroAssembler::decode_heap_oop_not_null(Register r) {
  3496   // Note: it will change flags
  3497   assert (UseCompressedOops, "should only be used for compressed headers");
  3498   assert (Universe::heap() != NULL, "java heap should be initialized");
  3499   // Cannot assert, unverified entry point counts instructions (see .ad file)
  3500   // vtableStubs also counts instructions in pd_code_size_limit.
  3501   // Also do not verify_oop as this is called by verify_oop.
  3502   if (Universe::narrow_oop_shift() != 0) {
  3503     assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3504     shl(r, LogMinObjAlignmentInBytes);
  3505     if (Universe::narrow_oop_base() != NULL) {
  3506       daddu(r, r, S5_heapbase);
  3508   } else {
  3509     assert (Universe::narrow_oop_base() == NULL, "sanity");
  3513 void  MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
  3514   assert (UseCompressedOops, "should only be used for compressed headers");
  3515   assert (Universe::heap() != NULL, "java heap should be initialized");
  3517   // Cannot assert, unverified entry point counts instructions (see .ad file)
  3518   // vtableStubs also counts instructions in pd_code_size_limit.
  3519   // Also do not verify_oop as this is called by verify_oop.
  3520   //lea(dst, Address(S5_heapbase, src, Address::times_8, 0));
  3521   if (Universe::narrow_oop_shift() != 0) {
  3522     assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3523     if (LogMinObjAlignmentInBytes == Address::times_8) {
  3524       dsll(dst, src, LogMinObjAlignmentInBytes);
  3525       daddu(dst, dst, S5_heapbase);
  3526     } else {
  3527       dsll(dst, src, LogMinObjAlignmentInBytes);
  3528       if (Universe::narrow_oop_base() != NULL) {
  3529         daddu(dst, dst, S5_heapbase);
  3532   } else {
  3533     assert (Universe::narrow_oop_base() == NULL, "sanity");
  3534     if (dst != src) {
  3535       move(dst, src);
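       // Klass pointers are compressed the same way, but against the klass base and shift
       // and with no NULL check (hence "not_null"); roughly (illustrative):
       //   narrow_klass = (klass - narrow_klass_base) >> narrow_klass_shift
       //   klass        = narrow_klass_base + ((uintptr_t)narrow_klass << narrow_klass_shift)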
  3540 void MacroAssembler::encode_klass_not_null(Register r) {
  3541   if (Universe::narrow_klass_base() != NULL) {
  3542     assert(r != AT, "Encoding a klass in AT");
  3543     set64(AT, (int64_t)Universe::narrow_klass_base());
  3544     dsub(r, r, AT);
  3546   if (Universe::narrow_klass_shift() != 0) {
  3547     assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
  3548     shr(r, LogKlassAlignmentInBytes);
  3552 void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
  3553   if (dst == src) {
  3554     encode_klass_not_null(src);
  3555   } else {
  3556     if (Universe::narrow_klass_base() != NULL) {
  3557       set64(dst, (int64_t)Universe::narrow_klass_base());
  3558       dsub(dst, src, dst);
  3559       if (Universe::narrow_klass_shift() != 0) {
  3560         assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
  3561         shr(dst, LogKlassAlignmentInBytes);
  3563     } else {
  3564       if (Universe::narrow_klass_shift() != 0) {
  3565         assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
  3566         dsrl(dst, src, LogKlassAlignmentInBytes);
  3567       } else {
  3568         move(dst, src);
  3574 // Function instr_size_for_decode_klass_not_null() counts the instructions
  3575 // generated by decode_klass_not_null(register r) and reinit_heapbase(),
  3576 // when (Universe::heap() != NULL).  Hence, if the instructions they
  3577 // generate change, then this method needs to be updated.
  3578 int MacroAssembler::instr_size_for_decode_klass_not_null() {
  3579   assert (UseCompressedClassPointers, "only for compressed klass ptrs");
  3580   if (Universe::narrow_klass_base() != NULL) {
  3581     // mov64 + addq + shlq? + mov64  (for reinit_heapbase()).
  3582     return (Universe::narrow_klass_shift() == 0 ? 4 * 9 : 4 * 10);
  3583   } else {
  3584     // longest load decode klass function, mov64, leaq
  3585     return (Universe::narrow_klass_shift() == 0 ? 4 * 0 : 4 * 1);
  3589 void  MacroAssembler::decode_klass_not_null(Register r) {
  3590   assert (UseCompressedClassPointers, "should only be used for compressed headers");
  3591   assert(r != AT, "Decoding a klass in AT");
  3592   // Cannot assert, unverified entry point counts instructions (see .ad file)
  3593   // vtableStubs also counts instructions in pd_code_size_limit.
  3594   // Also do not verify_oop as this is called by verify_oop.
  3595   if (Universe::narrow_klass_shift() != 0) {
  3596     assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
  3597     shl(r, LogKlassAlignmentInBytes);
  3599   if (Universe::narrow_klass_base() != NULL) {
  3600     set64(AT, (int64_t)Universe::narrow_klass_base());
  3601     daddu(r, r, AT);
  3602     // Not necessary for MIPS at all.
  3603     //reinit_heapbase();
  3607 void  MacroAssembler::decode_klass_not_null(Register dst, Register src) {
  3608   assert (UseCompressedClassPointers, "should only be used for compressed headers");
  3610   if (dst == src) {
  3611     decode_klass_not_null(dst);
  3612   } else {
  3613     // Cannot assert, unverified entry point counts instructions (see .ad file)
  3614     // vtableStubs also counts instructions in pd_code_size_limit.
  3615     // Also do not verify_oop as this is called by verify_oop.
  3616     set64(dst, (int64_t)Universe::narrow_klass_base());
  3617     if (Universe::narrow_klass_shift() != 0) {
  3618       assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
  3619       assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
  3620       dsll(AT, src, Address::times_8);
  3621       daddu(dst, dst, AT);
  3622     } else {
  3623       daddu(dst, src, dst);
  3628 void MacroAssembler::incrementl(Register reg, int value) {
  3629   if (value == min_jint) {
  3630      move(AT, value);
  3631      LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
  3632      return;
  3634   if (value <  0) { decrementl(reg, -value); return; }
  3635   if (value == 0) {                        ; return; }
  3637   if(Assembler::is_simm16(value)) {
  3638      NOT_LP64(addiu(reg, reg, value));
  3639      LP64_ONLY(move(AT, value); addu32(reg, reg, AT));
  3640   } else {
  3641      move(AT, value);
  3642      LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
  3646 void MacroAssembler::decrementl(Register reg, int value) {
  3647   if (value == min_jint) {
  3648      move(AT, value);
  3649      LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
  3650      return;
  3652   if (value <  0) { incrementl(reg, -value); return; }
  3653   if (value == 0) {                        ; return; }
  3655   if (Assembler::is_simm16(value)) {
  3656      NOT_LP64(addiu(reg, reg, -value));
  3657      LP64_ONLY(move(AT, value); subu32(reg, reg, AT));
  3658   } else {
  3659      move(AT, value);
  3660      LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
  3664 void MacroAssembler::reinit_heapbase() {
  3665   if (UseCompressedOops || UseCompressedClassPointers) {
  3666     if (Universe::heap() != NULL) {
  3667       if (Universe::narrow_oop_base() == NULL) {
  3668         move(S5_heapbase, R0);
  3669       } else {
  3670         set64(S5_heapbase, (int64_t)Universe::narrow_ptrs_base());
  3672     } else {
  3673       set64(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr());
  3674       ld(S5_heapbase, S5_heapbase, 0);
  3678 #endif // _LP64
  3680 void MacroAssembler::check_klass_subtype(Register sub_klass,
  3681                            Register super_klass,
  3682                            Register temp_reg,
  3683                            Label& L_success) {
  3684 // Implemented along the lines of gen_subtype_check.
  3685   Label L_failure;
  3686   check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg,        &L_success, &L_failure, NULL);
  3687   check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
  3688   bind(L_failure);
  3691 SkipIfEqual::SkipIfEqual(
  3692     MacroAssembler* masm, const bool* flag_addr, bool value) {
  3693   _masm = masm;
  3694   _masm->li(AT, (address)flag_addr);
  3695   _masm->lb(AT, AT, 0);
  3696   _masm->addi(AT, AT, -value);
  3697   _masm->beq(AT, R0, _label);
  3698   _masm->delayed()->nop();
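       // Illustrative use of SkipIfEqual (SomeBoolFlag is a hypothetical flag): the code
       // between the constructor and the destructor, which binds _label, is skipped
       // whenever *flag_addr == value:
       //   {
       //     SkipIfEqual skip(masm, &SomeBoolFlag, false);
       //     // ... emitted only when SomeBoolFlag is true ...
       //   }  // ~SkipIfEqual binds the label here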
  3700 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
  3701                                                    Register super_klass,
  3702                                                    Register temp_reg,
  3703                                                    Label* L_success,
  3704                                                    Label* L_failure,
  3705                                                    Label* L_slow_path,
  3706                                         RegisterOrConstant super_check_offset) {
  3707   assert_different_registers(sub_klass, super_klass, temp_reg);
  3708   bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
  3709   if (super_check_offset.is_register()) {
  3710     assert_different_registers(sub_klass, super_klass,
  3711                                super_check_offset.as_register());
  3712   } else if (must_load_sco) {
  3713     assert(temp_reg != noreg, "supply either a temp or a register offset");
  3716   Label L_fallthrough;
  3717   int label_nulls = 0;
  3718   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  3719   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  3720   if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
  3721   assert(label_nulls <= 1, "at most one NULL in the batch");
  3723   int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  3724   int sco_offset = in_bytes(Klass::super_check_offset_offset());
  3725   // If the pointers are equal, we are done (e.g., String[] elements).
  3726   // This self-check enables sharing of secondary supertype arrays among
  3727   // non-primary types such as array-of-interface.  Otherwise, each such
  3728   // type would need its own customized SSA.
  3729   // We move this check to the front of the fast path because many
  3730   // type checks are in fact trivially successful in this manner,
  3731   // so we get a nicely predicted branch right at the start of the check.
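         // In rough pseudocode, the fast path emitted below is (illustrative only):
         //   if (sub_klass == super_klass)                         goto L_success;
         //   if (*(sub_klass + super_check_offset) == super_klass) goto L_success;
         //   else if (super_check_offset != sc_offset)             goto L_failure;   // decisive miss
         //   else                                                  goto L_slow_path; // consult secondary supers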
  3732   beq(sub_klass, super_klass, *L_success);
  3733   delayed()->nop();
  3734   // Check the supertype display:
  3735   if (must_load_sco) {
  3736     // Positive movl does right thing on LP64.
  3737     lwu(temp_reg, super_klass, sco_offset);
  3738     super_check_offset = RegisterOrConstant(temp_reg);
  3740   dsll(AT, super_check_offset.register_or_noreg(), Address::times_1);
  3741   daddu(AT, sub_klass, AT);
  3742   ld(AT, AT, super_check_offset.constant_or_zero()*Address::times_1);
  3744   // This check has worked decisively for primary supers.
  3745   // Secondary supers are sought in the super_cache ('super_cache_addr').
  3746   // (Secondary supers are interfaces and very deeply nested subtypes.)
  3747   // This works in the same check above because of a tricky aliasing
  3748   // between the super_cache and the primary super display elements.
  3749   // (The 'super_check_addr' can address either, as the case requires.)
  3750   // Note that the cache is updated below if it does not help us find
  3751   // what we need immediately.
  3752   // So if it was a primary super, we can just fail immediately.
  3753   // Otherwise, it's the slow path for us (no success at this point).
  3755   if (super_check_offset.is_register()) {
  3756     beq(super_klass, AT, *L_success);
  3757     delayed()->nop();
  3758     addi(AT, super_check_offset.as_register(), -sc_offset);
  3759     if (L_failure == &L_fallthrough) {
  3760       beq(AT, R0, *L_slow_path);
  3761       delayed()->nop();
  3762     } else {
  3763       bne(AT, R0, *L_failure);
  3764       delayed()->nop();
  3765       b(*L_slow_path);
  3766       delayed()->nop();
  3768   } else if (super_check_offset.as_constant() == sc_offset) {
  3769     // Need a slow path; fast failure is impossible.
  3770     if (L_slow_path == &L_fallthrough) {
  3771       beq(super_klass, AT, *L_success);
  3772       delayed()->nop();
  3773     } else {
  3774       bne(super_klass, AT, *L_slow_path);
  3775       delayed()->nop();
  3776       b(*L_success);
  3777       delayed()->nop();
  3779   } else {
  3780     // No slow path; it's a fast decision.
  3781     if (L_failure == &L_fallthrough) {
  3782       beq(super_klass, AT, *L_success);
  3783       delayed()->nop();
  3784     } else {
  3785       bne(super_klass, AT, *L_failure);
  3786       delayed()->nop();
  3787       b(*L_success);
  3788       delayed()->nop();
  3792   bind(L_fallthrough);
  3797 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
  3798                                                    Register super_klass,
  3799                                                    Register temp_reg,
  3800                                                    Register temp2_reg,
  3801                                                    Label* L_success,
  3802                                                    Label* L_failure,
  3803                                                    bool set_cond_codes) {
  3804   assert_different_registers(sub_klass, super_klass, temp_reg);
  3805   if (temp2_reg != noreg)
  3806     assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
  3807   else
  3808     temp2_reg = T9;
  3809 #define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)
  3811   Label L_fallthrough;
  3812   int label_nulls = 0;
  3813   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  3814   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  3815   assert(label_nulls <= 1, "at most one NULL in the batch");
  3817   // a couple of useful fields in sub_klass:
  3818   int ss_offset = in_bytes(Klass::secondary_supers_offset());
  3819   int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  3820   Address secondary_supers_addr(sub_klass, ss_offset);
  3821   Address super_cache_addr(     sub_klass, sc_offset);
  3823   // Do a linear scan of the secondary super-klass chain.
  3824   // This code is rarely used, so simplicity is a virtue here.
  3825   // The repne_scan instruction uses fixed registers, which we must spill.
  3826   // Don't worry too much about pre-existing connections with the input regs.
  3828   // Get super_klass value into rax (even if it was in rdi or rcx).
  3829 #ifndef PRODUCT
  3830   int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
  3831   ExternalAddress pst_counter_addr((address) pst_counter);
  3832   NOT_LP64(  incrementl(pst_counter_addr) );
  3833 #endif //PRODUCT
  3835   // We will consult the secondary-super array.
  3836   ld(temp_reg, secondary_supers_addr);
  3837   // Load the array length.  (Positive movl does right thing on LP64.)
  3838   lw(temp2_reg, Address(temp_reg, Array<Klass*>::length_offset_in_bytes()));
  3839   // Skip to start of data.
  3840   daddiu(temp_reg, temp_reg, Array<Klass*>::base_offset_in_bytes());
  3842   // Scan RCX words at [RDI] for an occurrence of RAX.
  3843   // Set NZ/Z based on last compare.
  3844   // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does
  3845   // not change flags (only scas instruction which is repeated sets flags).
  3846   // Set Z = 0 (not equal) before 'repne' to indicate that class was not found.
  3848   /* 2013/4/3 Jin: OpenJDK8 never compresses klass pointers in secondary-super array. */
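         // On MIPS there is no repne/scas; the scan below is an explicit loop: walk
         // temp2_reg entries starting at temp_reg, comparing each Klass* with
         // super_klass, and branch to *L_failure when the count reaches zero.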
  3849   Label Loop, subtype;
  3850   bind(Loop);
  3851   beq(temp2_reg, R0, *L_failure);
  3852   delayed()->nop();
  3853   ld(AT, temp_reg, 0);
  3854   beq(AT, super_klass, subtype);
  3855   delayed()->daddi(temp_reg, temp_reg, 1 * wordSize);
  3856   b(Loop);
  3857   delayed()->daddi(temp2_reg, temp2_reg, -1);
  3859   bind(subtype);
  3860   sd(super_klass, super_cache_addr);
  3861   if (L_success != &L_fallthrough) {
  3862     b(*L_success);
  3863     delayed()->nop();
  3866   // Success.  Cache the super we found and proceed in triumph.
  3867 #undef IS_A_TEMP
  3869   bind(L_fallthrough);
  3872 void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
  3873   ld(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
  3874   sd(R0, Address(java_thread, JavaThread::vm_result_offset()));
  3875   verify_oop(oop_result, "broken oop in call_VM_base");
  3878 void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
  3879   ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
  3880   sd(R0, Address(java_thread, JavaThread::vm_result_2_offset()));
  3883 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
  3884                                          int extra_slot_offset) {
  3885   // cf. TemplateTable::prepare_invoke(), if (load_receiver).
  3886   int stackElementSize = Interpreter::stackElementSize;
  3887   int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
  3888 #ifdef ASSERT
  3889   int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
  3890   assert(offset1 - offset == stackElementSize, "correct arithmetic");
  3891 #endif
  3892   Register             scale_reg    = NOREG;
  3893   Address::ScaleFactor scale_factor = Address::no_scale;
  3894   if (arg_slot.is_constant()) {
  3895     offset += arg_slot.as_constant() * stackElementSize;
  3896   } else {
  3897     scale_reg    = arg_slot.as_register();
  3898     scale_factor = Address::times_8;
  3900   // 2014/07/31 Fu: We don't push RA on stack in prepare_invoke.
  3901   //  offset += wordSize;           // return PC is on stack
  3902   if (scale_reg == NOREG) return Address(SP, offset);
  3903   else {
  3904     dsll(scale_reg, scale_reg, scale_factor);
  3905     daddu(scale_reg, SP, scale_reg);
  3906     return Address(scale_reg, offset);
  3910 SkipIfEqual::~SkipIfEqual() {
  3911   _masm->bind(_label);
  3914 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
  3915   switch (size_in_bytes) {
  3916 #ifndef _LP64
  3917   case  8:
  3918     assert(dst2 != noreg, "second dest register required");
  3919     lw(dst,  src);
  3920     lw(dst2, src.plus_disp(BytesPerInt));
  3921     break;
  3922 #else
  3923   case  8:  ld(dst, src); break;
  3924 #endif
  3925   case  4:  lw(dst, src); break;
  3926   case  2:  is_signed ? lh(dst, src) : lhu(dst, src); break;
  3927   case  1:  is_signed ? lb( dst, src) : lbu( dst, src); break;
  3928   default:  ShouldNotReachHere();
  3932 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
  3933   switch (size_in_bytes) {
  3934 #ifndef _LP64
  3935   case  8:
  3936     assert(src2 != noreg, "second source register required");
  3937     sw(src, dst);
  3938     sw(src2, dst.plus_disp(BytesPerInt));
  3939     break;
  3940 #else
  3941   case  8:  sd(src, dst); break;
  3942 #endif
  3943   case  4:  sw(src, dst); break;
  3944   case  2:  sh(src, dst); break;
  3945   case  1:  sb(src, dst); break;
  3946   default:  ShouldNotReachHere();
  3950 // Look up the method for a megamorphic invokeinterface call.
  3951 // The target method is determined by <intf_klass, itable_index>.
  3952 // The receiver klass is in recv_klass.
  3953 // On success, the result will be in method_result, and execution falls through.
  3954 // On failure, execution transfers to the given label.
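       // Roughly, the lookup below does the following (illustrative pseudocode, with the
       // alignment rounding omitted):
       //   scan = recv_klass + vtable_start_offset + vtable_length * wordSize;   // first itableOffsetEntry
       //   while (scan->interface() != intf_klass) {
       //     if (scan->interface() == NULL) goto L_no_such_interface;
       //     scan += itableOffsetEntry::size() * wordSize;
       //   }
       //   method_result = *(recv_klass + itable_index * wordSize + itentry_off + scan->offset());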
  3955 void MacroAssembler::lookup_interface_method(Register recv_klass,
  3956                                              Register intf_klass,
  3957                                              RegisterOrConstant itable_index,
  3958                                              Register method_result,
  3959                                              Register scan_temp,
  3960                                              Label& L_no_such_interface) {
  3961   assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
  3962   assert(itable_index.is_constant() || itable_index.as_register() == method_result,
  3963          "caller must use same register for non-constant itable index as for method");
  3965   // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  3966   int vtable_base = InstanceKlass::vtable_start_offset() * wordSize;
  3967   int itentry_off = itableMethodEntry::method_offset_in_bytes();
  3968   int scan_step   = itableOffsetEntry::size() * wordSize;
  3969   int vte_size    = vtableEntry::size() * wordSize;
  3970   Address::ScaleFactor times_vte_scale = Address::times_ptr;
  3971   assert(vte_size == wordSize, "else adjust times_vte_scale");
  3973   lw(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize));
  3975   // %%% Could store the aligned, prescaled offset in the klassoop.
  3976   dsll(scan_temp, scan_temp, times_vte_scale);
  3977   daddu(scan_temp, recv_klass, scan_temp);
  3978   daddiu(scan_temp, scan_temp, vtable_base);
  3979   if (HeapWordsPerLong > 1) {
  3980     // Round up to align_object_offset boundary
  3981     // see code for InstanceKlass::start_of_itable!
  3982     round_to(scan_temp, BytesPerLong);
  3985   // Adjust recv_klass by scaled itable_index, so we can free itable_index.
  3986   assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
  3987   if (itable_index.is_constant()) {
  3988     set64(AT, (int)itable_index.as_constant());
  3989     dsll(AT, AT, (int)Address::times_ptr);
  3990   } else {
  3991     dsll(AT, itable_index.as_register(), (int)Address::times_ptr);
  3993   daddu(AT, AT, recv_klass);
  3994   daddiu(recv_klass, AT, itentry_off);
  3996   Label search, found_method;
  3998   for (int peel = 1; peel >= 0; peel--) {
  3999     ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
  4001     if (peel) {
  4002       beq(intf_klass, method_result, found_method);
  4003       nop();
  4004     } else {
  4005       bne(intf_klass, method_result, search);
  4006       nop();
  4007       // (invert the test to fall through to found_method...)
  4010     if (!peel)  break;
  4012     bind(search);
  4014     // Check that the previous entry is non-null.  A null entry means that
  4015     // the receiver class doesn't implement the interface, and wasn't the
  4016     // same as when the caller was compiled.
  4017     beq(method_result, R0, L_no_such_interface);
  4018     nop();
  4019     daddiu(scan_temp, scan_temp, scan_step);
  4022   bind(found_method);
  4024   // Got a hit.
  4025   lw(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
  4026   if(UseLoongsonISA) {
  4027     gsldx(method_result, recv_klass, scan_temp, 0);
  4028   } else {
  4029     daddu(AT, recv_klass, scan_temp);
  4030     ld(method_result, AT);
  4034 // virtual method calling
  4035 void MacroAssembler::lookup_virtual_method(Register recv_klass,
  4036                                            RegisterOrConstant vtable_index,
  4037                                            Register method_result) {
  4038   Register tmp = GP;
  4039   push(tmp);
  4041   if (vtable_index.is_constant()) {
  4042     assert_different_registers(recv_klass, method_result, tmp);
  4043   } else {
  4044     assert_different_registers(recv_klass, method_result, vtable_index.as_register(), tmp);
  4046   const int base = InstanceKlass::vtable_start_offset() * wordSize;
  4047   assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below");
  4048 /*
  4049   Address vtable_entry_addr(recv_klass,
  4050                             vtable_index, Address::times_ptr,
  4051                             base + vtableEntry::method_offset_in_bytes());
  4052 */
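         // i.e. (illustrative):
         //   method_result = *(recv_klass + base + vtable_index * wordSize
         //                     + vtableEntry::method_offset_in_bytes())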
  4053   if (vtable_index.is_constant()) {
  4054     set64(AT, vtable_index.as_constant());
  4055     dsll(AT, AT, (int)Address::times_ptr);
  4056   } else {
  4057     dsll(AT, vtable_index.as_register(), (int)Address::times_ptr);
  4059   set64(tmp, base + vtableEntry::method_offset_in_bytes());
  4060   daddu(tmp, tmp, AT);
  4061   daddu(tmp, tmp, recv_klass);
  4062   ld(method_result, tmp, 0);
  4064   pop(tmp);
