src/cpu/mips/vm/macroAssembler_mips.cpp

author:      aoqi
date:        Tue, 04 Sep 2018 21:25:12 +0800
changeset:   9228:617b86d17edb
parent:      9227:f1560009a081
child:       9242:133e3e9881b9
permissions: -rw-r--r--

#7517 mRegP match a0_RegP

     1 /*
     2  * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
     3  * Copyright (c) 2017, 2018, Loongson Technology. All rights reserved.
     4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     5  *
     6  * This code is free software; you can redistribute it and/or modify it
     7  * under the terms of the GNU General Public License version 2 only, as
     8  * published by the Free Software Foundation.
     9  *
    10  * This code is distributed in the hope that it will be useful, but WITHOUT
    11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    13  * version 2 for more details (a copy is included in the LICENSE file that
    14  * accompanied this code).
    15  *
    16  * You should have received a copy of the GNU General Public License version
    17  * 2 along with this work; if not, write to the Free Software Foundation,
    18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    19  *
    20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    21  * or visit www.oracle.com if you need additional information or have any
    22  * questions.
    23  *
    24  */
    26 #include "precompiled.hpp"
    27 #include "asm/assembler.hpp"
    28 #include "asm/assembler.inline.hpp"
    29 #include "asm/macroAssembler.inline.hpp"
    30 #include "compiler/disassembler.hpp"
    31 #include "gc_interface/collectedHeap.inline.hpp"
    32 #include "interpreter/interpreter.hpp"
    33 #include "memory/cardTableModRefBS.hpp"
    34 #include "memory/resourceArea.hpp"
    35 #include "memory/universe.hpp"
    36 #include "prims/methodHandles.hpp"
    37 #include "runtime/biasedLocking.hpp"
    38 #include "runtime/interfaceSupport.hpp"
    39 #include "runtime/objectMonitor.hpp"
    40 #include "runtime/os.hpp"
    41 #include "runtime/sharedRuntime.hpp"
    42 #include "runtime/stubRoutines.hpp"
    43 #include "utilities/macros.hpp"
    44 #if INCLUDE_ALL_GCS
    45 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
    46 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
    47 #include "gc_implementation/g1/heapRegion.hpp"
    48 #endif // INCLUDE_ALL_GCS
    50 // Implementation of MacroAssembler
    52 intptr_t MacroAssembler::i[32] = {0};
    53 float MacroAssembler::f[32] = {0.0};
    55 void MacroAssembler::print(outputStream *s) {
    56   unsigned int k;
    57   for(k=0; k<sizeof(i)/sizeof(i[0]); k++) {
    58     s->print_cr("i%d = 0x%.16lx", k, i[k]);
    59   }
    60   s->cr();
    62   for(k=0; k<sizeof(f)/sizeof(f[0]); k++) {
    63     s->print_cr("f%d = %f", k, f[k]);
    64   }
    65   s->cr();
    66 }
    68 int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; }
    69 int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; }
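// Editor's note: i_offset/f_offset above use the classic null-object
// offsetof idiom: the address of a field within a MacroAssembler placed at
// address 0 is numerically the field's byte offset.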
    71 void MacroAssembler::save_registers(MacroAssembler *masm) {
    72 #define __ masm->
    73   for(int k=0; k<32; k++) {
    74     __ sw (as_Register(k), A0, i_offset(k));
    75   }
    77   for(int k=0; k<32; k++) {
    78     __ swc1 (as_FloatRegister(k), A0, f_offset(k));
    79   }
    80 #undef __
    81 }
    83 void MacroAssembler::restore_registers(MacroAssembler *masm) {
    84 #define __ masm->
    85   for(int k=0; k<32; k++) {
    86     __ lw (as_Register(k), A0, i_offset(k));
    87   }
    89   for(int k=0; k<32; k++) {
    90     __ lwc1 (as_FloatRegister(k), A0, f_offset(k));
    91   }
    92 #undef __
    93 }
    96 void MacroAssembler::pd_patch_instruction(address branch, address target) {
    97   jint& stub_inst = *(jint*) branch;
    98   jint *pc = (jint *)branch;
   100   if((opcode(stub_inst) == special_op) && (special(stub_inst) == dadd_op)) {
   101     //b_far:
   102     //  move(AT, RA); // dadd
   103     //  emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
   104     //  nop();
   105     //  lui(T9, 0); // to be patched
   106     //  ori(T9, 0);
   107     //  daddu(T9, T9, RA);
   108     //  move(RA, AT);
   109     //  jr(T9);
   111     assert(opcode(pc[3]) == lui_op
   112         && opcode(pc[4]) == ori_op
   113         && special(pc[5]) == daddu_op, "Not a branch label patch");
   114     if(!(opcode(pc[3]) == lui_op
   115           && opcode(pc[4]) == ori_op
   116           && special(pc[5]) == daddu_op)) { tty->print_cr("Not a branch label patch"); }
   118     int offset = target - branch;
   119     if (!is_simm16(offset)) {
   120       pc[3] = (pc[3] & 0xffff0000) | high16(offset - 12);
   121       pc[4] = (pc[4] & 0xffff0000) | low16(offset - 12);
   122     } else {
    123       /* revert to "b + nop" */
   124       CodeBuffer cb(branch, 4 * 10);
   125       MacroAssembler masm(&cb);
   126 #define __ masm.
   127       __ b(target);
   128       __ delayed()->nop();
   129       __ nop();
   130       __ nop();
   131       __ nop();
   132       __ nop();
   133       __ nop();
   134       __ nop();
   135     }
   136     return;
   137   } else if (special(pc[4]) == jr_op
   138              && opcode(pc[4]) == special_op
   139              && (((opcode(pc[0]) == lui_op) || opcode(pc[0]) == daddiu_op) || (opcode(pc[0]) == ori_op))) {
   140     //jmp_far:
   141     //  patchable_set48(T9, target);
   142     //  jr(T9);
   143     //  nop();
   145     CodeBuffer cb(branch, 4 * 4);
   146     MacroAssembler masm(&cb);
   147     masm.patchable_set48(T9, (long)(target));
   148     return;
   149   }
   151 #ifndef PRODUCT
   152   if (!is_simm16((target - branch - 4) >> 2)) {
   153     tty->print_cr("Illegal patching: branch = 0x%lx, target = 0x%lx", branch, target);
   154     tty->print_cr("======= Start decoding at branch = 0x%lx =======", branch);
   155     Disassembler::decode(branch - 4 * 16, branch + 4 * 16, tty);
   156     tty->print_cr("======= End of decoding =======");
   157   }
   158 #endif
   160   stub_inst = patched_branch(target - branch, stub_inst, 0);
   161 }
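// Editor's note: a minimal sketch (helper names here are illustrative, not
// this file's API) of why the lui/ori patching above needs no carry fixup:
// ori zero-extends its 16-bit immediate, so the two halves reassemble
// exactly.
#if 0
static inline jint demo_high16(jint x) { return (x >> 16) & 0xffff; }
static inline jint demo_low16 (jint x) { return x & 0xffff; }
// After lui(T9, demo_high16(v)); ori(T9, T9, demo_low16(v)); the register
// holds (demo_high16(v) << 16) | demo_low16(v) == v for any 32-bit v.
#endif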
   163 static inline address first_cache_address() {
   164   return CodeCache::low_bound() + sizeof(HeapBlock::Header);
   165 }
   167 static inline address last_cache_address() {
   168   return CodeCache::high_bound() - Assembler::InstructionSize;
   169 }
   171 int MacroAssembler::call_size(address target, bool far, bool patchable) {
   172   if (patchable) return 6 << Assembler::LogInstructionSize;
   173   if (!far) return 2 << Assembler::LogInstructionSize; // jal + nop
   174   return (insts_for_set64((jlong)target) + 2) << Assembler::LogInstructionSize;
   175 }
   177 // Can we reach target using jal/j from anywhere
   178 // in the code cache (because code can be relocated)?
   179 bool MacroAssembler::reachable_from_cache(address target) {
   180   address cl = first_cache_address();
   181   address ch = last_cache_address();
   183   return (cl <= target) && (target <= ch) && fit_in_jal(cl, ch);
   184 }
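// Editor's note: a hedged sketch of the reachability rule that fit_in_jal is
// assumed to implement: a MIPS j/jal keeps the upper 4 bits of PC+4 and
// replaces the low 28 bits, so two addresses are mutually reachable only if
// they lie in the same 256MB-aligned segment.
#if 0
static bool demo_same_jal_segment(uintptr_t a, uintptr_t b) {
  return (a & ~(uintptr_t)0x0fffffff) == (b & ~(uintptr_t)0x0fffffff);
}
#endif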
   186 void MacroAssembler::general_jump(address target) {
   187   if (reachable_from_cache(target)) {
   188     j(target);
   189     delayed()->nop();
   190   } else {
   191     set64(T9, (long)target);
   192     jr(T9);
   193     delayed()->nop();
   194   }
   195 }
   197 int MacroAssembler::insts_for_general_jump(address target) {
   198   if (reachable_from_cache(target)) {
   199     //j(target);
   200     //nop();
   201     return 2;
   202   } else {
   203     //set64(T9, (long)target);
   204     //jr(T9);
   205     //nop();
   206     return insts_for_set64((jlong)target) + 2;
   207   }
   208 }
   210 void MacroAssembler::patchable_jump(address target) {
   211   if (reachable_from_cache(target)) {
   212     nop();
   213     nop();
   214     nop();
   215     nop();
   216     j(target);
   217     delayed()->nop();
   218   } else {
   219     patchable_set48(T9, (long)target);
   220     jr(T9);
   221     delayed()->nop();
   222   }
   223 }
   225 int MacroAssembler::insts_for_patchable_jump(address target) {
   226   return 6;
   227 }
   229 void MacroAssembler::general_call(address target) {
   230   if (reachable_from_cache(target)) {
   231     jal(target);
   232     delayed()->nop();
   233   } else {
   234     set64(T9, (long)target);
   235     jalr(T9);
   236     delayed()->nop();
   237   }
   238 }
   240 int MacroAssembler::insts_for_general_call(address target) {
   241   if (reachable_from_cache(target)) {
   242     //jal(target);
   243     //nop();
   244     return 2;
   245   } else {
   246     //set64(T9, (long)target);
   247     //jalr(T9);
   248     //nop();
   249     return insts_for_set64((jlong)target) + 2;
   250   }
   251 }
   253 void MacroAssembler::patchable_call(address target) {
   254   if (reachable_from_cache(target)) {
   255     nop();
   256     nop();
   257     nop();
   258     nop();
   259     jal(target);
   260     delayed()->nop();
   261   } else {
   262     patchable_set48(T9, (long)target);
   263     jalr(T9);
   264     delayed()->nop();
   265   }
   266 }
   268 int MacroAssembler::insts_for_patchable_call(address target) {
   269   return 6;
   270 }
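// Editor's note: both patchable sequences above occupy a fixed six-
// instruction window: either four nops + jal/j + delay-slot nop, or a
// fixed-length (assumed four-instruction) patchable_set48 + jalr/jr +
// delay-slot nop. That fixed budget is why insts_for_patchable_jump and
// insts_for_patchable_call return the constant 6.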
   272 void MacroAssembler::beq_far(Register rs, Register rt, address entry) {
   273   u_char * cur_pc = pc();
   275   // Near/Far jump
   276   if(is_simm16((entry - pc() - 4) / 4)) {
   277     Assembler::beq(rs, rt, offset(entry));
   278   } else {
   279     Label not_jump;
   280     bne(rs, rt, not_jump);
   281     delayed()->nop();
   283     b_far(entry);
   284     delayed()->nop();
   286     bind(not_jump);
   287     has_delay_slot();
   288   }
   289 }
   291 void MacroAssembler::beq_far(Register rs, Register rt, Label& L) {
   292   if (L.is_bound()) {
   293     beq_far(rs, rt, target(L));
   294   } else {
   295     u_char * cur_pc = pc();
   296     Label not_jump;
   297     bne(rs, rt, not_jump);
   298     delayed()->nop();
   300     b_far(L);
   301     delayed()->nop();
   303     bind(not_jump);
   304     has_delay_slot();
   305   }
   306 }
   308 void MacroAssembler::bne_far(Register rs, Register rt, address entry) {
   309   u_char * cur_pc = pc();
    311   // Near/Far jump
   312   if(is_simm16((entry - pc() - 4) / 4)) {
   313     Assembler::bne(rs, rt, offset(entry));
   314   } else {
   315     Label not_jump;
   316     beq(rs, rt, not_jump);
   317     delayed()->nop();
   319     b_far(entry);
   320     delayed()->nop();
   322     bind(not_jump);
   323     has_delay_slot();
   324   }
   325 }
   327 void MacroAssembler::bne_far(Register rs, Register rt, Label& L) {
   328   if (L.is_bound()) {
   329     bne_far(rs, rt, target(L));
   330   } else {
   331     u_char * cur_pc = pc();
   332     Label not_jump;
   333     beq(rs, rt, not_jump);
   334     delayed()->nop();
   336     b_far(L);
   337     delayed()->nop();
   339     bind(not_jump);
   340     has_delay_slot();
   341   }
   342 }
   344 void MacroAssembler::beq_long(Register rs, Register rt, Label& L) {
   345   Label not_taken;
   347   bne(rs, rt, not_taken);
   348   delayed()->nop();
   350   jmp_far(L);
   352   bind(not_taken);
   353 }
   355 void MacroAssembler::bne_long(Register rs, Register rt, Label& L) {
   356   Label not_taken;
   358   beq(rs, rt, not_taken);
   359   delayed()->nop();
   361   jmp_far(L);
   363   bind(not_taken);
   364 }
   366 void MacroAssembler::bc1t_long(Label& L) {
   367   Label not_taken;
   369   bc1f(not_taken);
   370   delayed()->nop();
   372   jmp_far(L);
   374   bind(not_taken);
   375 }
   377 void MacroAssembler::bc1f_long(Label& L) {
   378   Label not_taken;
   380   bc1t(not_taken);
   381   delayed()->nop();
   383   jmp_far(L);
   385   bind(not_taken);
   386 }
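// Editor's note: beq_long/bne_long/bc1t_long/bc1f_long above all share one
// inversion idiom. A MIPS conditional branch spans only a signed 16-bit word
// offset, so the condition is inverted to hop over an unrestricted far jump:
//
//     bne   rs, rt, not_taken   // inverted condition, always in range
//     nop                       // branch delay slot
//     jmp_far(L)                // 48-bit absolute jump through T9
//   not_taken: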
   388 void MacroAssembler::b_far(Label& L) {
   389   if (L.is_bound()) {
   390     b_far(target(L));
   391   } else {
   392     volatile address dest = target(L);
   393 /*
   394 MacroAssembler::pd_patch_instruction branch=55651ed514, target=55651ef6d8
   395    0x00000055651ed514: dadd at, ra, zero
   396    0x00000055651ed518: [4110001]bgezal zero, 0x00000055651ed520
   398    0x00000055651ed51c: sll zero, zero, 0
   399    0x00000055651ed520: lui t9, 0x0
   400    0x00000055651ed524: ori t9, t9, 0x21b8
   401    0x00000055651ed528: daddu t9, t9, ra
   402    0x00000055651ed52c: dadd ra, at, zero
   403    0x00000055651ed530: jr t9
   404    0x00000055651ed534: sll zero, zero, 0
   405 */
   406     move(AT, RA);
   407     emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
   408     nop();
   409     lui(T9, 0); // to be patched
   410     ori(T9, T9, 0);
   411     daddu(T9, T9, RA);
   412     move(RA, AT);
   413     jr(T9);
   414   }
   415 }
   417 void MacroAssembler::b_far(address entry) {
   418   u_char * cur_pc = pc();
   420   // Near/Far jump
   421   if(is_simm16((entry - pc() - 4) / 4)) {
   422     b(offset(entry));
   423   } else {
   424     // address must be bounded
   425     move(AT, RA);
   426     emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
   427     nop();
   428     li32(T9, entry - pc());
   429     daddu(T9, T9, RA);
   430     move(RA, AT);
   431     jr(T9);
   432   }
   433 }
   435 void MacroAssembler::ld_ptr(Register rt, Register offset, Register base) {
   436   addu_long(AT, base, offset);
   437   ld_ptr(rt, 0, AT);
   438 }
   440 void MacroAssembler::st_ptr(Register rt, Register offset, Register base) {
   441   addu_long(AT, base, offset);
   442   st_ptr(rt, 0, AT);
   443 }
   445 void MacroAssembler::ld_long(Register rt, Register offset, Register base) {
   446   addu_long(AT, base, offset);
   447   ld_long(rt, 0, AT);
   448 }
   450 void MacroAssembler::st_long(Register rt, Register offset, Register base) {
   451   addu_long(AT, base, offset);
   452   st_long(rt, 0, AT);
   453 }
   455 Address MacroAssembler::as_Address(AddressLiteral adr) {
   456   return Address(adr.target(), adr.rspec());
   457 }
   459 Address MacroAssembler::as_Address(ArrayAddress adr) {
   460   return Address::make_array(adr);
   461 }
   463 // tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved).
   464 void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) {
   465   Label again;
   467   li(tmp_reg1, counter_addr);
   468   bind(again);
   469   if(UseSyncLevel >= 3000 || UseSyncLevel < 2000) sync();
   470   ll(tmp_reg2, tmp_reg1, 0);
   471   addi(tmp_reg2, tmp_reg2, inc);
   472   sc(tmp_reg2, tmp_reg1, 0);
   473   beq(tmp_reg2, R0, again);
   474   delayed()->nop();
   475 }
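// Editor's note: a hedged C equivalent of the ll/sc retry loop emitted above
// (illustration only, using GCC atomic builtins; not part of this file):
#if 0
static void demo_atomic_inc32(volatile int* counter, int inc) {
  int old;
  do {
    old = __atomic_load_n(counter, __ATOMIC_RELAXED);              // ~ ll
  } while (!__atomic_compare_exchange_n(counter, &old, old + inc,
             /*weak*/ true, __ATOMIC_RELAXED, __ATOMIC_RELAXED));  // ~ sc+beq
}
#endif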
   477 int MacroAssembler::biased_locking_enter(Register lock_reg,
   478                                          Register obj_reg,
   479                                          Register swap_reg,
   480                                          Register tmp_reg,
   481                                          bool swap_reg_contains_mark,
   482                                          Label& done,
   483                                          Label* slow_case,
   484                                          BiasedLockingCounters* counters) {
   485   assert(UseBiasedLocking, "why call this otherwise?");
   486   bool need_tmp_reg = false;
   487   if (tmp_reg == noreg) {
   488     need_tmp_reg = true;
   489     tmp_reg = T9;
   490   }
   491   assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT);
   492   assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
   493   Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
   494   Address saved_mark_addr(lock_reg, 0);
   496   // Biased locking
   497   // See whether the lock is currently biased toward our thread and
   498   // whether the epoch is still valid
   499   // Note that the runtime guarantees sufficient alignment of JavaThread
   500   // pointers to allow age to be placed into low bits
   501   // First check to see whether biasing is even enabled for this object
   502   Label cas_label;
   503   int null_check_offset = -1;
   504   if (!swap_reg_contains_mark) {
   505     null_check_offset = offset();
   506     ld_ptr(swap_reg, mark_addr);
   507   }
   509   if (need_tmp_reg) {
   510     push(tmp_reg);
   511   }
   512   move(tmp_reg, swap_reg);
   513   andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place);
   514 #ifdef _LP64
   515   daddi(AT, R0, markOopDesc::biased_lock_pattern);
   516   dsub(AT, AT, tmp_reg);
   517 #else
   518   addi(AT, R0, markOopDesc::biased_lock_pattern);
   519   sub(AT, AT, tmp_reg);
   520 #endif
   521   if (need_tmp_reg) {
   522     pop(tmp_reg);
   523   }
   525   bne(AT, R0, cas_label);
   526   delayed()->nop();
   529   // The bias pattern is present in the object's header. Need to check
   530   // whether the bias owner and the epoch are both still current.
   531   // Note that because there is no current thread register on MIPS we
   532   // need to store off the mark word we read out of the object to
   533   // avoid reloading it and needing to recheck invariants below. This
   534   // store is unfortunate but it makes the overall code shorter and
   535   // simpler.
   536   st_ptr(swap_reg, saved_mark_addr);
   537   if (need_tmp_reg) {
   538     push(tmp_reg);
   539   }
   540   if (swap_reg_contains_mark) {
   541     null_check_offset = offset();
   542   }
   543   load_prototype_header(tmp_reg, obj_reg);
   544   xorr(tmp_reg, tmp_reg, swap_reg);
   545   get_thread(swap_reg);
   546   xorr(swap_reg, swap_reg, tmp_reg);
   548   move(AT, ~((int) markOopDesc::age_mask_in_place));
   549   andr(swap_reg, swap_reg, AT);
   551   if (PrintBiasedLockingStatistics) {
   552     Label L;
   553     bne(swap_reg, R0, L);
   554     delayed()->nop();
   555     push(tmp_reg);
   556     push(A0);
   557     atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg);
   558     pop(A0);
   559     pop(tmp_reg);
   560     bind(L);
   561   }
   562   if (need_tmp_reg) {
   563     pop(tmp_reg);
   564   }
   565   beq(swap_reg, R0, done);
   566   delayed()->nop();
   567   Label try_revoke_bias;
   568   Label try_rebias;
   570   // At this point we know that the header has the bias pattern and
   571   // that we are not the bias owner in the current epoch. We need to
   572   // figure out more details about the state of the header in order to
   573   // know what operations can be legally performed on the object's
   574   // header.
   576   // If the low three bits in the xor result aren't clear, that means
   577   // the prototype header is no longer biased and we have to revoke
   578   // the bias on this object.
   580   move(AT, markOopDesc::biased_lock_mask_in_place);
   581   andr(AT, swap_reg, AT);
   582   bne(AT, R0, try_revoke_bias);
   583   delayed()->nop();
   584   // Biasing is still enabled for this data type. See whether the
   585   // epoch of the current bias is still valid, meaning that the epoch
   586   // bits of the mark word are equal to the epoch bits of the
   587   // prototype header. (Note that the prototype header's epoch bits
   588   // only change at a safepoint.) If not, attempt to rebias the object
   589   // toward the current thread. Note that we must be absolutely sure
   590   // that the current epoch is invalid in order to do this because
   591   // otherwise the manipulations it performs on the mark word are
   592   // illegal.
   594   move(AT, markOopDesc::epoch_mask_in_place);
   595   andr(AT,swap_reg, AT);
   596   bne(AT, R0, try_rebias);
   597   delayed()->nop();
   598   // The epoch of the current bias is still valid but we know nothing
   599   // about the owner; it might be set or it might be clear. Try to
   600   // acquire the bias of the object using an atomic operation. If this
   601   // fails we will go in to the runtime to revoke the object's bias.
   602   // Note that we first construct the presumed unbiased header so we
   603   // don't accidentally blow away another thread's valid bias.
   605   ld_ptr(swap_reg, saved_mark_addr);
   607   move(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
   608   andr(swap_reg, swap_reg, AT);
   610   if (need_tmp_reg) {
   611     push(tmp_reg);
   612   }
   613   get_thread(tmp_reg);
   614   orr(tmp_reg, tmp_reg, swap_reg);
   615   //if (os::is_MP()) {
   616   //  sync();
   617   //}
   618   cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
   619   if (need_tmp_reg) {
   620     pop(tmp_reg);
   621   }
   622   // If the biasing toward our thread failed, this means that
   623   // another thread succeeded in biasing it toward itself and we
   624   // need to revoke that bias. The revocation will occur in the
   625   // interpreter runtime in the slow case.
   626   if (PrintBiasedLockingStatistics) {
   627     Label L;
   628     bne(AT, R0, L);
   629     delayed()->nop();
   630     push(tmp_reg);
   631     push(A0);
   632     atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg);
   633     pop(A0);
   634     pop(tmp_reg);
   635     bind(L);
   636   }
   637   if (slow_case != NULL) {
   638     beq_far(AT, R0, *slow_case);
   639     delayed()->nop();
   640   }
   641   b(done);
   642   delayed()->nop();
   644   bind(try_rebias);
   645   // At this point we know the epoch has expired, meaning that the
   646   // current "bias owner", if any, is actually invalid. Under these
   647   // circumstances _only_, we are allowed to use the current header's
   648   // value as the comparison value when doing the cas to acquire the
   649   // bias in the current epoch. In other words, we allow transfer of
   650   // the bias from one thread to another directly in this situation.
   651   //
   652   // FIXME: due to a lack of registers we currently blow away the age
   653   // bits in this situation. Should attempt to preserve them.
   654   if (need_tmp_reg) {
   655     push(tmp_reg);
   656   }
   657   load_prototype_header(tmp_reg, obj_reg);
   658   get_thread(swap_reg);
   659   orr(tmp_reg, tmp_reg, swap_reg);
   660   ld_ptr(swap_reg, saved_mark_addr);
   662   //if (os::is_MP()) {
   663   //  sync();
   664   //}
   665   cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
   666   if (need_tmp_reg) {
   667     pop(tmp_reg);
   668   }
   669   // If the biasing toward our thread failed, then another thread
   670   // succeeded in biasing it toward itself and we need to revoke that
   671   // bias. The revocation will occur in the runtime in the slow case.
   672   if (PrintBiasedLockingStatistics) {
   673     Label L;
   674     bne(AT, R0, L);
   675     delayed()->nop();
   676     push(AT);
   677     push(tmp_reg);
   678     atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg);
   679     pop(tmp_reg);
   680     pop(AT);
   681     bind(L);
   682   }
   683   if (slow_case != NULL) {
   684     beq_far(AT, R0, *slow_case);
   685     delayed()->nop();
   686   }
   688   b(done);
   689   delayed()->nop();
   690   bind(try_revoke_bias);
   691   // The prototype mark in the klass doesn't have the bias bit set any
   692   // more, indicating that objects of this data type are not supposed
   693   // to be biased any more. We are going to try to reset the mark of
   694   // this object to the prototype value and fall through to the
   695   // CAS-based locking scheme. Note that if our CAS fails, it means
   696   // that another thread raced us for the privilege of revoking the
   697   // bias of this particular object, so it's okay to continue in the
   698   // normal locking code.
   699   //
   700   // FIXME: due to a lack of registers we currently blow away the age
   701   // bits in this situation. Should attempt to preserve them.
   702   ld_ptr(swap_reg, saved_mark_addr);
   704   if (need_tmp_reg) {
   705     push(tmp_reg);
   706   }
   707   load_prototype_header(tmp_reg, obj_reg);
   708   //if (os::is_MP()) {
   709   // lock();
   710   //}
   711   cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
   712   if (need_tmp_reg) {
   713     pop(tmp_reg);
   714   }
   715   // Fall through to the normal CAS-based lock, because no matter what
   716   // the result of the above CAS, some thread must have succeeded in
   717   // removing the bias bit from the object's header.
   718   if (PrintBiasedLockingStatistics) {
   719     Label L;
   720     bne(AT, R0, L);
   721     delayed()->nop();
   722     push(AT);
   723     push(tmp_reg);
   724     atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg);
   725     pop(tmp_reg);
   726     pop(AT);
   727     bind(L);
   728   }
   730   bind(cas_label);
   731   return null_check_offset;
   732 }
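// Editor's note: the xorr/xorr/andr fast-path test above reduces to the
// predicate sketched below (names are illustrative, not HotSpot API): the
// result is zero iff the mark word is biased to the current thread with a
// current epoch, ignoring only the age bits.
#if 0
static bool demo_biased_toward_us(uintptr_t mark, uintptr_t proto,
                                  uintptr_t thread, uintptr_t age_mask) {
  return (((proto ^ mark) ^ thread) & ~age_mask) == 0;
}
#endif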
   734 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
   735   assert(UseBiasedLocking, "why call this otherwise?");
   737   // Check for biased locking unlock case, which is a no-op
   738   // Note: we do not have to check the thread ID for two reasons.
   739   // First, the interpreter checks for IllegalMonitorStateException at
   740   // a higher level. Second, if the bias was revoked while we held the
   741   // lock, the object could not be rebiased toward another thread, so
   742   // the bias bit would be clear.
   743 #ifdef _LP64
   744   ld(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
   745   andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
   746   daddi(AT, R0, markOopDesc::biased_lock_pattern);
   747 #else
   748   lw(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
   749   andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
   750   addi(AT, R0, markOopDesc::biased_lock_pattern);
   751 #endif
   753   beq(AT, temp_reg, done);
   754   delayed()->nop();
   755 }
    757 // The stack pointer adjustment is needed; see InterpreterMacroAssembler::super_call_VM_leaf.
    758 // This method handles the stack alignment itself, so callers need not reserve stack space for the arguments.
   759 void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments) {
   760   Label L, E;
   762   assert(number_of_arguments <= 4, "just check");
   764   andi(AT, SP, 0xf);
   765   beq(AT, R0, L);
   766   delayed()->nop();
   767   daddi(SP, SP, -8);
   768   call(entry_point, relocInfo::runtime_call_type);
   769   delayed()->nop();
   770   daddi(SP, SP, 8);
   771   b(E);
   772   delayed()->nop();
   774   bind(L);
   775   call(entry_point, relocInfo::runtime_call_type);
   776   delayed()->nop();
   777   bind(E);
   778 }
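// Editor's note: in call_VM_leaf_base above, SP is assumed to be at least
// 8-byte aligned, so (SP & 0xf) can only be 0 or 8; a single
// daddi(SP, SP, -8) therefore restores the 16-byte alignment the ABI
// requires before the call, and daddi(SP, SP, 8) undoes it afterwards.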
   781 void MacroAssembler::jmp(address entry) {
   782   patchable_set48(T9, (long)entry);
   783   jr(T9);
   784 }
   786 void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) {
   787   switch (rtype) {
   788     case relocInfo::runtime_call_type:
   789     case relocInfo::none:
   790       jmp(entry);
   791       break;
   792     default:
   793       {
   794       InstructionMark im(this);
   795       relocate(rtype);
   796       patchable_set48(T9, (long)entry);
   797       jr(T9);
   798       }
   799       break;
   800   }
   801 }
   803 void MacroAssembler::jmp_far(Label& L) {
   804   if (L.is_bound()) {
   805     address entry = target(L);
   806     assert(entry != NULL, "jmp most probably wrong");
   807     InstructionMark im(this);
   809     relocate(relocInfo::internal_word_type);
   810     patchable_set48(T9, (long)entry);
   811   } else {
   812     InstructionMark im(this);
   813     L.add_patch_at(code(), locator());
   815     relocate(relocInfo::internal_word_type);
   816     patchable_set48(T9, (long)pc());
   817   }
   819   jr(T9);
   820   delayed()->nop();
   821 }
   822 void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
   823   int oop_index;
   824   if (obj) {
   825     oop_index = oop_recorder()->find_index(obj);
   826   } else {
   827     oop_index = oop_recorder()->allocate_metadata_index(obj);
   828   }
   829   relocate(metadata_Relocation::spec(oop_index));
   830   patchable_set48(AT, (long)obj);
   831   sd(AT, dst);
   832 }
   834 void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
   835   int oop_index;
   836   if (obj) {
   837     oop_index = oop_recorder()->find_index(obj);
   838   } else {
   839     oop_index = oop_recorder()->allocate_metadata_index(obj);
   840   }
   841   relocate(metadata_Relocation::spec(oop_index));
   842   patchable_set48(dst, (long)obj);
   843 }
   845 void MacroAssembler::call(address entry) {
    846 // C/C++ code assumes T9 holds the entry point, so we always move the entry into T9.
    847 // Perhaps there is a more graceful way to handle this. FIXME
    848 // For more info, see class NativeCall.
   849 #ifndef _LP64
   850   move(T9, (int)entry);
   851 #else
   852   patchable_set48(T9, (long)entry);
   853 #endif
   854   jalr(T9);
   855 }
   857 void MacroAssembler::call(address entry, relocInfo::relocType rtype) {
   858   switch (rtype) {
   859     case relocInfo::runtime_call_type:
   860     case relocInfo::none:
   861       call(entry);
   862       break;
   863     default:
   864       {
   865   InstructionMark im(this);
   866   relocate(rtype);
   867   call(entry);
   868       }
   869       break;
   870   }
   871 }
   873 void MacroAssembler::call(address entry, RelocationHolder& rh)
   874 {
   875   switch (rh.type()) {
   876     case relocInfo::runtime_call_type:
   877     case relocInfo::none:
   878       call(entry);
   879       break;
   880     default:
   881       {
   882   InstructionMark im(this);
   883   relocate(rh);
   884   call(entry);
   885       }
   886       break;
   887   }
   888 }
   890 void MacroAssembler::ic_call(address entry) {
   891   RelocationHolder rh = virtual_call_Relocation::spec(pc());
   892   patchable_set48(IC_Klass, (long)Universe::non_oop_word());
   893   assert(entry != NULL, "call most probably wrong");
   894   InstructionMark im(this);
   895   relocate(rh);
   896   patchable_call(entry);
   897 }
   899 void MacroAssembler::c2bool(Register r) {
   900   Label L;
   901   Assembler::beq(r, R0, L);
   902   delayed()->nop();
   903   move(r, 1);
   904   bind(L);
   905 }
   907 #ifndef PRODUCT
   908 extern "C" void findpc(intptr_t x);
   909 #endif
   911 void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
    912   // In order to get locks to work, we need to fake an in_VM state
   913   JavaThread* thread = JavaThread::current();
   914   JavaThreadState saved_state = thread->thread_state();
   915   thread->set_thread_state(_thread_in_vm);
   916   if (ShowMessageBoxOnError) {
   917     JavaThread* thread = JavaThread::current();
   918     JavaThreadState saved_state = thread->thread_state();
   919     thread->set_thread_state(_thread_in_vm);
   920     if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
   921       ttyLocker ttyl;
   922       BytecodeCounter::print();
   923     }
   924     // To see where a verify_oop failed, get $ebx+40/X for this frame.
   925     // This is the value of eip which points to where verify_oop will return.
   926     if (os::message_box(msg, "Execution stopped, print registers?")) {
   927       ttyLocker ttyl;
   928       tty->print_cr("eip = 0x%08x", eip);
   929 #ifndef PRODUCT
   930       tty->cr();
   931       findpc(eip);
   932       tty->cr();
   933 #endif
   934       tty->print_cr("rax, = 0x%08x", rax);
   935       tty->print_cr("rbx, = 0x%08x", rbx);
   936       tty->print_cr("rcx = 0x%08x", rcx);
   937       tty->print_cr("rdx = 0x%08x", rdx);
   938       tty->print_cr("rdi = 0x%08x", rdi);
   939       tty->print_cr("rsi = 0x%08x", rsi);
   940       tty->print_cr("rbp, = 0x%08x", rbp);
   941       tty->print_cr("rsp = 0x%08x", rsp);
   942       BREAKPOINT;
   943     }
   944   } else {
   945     ttyLocker ttyl;
   946     ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
   947     assert(false, "DEBUG MESSAGE");
   948   }
   949   ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
   950 }
   952 void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) {
   953   if ( ShowMessageBoxOnError ) {
   954     JavaThreadState saved_state = JavaThread::current()->thread_state();
   955     JavaThread::current()->set_thread_state(_thread_in_vm);
   956     {
    957       // In order to get locks to work, we need to fake an in_VM state
   958       ttyLocker ttyl;
   959       ::tty->print_cr("EXECUTION STOPPED: %s\n", msg);
   960       if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
   961   BytecodeCounter::print();
   962       }
   964       //      if (os::message_box(msg, "Execution stopped, print registers?"))
   965       //        regs->print(::tty);
   966     }
   967     ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state);
   968   }
   969   else
   970     ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
   971 }
   974 void MacroAssembler::stop(const char* msg) {
   975   li(A0, (long)msg);
   976 #ifndef _LP64
    977   // reserve space for the argument.
   978   addiu(SP, SP, - 1 * wordSize);
   979 #endif
   980   call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
   981   delayed()->nop();
   982 #ifndef _LP64
    983   // restore the space reserved for the argument
   984   addiu(SP, SP, 1 * wordSize);
   985 #endif
   986   brk(17);
   987 }
   989 void MacroAssembler::warn(const char* msg) {
   990 #ifdef _LP64
   991   pushad();
   992   li(A0, (long)msg);
   993   push(S2);
   994   move(AT, -(StackAlignmentInBytes));
   995   move(S2, SP);     // use S2 as a sender SP holder
   996   andr(SP, SP, AT); // align stack as required by ABI
   997   call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
   998   delayed()->nop();
   999   move(SP, S2);     // use S2 as a sender SP holder
  1000   pop(S2);
  1001   popad();
  1002 #else
  1003   pushad();
  1004   addi(SP, SP, -4);
  1005   sw(A0, SP, -1 * wordSize);
  1006   li(A0, (long)msg);
  1007   addi(SP, SP, -1 * wordSize);
  1008   call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  1009   delayed()->nop();
  1010   addi(SP, SP, 1 * wordSize);
  1011   lw(A0, SP, -1 * wordSize);
  1012   addi(SP, SP, 4);
  1013   popad();
  1014 #endif
   1015 }
  1017 void MacroAssembler::print_reg(Register reg) {
  1018 /*
  1019 char *s = getenv("PRINT_REG");
  1020 if (s == NULL)
  1021   return;
  1022 if (strcmp(s, "1") != 0)
  1023   return;
  1024 */
  1025   void * cur_pc = pc();
  1026   pushad();
  1027   NOT_LP64(push(FP);)
  1029   li(A0, (long)reg->name());
  1030   if (reg == SP)
  1031     addiu(A1, SP, wordSize * 23); //23 registers saved in pushad()
  1032   else if (reg == A0)
  1033     ld(A1, SP, wordSize * 19); //A0 has been modified by li(A0, (long)reg->name()). Ugly Code!
  1034   else
  1035     move(A1, reg);
  1036   li(A2, (long)cur_pc);
  1037   push(S2);
  1038   move(AT, -(StackAlignmentInBytes));
  1039   move(S2, SP);     // use S2 as a sender SP holder
  1040   andr(SP, SP, AT); // align stack as required by ABI
  1041   call(CAST_FROM_FN_PTR(address, SharedRuntime::print_reg_with_pc),relocInfo::runtime_call_type);
  1042   delayed()->nop();
  1043   move(SP, S2);     // use S2 as a sender SP holder
  1044   pop(S2);
  1045   NOT_LP64(pop(FP);)
  1046   popad();
  1048 /*
  1049   pushad();
  1050 #ifdef _LP64
  1051   if (reg == SP)
  1052     addiu(A0, SP, wordSize * 23); //23 registers saved in pushad()
  1053   else
  1054     move(A0, reg);
  1055   call(CAST_FROM_FN_PTR(address, SharedRuntime::print_long),relocInfo::runtime_call_type);
  1056   delayed()->nop();
  1057 #else
  1058   push(FP);
  1059   move(A0, reg);
  1060   dsrl32(A1, reg, 0);
  1061   //call(CAST_FROM_FN_PTR(address, SharedRuntime::print_int),relocInfo::runtime_call_type);
  1062   call(CAST_FROM_FN_PTR(address, SharedRuntime::print_long),relocInfo::runtime_call_type);
  1063   delayed()->nop();
  1064   pop(FP);
  1065 #endif
  1066   popad();
  1067   pushad();
  1068   NOT_LP64(push(FP);)
  1069   char b[50];
  1070   sprintf((char *)b, " pc: %p\n",cur_pc);
  1071   li(A0, (long)(char *)b);
  1072   call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str),relocInfo::runtime_call_type);
  1073   delayed()->nop();
  1074   NOT_LP64(pop(FP);)
  1075   popad();
  1076 */
   1077 }
  1079 void MacroAssembler::print_reg(FloatRegister reg) {
  1080   void * cur_pc = pc();
  1081   pushad();
  1082   NOT_LP64(push(FP);)
  1083   li(A0, (long)reg->name());
  1084   push(S2);
  1085   move(AT, -(StackAlignmentInBytes));
  1086   move(S2, SP);     // use S2 as a sender SP holder
  1087   andr(SP, SP, AT); // align stack as required by ABI
  1088   call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str),relocInfo::runtime_call_type);
  1089   delayed()->nop();
  1090   move(SP, S2);     // use S2 as a sender SP holder
  1091   pop(S2);
  1092   NOT_LP64(pop(FP);)
  1093   popad();
  1095   pushad();
  1096   NOT_LP64(push(FP);)
  1097 #if 1
  1098   move(FP, SP);
  1099   move(AT, -(StackAlignmentInBytes));
  1100   andr(SP , SP , AT);
  1101   mov_d(F12, reg);
  1102   call(CAST_FROM_FN_PTR(address, SharedRuntime::print_double),relocInfo::runtime_call_type);
  1103   delayed()->nop();
  1104   move(SP, FP);
  1105 #else
  1106   mov_s(F12, reg);
  1107   //call(CAST_FROM_FN_PTR(address, SharedRuntime::print_float),relocInfo::runtime_call_type);
  1108   //delayed()->nop();
  1109 #endif
  1110   NOT_LP64(pop(FP);)
  1111   popad();
  1113 #if 0
  1114   pushad();
  1115   NOT_LP64(push(FP);)
  1116   char* b = new char[50];
  1117   sprintf(b, " pc: %p\n", cur_pc);
  1118   li(A0, (long)b);
  1119   call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str),relocInfo::runtime_call_type);
  1120   delayed()->nop();
  1121   NOT_LP64(pop(FP);)
  1122   popad();
  1123 #endif
   1124 }
  1126 void MacroAssembler::increment(Register reg, int imm) {
  1127   if (!imm) return;
  1128   if (is_simm16(imm)) {
  1129 #ifdef _LP64
  1130     daddiu(reg, reg, imm);
  1131 #else
  1132     addiu(reg, reg, imm);
  1133 #endif
  1134   } else {
  1135     move(AT, imm);
  1136 #ifdef _LP64
  1137     daddu(reg, reg, AT);
  1138 #else
  1139     addu(reg, reg, AT);
  1140 #endif
   1141   }
   1142 }
  1144 void MacroAssembler::decrement(Register reg, int imm) {
  1145   increment(reg, -imm);
   1146 }
  1149 void MacroAssembler::call_VM(Register oop_result,
  1150                              address entry_point,
  1151                              bool check_exceptions) {
  1152   call_VM_helper(oop_result, entry_point, 0, check_exceptions);
   1153 }
  1155 void MacroAssembler::call_VM(Register oop_result,
  1156                              address entry_point,
  1157                              Register arg_1,
  1158                              bool check_exceptions) {
  1159   if (arg_1!=A1) move(A1, arg_1);
  1160   call_VM_helper(oop_result, entry_point, 1, check_exceptions);
   1161 }
  1163 void MacroAssembler::call_VM(Register oop_result,
  1164                              address entry_point,
  1165                              Register arg_1,
  1166                              Register arg_2,
  1167                              bool check_exceptions) {
  1168   if (arg_1!=A1) move(A1, arg_1);
  1169   if (arg_2!=A2) move(A2, arg_2);
  1170   assert(arg_2 != A1, "smashed argument");
  1171   call_VM_helper(oop_result, entry_point, 2, check_exceptions);
   1172 }
  1174 void MacroAssembler::call_VM(Register oop_result,
  1175                              address entry_point,
  1176                              Register arg_1,
  1177                              Register arg_2,
  1178                              Register arg_3,
  1179                              bool check_exceptions) {
  1180   if (arg_1!=A1) move(A1, arg_1);
  1181   if (arg_2!=A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  1182   if (arg_3!=A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
  1183   call_VM_helper(oop_result, entry_point, 3, check_exceptions);
   1184 }
  1186 void MacroAssembler::call_VM(Register oop_result,
  1187                              Register last_java_sp,
  1188                              address entry_point,
  1189                              int number_of_arguments,
  1190                              bool check_exceptions) {
  1191   call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions);
   1192 }
  1194 void MacroAssembler::call_VM(Register oop_result,
  1195                              Register last_java_sp,
  1196                              address entry_point,
  1197                              Register arg_1,
  1198                              bool check_exceptions) {
  1199   if (arg_1 != A1) move(A1, arg_1);
  1200   call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
   1201 }
  1203 void MacroAssembler::call_VM(Register oop_result,
  1204                              Register last_java_sp,
  1205                              address entry_point,
  1206                              Register arg_1,
  1207                              Register arg_2,
  1208                              bool check_exceptions) {
  1209   if (arg_1 != A1) move(A1, arg_1);
  1210   if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  1211   call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
   1212 }
  1214 void MacroAssembler::call_VM(Register oop_result,
  1215                              Register last_java_sp,
  1216                              address entry_point,
  1217                              Register arg_1,
  1218                              Register arg_2,
  1219                              Register arg_3,
  1220                              bool check_exceptions) {
  1221   if (arg_1 != A1) move(A1, arg_1);
  1222   if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  1223   if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
  1224   call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
   1225 }
  1227 void MacroAssembler::call_VM_base(Register oop_result,
  1228                                   Register java_thread,
  1229                                   Register last_java_sp,
  1230                                   address  entry_point,
  1231                                   int      number_of_arguments,
  1232                                   bool     check_exceptions) {
  1234   address before_call_pc;
  1235   // determine java_thread register
  1236   if (!java_thread->is_valid()) {
  1237 #ifndef OPT_THREAD
  1238     java_thread = T2;
  1239     get_thread(java_thread);
  1240 #else
  1241     java_thread = TREG;
  1242 #endif
   1243   }
  1244   // determine last_java_sp register
  1245   if (!last_java_sp->is_valid()) {
  1246     last_java_sp = SP;
   1247   }
  1248   // debugging support
  1249   assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
   1250   assert(number_of_arguments <= 4   , "cannot have more than 4 arguments");
  1251   assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
  1252   assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");
  1254   assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save ebp");
  1256   // set last Java frame before call
  1257   before_call_pc = (address)pc();
  1258   set_last_Java_frame(java_thread, last_java_sp, FP, before_call_pc);
  1260   // do the call
  1261   move(A0, java_thread);
  1262   call(entry_point, relocInfo::runtime_call_type);
  1263   delayed()->nop();
  1265   // restore the thread (cannot use the pushed argument since arguments
  1266   // may be overwritten by C code generated by an optimizing compiler);
  1267   // however can use the register value directly if it is callee saved.
  1268 #ifndef OPT_THREAD
  1269   get_thread(java_thread);
  1270 #else
  1271 #ifdef ASSERT
   1272   {
  1273     Label L;
  1274     get_thread(AT);
  1275     beq(java_thread, AT, L);
  1276     delayed()->nop();
  1277     stop("MacroAssembler::call_VM_base: TREG not callee saved?");
  1278     bind(L);
   1279   }
  1280 #endif
  1281 #endif
  1283   // discard thread and arguments
  1284   ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
  1285   // reset last Java frame
  1286   reset_last_Java_frame(java_thread, false);
  1288   check_and_handle_popframe(java_thread);
  1289   check_and_handle_earlyret(java_thread);
  1290   if (check_exceptions) {
  1291     // check for pending exceptions (java_thread is set upon return)
  1292     Label L;
  1293 #ifdef _LP64
  1294     ld(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
  1295 #else
  1296     lw(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
  1297 #endif
  1298     beq(AT, R0, L);
  1299     delayed()->nop();
  1300     li(AT, before_call_pc);
  1301     push(AT);
  1302     jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
  1303     delayed()->nop();
  1304     bind(L);
   1305   }
  1307   // get oop result if there is one and reset the value in the thread
  1308   if (oop_result->is_valid()) {
  1309 #ifdef _LP64
  1310     ld(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset()));
  1311     sd(R0, java_thread, in_bytes(JavaThread::vm_result_offset()));
  1312 #else
  1313     lw(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset()));
  1314     sw(R0, java_thread, in_bytes(JavaThread::vm_result_offset()));
  1315 #endif
  1316     verify_oop(oop_result);
   1317   }
   1318 }
  1320 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
  1322   move(V0, SP);
  1323   //we also reserve space for java_thread here
  1324 #ifndef _LP64
  1325   daddi(SP, SP, (1 + number_of_arguments) * (- wordSize));
  1326 #endif
  1327   move(AT, -(StackAlignmentInBytes));
  1328   andr(SP, SP, AT);
  1329   call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions);
   1330 }
  1333 void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  1334   call_VM_leaf_base(entry_point, number_of_arguments);
   1335 }
  1337 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  1338   if (arg_0 != A0) move(A0, arg_0);
  1339   call_VM_leaf(entry_point, 1);
   1340 }
  1342 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
  1343   if (arg_0 != A0) move(A0, arg_0);
  1344   if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument");
  1345   call_VM_leaf(entry_point, 2);
   1346 }
  1348 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  1349   if (arg_0 != A0) move(A0, arg_0);
  1350   if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument");
  1351   if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument");
  1352   call_VM_leaf(entry_point, 3);
   1353 }
  1354 void MacroAssembler::super_call_VM_leaf(address entry_point) {
  1355   MacroAssembler::call_VM_leaf_base(entry_point, 0);
   1356 }
  1359 void MacroAssembler::super_call_VM_leaf(address entry_point,
  1360                                                    Register arg_1) {
  1361   if (arg_1 != A0) move(A0, arg_1);
  1362   MacroAssembler::call_VM_leaf_base(entry_point, 1);
   1363 }
  1366 void MacroAssembler::super_call_VM_leaf(address entry_point,
  1367                                                    Register arg_1,
  1368                                                    Register arg_2) {
  1369   if (arg_1 != A0) move(A0, arg_1);
  1370   if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument");
  1371   MacroAssembler::call_VM_leaf_base(entry_point, 2);
   1372 }
  1373 void MacroAssembler::super_call_VM_leaf(address entry_point,
  1374                                                    Register arg_1,
  1375                                                    Register arg_2,
  1376                                                    Register arg_3) {
  1377   if (arg_1 != A0) move(A0, arg_1);
  1378   if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument");
  1379   if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument");
  1380   MacroAssembler::call_VM_leaf_base(entry_point, 3);
   1381 }
  1383 void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
   1384 }
  1386 void MacroAssembler::check_and_handle_popframe(Register java_thread) {
   1387 }
  1389 void MacroAssembler::null_check(Register reg, int offset) {
  1390   if (needs_explicit_null_check(offset)) {
  1391     // provoke OS NULL exception if reg = NULL by
  1392     // accessing M[reg] w/o changing any (non-CC) registers
   1393     // NOTE: a plain load is plenty here to provoke a segv
  1394     lw(AT, reg, 0);
  1395     // Note: should probably use testl(rax, Address(reg, 0));
  1396     //       may be shorter code (however, this version of
  1397     //       testl needs to be implemented first)
  1398   } else {
  1399     // nothing to do, (later) access of M[reg + offset]
  1400     // will provoke OS NULL exception if reg = NULL
   1401   }
   1402 }
  1404 void MacroAssembler::enter() {
  1405   push2(RA, FP);
  1406   move(FP, SP);
   1407 }
  1409 void MacroAssembler::leave() {
  1410 #ifndef _LP64
  1411   //move(SP, FP);
  1412   //pop2(FP, RA);
  1413   addi(SP, FP, 2 * wordSize);
  1414   lw(RA, SP, - 1 * wordSize);
  1415   lw(FP, SP, - 2 * wordSize);
  1416 #else
  1417   daddi(SP, FP, 2 * wordSize);
  1418   ld(RA, SP, - 1 * wordSize);
  1419   ld(FP, SP, - 2 * wordSize);
  1420 #endif
   1421 }
  1422 /*
  1423 void MacroAssembler::os_breakpoint() {
  1424   // instead of directly emitting a breakpoint, call os:breakpoint for better debugability
  1425   // (e.g., MSVC can't call ps() otherwise)
  1426   call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
   1427 }
  1428 */
  1429 void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp) {
  1430   // determine java_thread register
  1431   if (!java_thread->is_valid()) {
  1432 #ifndef OPT_THREAD
  1433     java_thread = T1;
  1434     get_thread(java_thread);
  1435 #else
  1436     java_thread = TREG;
  1437 #endif
   1438   }
  1439   // we must set sp to zero to clear frame
  1440   st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
  1441   // must clear fp, so that compiled frames are not confused; it is possible
  1442   // that we need it only for debugging
  1443   if(clear_fp) {
  1444     st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));
   1445   }
  1447   // Always clear the pc because it could have been set by make_walkable()
  1448   st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
   1449 }
  1451 void MacroAssembler::reset_last_Java_frame(bool clear_fp) {
  1452   Register thread = TREG;
  1453 #ifndef OPT_THREAD
  1454   get_thread(thread);
  1455 #endif
  1456   // we must set sp to zero to clear frame
  1457   sd(R0, Address(thread, JavaThread::last_Java_sp_offset()));
  1458   // must clear fp, so that compiled frames are not confused; it is
  1459   // possible that we need it only for debugging
  1460   if (clear_fp) {
  1461     sd(R0, Address(thread, JavaThread::last_Java_fp_offset()));
   1462   }
  1464   // Always clear the pc because it could have been set by make_walkable()
  1465   sd(R0, Address(thread, JavaThread::last_Java_pc_offset()));
   1466 }
  1468 // Write serialization page so VM thread can do a pseudo remote membar.
  1469 // We use the current thread pointer to calculate a thread specific
  1470 // offset to write to within the page. This minimizes bus traffic
  1471 // due to cache line collision.
  1472 void MacroAssembler::serialize_memory(Register thread, Register tmp) {
  1473   move(tmp, thread);
  1474   srl(tmp, tmp,os::get_serialize_page_shift_count());
  1475   move(AT, (os::vm_page_size() - sizeof(int)));
  1476   andr(tmp, tmp,AT);
   1477   sw(tmp, Address(tmp, (intptr_t)os::get_memory_serialize_page()));
   1478 }
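// Editor's note: a hedged sketch of the per-thread slot computed by
// serialize_memory above (illustration only; names are not this file's API):
#if 0
static int demo_serialize_offset(uintptr_t thread, int page_shift,
                                 int page_size) {
  // distinct threads hash to distinct int-sized slots within the page
  return (int)((thread >> page_shift) & (uintptr_t)(page_size - sizeof(int)));
}
#endif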
  1480 // Calls to C land
  1481 //
   1482 // When entering C land, the fp & sp of the last Java frame have to be recorded
  1483 // in the (thread-local) JavaThread object. When leaving C land, the last Java fp
  1484 // has to be reset to 0. This is required to allow proper stack traversal.
  1485 void MacroAssembler::set_last_Java_frame(Register java_thread,
  1486                                          Register last_java_sp,
  1487                                          Register last_java_fp,
  1488                                          address  last_java_pc) {
  1489   // determine java_thread register
  1490   if (!java_thread->is_valid()) {
  1491 #ifndef OPT_THREAD
  1492     java_thread = T2;
  1493     get_thread(java_thread);
  1494 #else
  1495     java_thread = TREG;
  1496 #endif
   1497   }
  1498   // determine last_java_sp register
  1499   if (!last_java_sp->is_valid()) {
  1500     last_java_sp = SP;
   1501   }
  1503   // last_java_fp is optional
  1504   if (last_java_fp->is_valid()) {
  1505     st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));
   1506   }
  1508   // last_java_pc is optional
  1509   if (last_java_pc != NULL) {
  1510     relocate(relocInfo::internal_word_type);
  1511     patchable_set48(AT, (long)last_java_pc);
  1512     st_ptr(AT, java_thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
   1513   }
  1514   st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
   1515 }
  1517 void MacroAssembler::set_last_Java_frame(Register last_java_sp,
  1518                                          Register last_java_fp,
  1519                                          address  last_java_pc) {
  1520   // determine last_java_sp register
  1521   if (!last_java_sp->is_valid()) {
  1522     last_java_sp = SP;
   1523   }
  1525   Register thread = TREG;
  1526 #ifndef OPT_THREAD
  1527   get_thread(thread);
  1528 #endif
  1529   // last_java_fp is optional
  1530   if (last_java_fp->is_valid()) {
  1531     sd(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset()));
   1532   }
  1534   // last_java_pc is optional
  1535   if (last_java_pc != NULL) {
  1536     relocate(relocInfo::internal_word_type);
  1537     patchable_set48(AT, (long)last_java_pc);
  1538     st_ptr(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
   1539   }
  1541   sd(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset()));
   1542 }
  1544 //////////////////////////////////////////////////////////////////////////////////
  1545 #if INCLUDE_ALL_GCS
  1547 void MacroAssembler::g1_write_barrier_pre(Register obj,
  1548                                           Register pre_val,
  1549                                           Register thread,
  1550                                           Register tmp,
  1551                                           bool tosca_live,
  1552                                           bool expand_call) {
  1554   // If expand_call is true then we expand the call_VM_leaf macro
  1555   // directly to skip generating the check by
  1556   // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
  1558 #ifdef _LP64
  1559   assert(thread == TREG, "must be");
  1560 #endif // _LP64
  1562   Label done;
  1563   Label runtime;
  1565   assert(pre_val != noreg, "check this code");
  1567   if (obj != noreg) {
  1568     assert_different_registers(obj, pre_val, tmp);
  1569     assert(pre_val != V0, "check this code");
   1570   }
  1572   Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
  1573                                        PtrQueue::byte_offset_of_active()));
  1574   Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
  1575                                        PtrQueue::byte_offset_of_index()));
  1576   Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
  1577                                        PtrQueue::byte_offset_of_buf()));
  1580   // Is marking active?
  1581   if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
  1582     lw(AT, in_progress);
  1583   } else {
  1584     assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
  1585     lb(AT, in_progress);
  1586   }
  1587   beq(AT, R0, done);
  1588   delayed()->nop();
  1590   // Do we need to load the previous value?
  1591   if (obj != noreg) {
  1592     load_heap_oop(pre_val, Address(obj, 0));
  1593   }
  1595   // Is the previous value null?
  1596   beq(pre_val, R0, done);
  1597   delayed()->nop();
  1599   // Can we store original value in the thread's buffer?
  1600   // Is index == 0?
  1601   // (The index field is typed as size_t.)
  1603   ld(tmp, index);
  1604   beq(tmp, R0, runtime);
  1605   delayed()->nop();
  1607   daddiu(tmp, tmp, -1 * wordSize);
  1608   sd(tmp, index);
  1609   ld(AT, buffer);
  1610   daddu(tmp, tmp, AT);
  1612   // Record the previous value
  1613   sd(pre_val, tmp, 0);
  1614   beq(R0, R0, done);
  1615   delayed()->nop();
  1617   bind(runtime);
  1618   // save the live input values
  1619   if (tosca_live) push(V0);
  1621   if (obj != noreg && obj != V0) push(obj);
  1623   if (pre_val != V0) push(pre_val);
  1625   // Calling the runtime using the regular call_VM_leaf mechanism generates
  1626   // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
  1627   // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.
  1628   //
  1629   // If we are generating the pre-barrier without a frame (e.g. in the
  1630   // intrinsified Reference.get() routine) then ebp might be pointing to
  1631   // the caller frame and so this check will most likely fail at runtime.
  1632   //
  1633   // Expanding the call directly bypasses the generation of the check.
  1634   // So when we do not have a full interpreter frame on the stack
  1635   // expand_call should be passed true.
  1637   NOT_LP64( push(thread); )
  1639   if (expand_call) {
  1640     LP64_ONLY( assert(pre_val != A1, "smashed arg"); )
  1641     if (thread != A1) move(A1, thread);
  1642     if (pre_val != A0) move(A0, pre_val);
  1643     MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
  1644   } else {
  1645     call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
  1646   }
  1648   NOT_LP64( pop(thread); )
  1650   // restore the live input values
  1651   if (pre_val != V0)
  1652     pop(pre_val);
  1654   if (obj != noreg && obj != V0)
  1655     pop(obj);
  1657   if(tosca_live) pop(V0);
  1659   bind(done);
  1660 }
  1662 void MacroAssembler::g1_write_barrier_post(Register store_addr,
  1663                                            Register new_val,
  1664                                            Register thread,
  1665                                            Register tmp,
  1666                                            Register tmp2) {
  1667   assert(tmp  != AT, "must be");
  1668   assert(tmp2 != AT, "must be");
  1669 #ifdef _LP64
  1670   assert(thread == TREG, "must be");
  1671 #endif // _LP64
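         // A rough C-style sketch of the G1 post-barrier emitted below
         // (illustrative only; names are descriptive):
         //
         //   if (((store_addr ^ new_val) >> LogOfHRGrainBytes) == 0) return; // same region
         //   if (new_val == NULL) return;
         //   jbyte* card = ct->byte_map_base + (store_addr >> card_shift);
         //   if (*card == g1_young_card_val()) return;   // young regions need no barrier
         //   StoreLoad_barrier();                        // the sync() below
         //   if (*card == dirty_card_val()) return;      // somebody dirtied it already
         //   *card = dirty_card_val();
         //   enqueue(card) in the dirty card queue, or call SharedRuntime::g1_wb_post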
  1673   Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
  1674                                        PtrQueue::byte_offset_of_index()));
  1675   Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
  1676                                        PtrQueue::byte_offset_of_buf()));
  1678   BarrierSet* bs = Universe::heap()->barrier_set();
  1679   CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  1680   assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
  1682   Label done;
  1683   Label runtime;
  1685   // Does store cross heap regions?
  1686   xorr(AT, store_addr, new_val);
  1687   dsrl(AT, AT, HeapRegion::LogOfHRGrainBytes);
  1688   beq(AT, R0, done);
  1689   delayed()->nop();
  1692   // crosses regions, storing NULL?
  1693   beq(new_val, R0, done);
  1694   delayed()->nop();
  1696   // storing region crossing non-NULL, is card already dirty?
  1697   const Register card_addr = tmp;
  1698   const Register cardtable = tmp2;
  1700   move(card_addr, store_addr);
  1701   dsrl(card_addr, card_addr, CardTableModRefBS::card_shift);
  1702   // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT
  1703   // a valid address and therefore is not properly handled by the relocation code.
  1704   set64(cardtable, (intptr_t)ct->byte_map_base);
  1705   daddu(card_addr, card_addr, cardtable);
  1707   lb(AT, card_addr, 0);
  1708   daddiu(AT, AT, -1 * (int)G1SATBCardTableModRefBS::g1_young_card_val());
  1709   beq(AT, R0, done);
  1710   delayed()->nop();
  1712   sync();
  1713   lb(AT, card_addr, 0);
  1714   daddiu(AT, AT, -1 * (int)CardTableModRefBS::dirty_card_val());
  1715   beq(AT, R0, done);
  1716   delayed()->nop();
  1719   // storing a region crossing, non-NULL oop, card is clean.
  1720   // dirty card and log.
  1721   move(AT, (int)CardTableModRefBS::dirty_card_val());
  1722   sb(AT, card_addr, 0);
  1724   lw(AT, queue_index);
  1725   beq(AT, R0, runtime);
  1726   delayed()->nop();
  1727   daddiu(AT, AT, -1 * wordSize);
  1728   sw(AT, queue_index);
  1729   ld(tmp2, buffer);
  1730 #ifdef _LP64
  1731   ld(AT, queue_index);
  1732   daddu(tmp2, tmp2, AT);
  1733   sd(card_addr, tmp2, 0);
  1734 #else
  1735   lw(AT, queue_index);
  1736   addu32(tmp2, tmp2, AT);
  1737   sw(card_addr, tmp2, 0);
  1738 #endif
  1739   beq(R0, R0, done);
  1740   delayed()->nop();
  1742   bind(runtime);
  1743   // save the live input values
  1744   push(store_addr);
  1745   push(new_val);
  1746 #ifdef _LP64
  1747   call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, TREG);
  1748 #else
  1749   push(thread);
  1750   call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
  1751   pop(thread);
  1752 #endif
  1753   pop(new_val);
  1754   pop(store_addr);
  1756   bind(done);
  1757 }
  1759 #endif // INCLUDE_ALL_GCS
  1760 //////////////////////////////////////////////////////////////////////////////////
  1763 void MacroAssembler::store_check(Register obj) {
  1764   // Does a store check for the oop in register obj. The content of
  1765   // register obj is destroyed afterwards.
  1766   store_check_part_1(obj);
  1767   store_check_part_2(obj);
  1768 }
  1770 void MacroAssembler::store_check(Register obj, Address dst) {
  1771   store_check(obj);
  1772 }
  1775 // Split the store check operation so that other instructions can be scheduled in between.
  1776 void MacroAssembler::store_check_part_1(Register obj) {
  1777   BarrierSet* bs = Universe::heap()->barrier_set();
  1778   assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  1779 #ifdef _LP64
  1780   dsrl(obj, obj, CardTableModRefBS::card_shift);
  1781 #else
  1782   shr(obj, CardTableModRefBS::card_shift);
  1783 #endif
  1784 }
  1786 void MacroAssembler::store_check_part_2(Register obj) {
  1787   BarrierSet* bs = Universe::heap()->barrier_set();
  1788   assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  1789   CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  1790   assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
  1792   set64(AT, (long)ct->byte_map_base);
  1793 #ifdef _LP64
  1794   dadd(AT, AT, obj);
  1795 #else
  1796   add(AT, AT, obj);
  1797 #endif
  1798   if (UseConcMarkSweepGC) sync();
  1799   sb(R0, AT, 0);
  1800 }
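       // Together the two parts implement plain card marking; a C-style sketch
       // (illustrative only):
       //
       //   jbyte* base = ct->byte_map_base;
       //   base[(uintptr_t)obj >> CardTableModRefBS::card_shift] = 0;  // dirty_card_val()
       //
       // part_1 only shifts obj (destroying it), so independent instructions can
       // be scheduled between the shift and the store in part_2.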
  1802 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
  1803 void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
  1804                                    Register t1, Register t2, Label& slow_case) {
  1805   assert_different_registers(obj, var_size_in_bytes, t1, t2, AT);
  1807   Register end = t2;
  1808 #ifndef OPT_THREAD
  1809   Register thread = t1;
  1810   get_thread(thread);
  1811 #else
  1812   Register thread = TREG;
  1813 #endif
  1814   verify_tlab(t1, t2);  // blows t1 & t2
  1816   ld_ptr(obj, thread, in_bytes(JavaThread::tlab_top_offset()));
  1818   if (var_size_in_bytes == NOREG) {
  1819     // No need to move con_size_in_bytes into a register first.
  1820     assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first");
  1821     addi(end, obj, con_size_in_bytes);
  1822   } else {
  1823     add(end, obj, var_size_in_bytes);
  1824   }
  1826   ld_ptr(AT, thread, in_bytes(JavaThread::tlab_end_offset()));
  1827   sltu(AT, AT, end);
  1828   bne_far(AT, R0, slow_case);
  1829   delayed()->nop();
  1832   // update the tlab top pointer
  1833   st_ptr(end, thread, in_bytes(JavaThread::tlab_top_offset()));
  1835   verify_tlab(t1, t2);
  1836 }
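       // tlab_allocate above is plain bump-pointer allocation; a C-style sketch
       // (illustrative only):
       //
       //   HeapWord* obj = thread->tlab_top();
       //   HeapWord* end = obj + size;
       //   if (end > thread->tlab_end()) goto slow_case;  // no CAS: TLABs are thread-local
       //   thread->set_tlab_top(end);
       //   return obj;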
  1838 // Defines obj, preserves var_size_in_bytes
  1839 void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
  1840                                    Register t1, Register t2, Label& slow_case) {
  1841   assert_different_registers(obj, var_size_in_bytes, t1, AT);
  1842   if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
  1843     // No allocation in the shared eden.
  1844     b_far(slow_case);
  1845     delayed()->nop();
  1846   } else {
  1848 #ifndef _LP64
  1849     Address heap_top(t1, Assembler::split_low((intptr_t)Universe::heap()->top_addr()));
  1850     lui(t1, split_high((intptr_t)Universe::heap()->top_addr()));
  1851 #else
  1852     Address heap_top(t1);
  1853     li(t1, (long)Universe::heap()->top_addr());
  1854 #endif
  1855     ld_ptr(obj, heap_top);
  1857     Register end = t2;
  1858     Label retry;
  1860     bind(retry);
  1861     if (var_size_in_bytes == NOREG) {
  1862       // No need to move con_size_in_bytes into a register first.
  1863       assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first");
  1864       addi(end, obj, con_size_in_bytes);
  1865     } else {
  1866       add(end, obj, var_size_in_bytes);
  1867     }
  1868     // if end < obj then we wrapped around => object too long => slow case
  1869     sltu(AT, end, obj);
  1870     bne_far(AT, R0, slow_case);
  1871     delayed()->nop();
  1873     li(AT, (long)Universe::heap()->end_addr());
  1874     ld_ptr(AT, AT, 0);
  1875     sltu(AT, AT, end);
  1876     bne_far(AT, R0, slow_case);
  1877     delayed()->nop();
  1878     // Compare obj with the current top addr; if they are still equal, store
  1879     // end as the new top addr. On MIPS there is no ZF as on x86: cmpxchg
  1880     // below sets AT to 1 on success and to 0 on failure.
  1881     //if (os::is_MP()) {
  1882     //  sync();
  1883     //}
  1885     // if someone beat us on the allocation, try again, otherwise continue
  1886     cmpxchg(end, heap_top, obj);
  1887     beq_far(AT, R0, retry);
  1888     delayed()->nop();
  1889   }
  1890 }
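       // eden_allocate above is the classic CAS-based bump allocation in the shared
       // eden; a C-style sketch (illustrative only):
       //
       //   do {
       //     HeapWord* obj = *heap_top_addr;
       //     HeapWord* end = obj + size;
       //     if (end < obj) goto slow_case;             // wrapped around: object too large
       //     if (end > *heap_end_addr) goto slow_case;  // eden exhausted
       //   } while (!CAS(heap_top_addr, obj, end));     // retry if another thread won
       //   return obj;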
  1892 // C2 doesn't invoke this one.
  1893 void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case) {
  1894   Register top = T0;
  1895   Register t1  = T1;
  1896   Register t2  = T9;
  1897   Register t3  = T3;
  1898   Register thread_reg = T8;
  1899   assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ T2, A4);
  1900   Label do_refill, discard_tlab;
  1902   if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
  1903     // No allocation in the shared eden.
  1904     b(slow_case);
  1905     delayed()->nop();
  1906   }
  1908   get_thread(thread_reg);
  1910   ld_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
  1911   ld_ptr(t1,  thread_reg, in_bytes(JavaThread::tlab_end_offset()));
  1913   // calculate amount of free space
  1914   sub(t1, t1, top);
  1915   shr(t1, LogHeapWordSize);
  1917   // Retain tlab and allocate object in shared space if
  1918   // the amount free in the tlab is too large to discard.
  1919   ld_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
  1920   slt(AT, t2, t1);
  1921   beq(AT, R0, discard_tlab);
  1922   delayed()->nop();
  1924   // Retain
  1925 #ifndef _LP64
  1926   move(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
  1927 #else
  1928   li(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
  1929 #endif
  1930   add(t2, t2, AT);
  1931   st_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
  1933   if (TLABStats) {
  1934     // increment number of slow_allocations
  1935     lw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
  1936     addiu(AT, AT, 1);
  1937     sw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
  1938   }
  1939   b(try_eden);
  1940   delayed()->nop();
  1942   bind(discard_tlab);
  1943   if (TLABStats) {
  1944     // increment number of refills
  1945     lw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
  1946     addi(AT, AT, 1);
  1947     sw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
  1948     // accumulate wastage -- t1 is amount free in tlab
  1949     lw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
  1950     add(AT, AT, t1);
  1951     sw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
  1952   }
  1954   // if tlab is currently allocated (top or end != null) then
  1955   // fill [top, end + alignment_reserve) with array object
  1956   beq(top, R0, do_refill);
  1957   delayed()->nop();
  1959   // set up the mark word
  1960   li(AT, (long)markOopDesc::prototype()->copy_set_hash(0x2));
  1961   st_ptr(AT, top, oopDesc::mark_offset_in_bytes());
  1963   // set the length to the remaining space
  1964   addi(t1, t1, - typeArrayOopDesc::header_size(T_INT));
  1965   addi(t1, t1, ThreadLocalAllocBuffer::alignment_reserve());
  1966   shl(t1, log2_intptr(HeapWordSize/sizeof(jint)));
  1967   sw(t1, top, arrayOopDesc::length_offset_in_bytes());
  1969   // set klass to intArrayKlass
  1970 #ifndef _LP64
  1971   lui(AT, split_high((intptr_t)Universe::intArrayKlassObj_addr()));
  1972   lw(t1, AT, split_low((intptr_t)Universe::intArrayKlassObj_addr()));
  1973 #else
  1974   li(AT, (intptr_t)Universe::intArrayKlassObj_addr());
  1975   ld_ptr(t1, AT, 0);
  1976 #endif
  1977   //st_ptr(t1, top, oopDesc::klass_offset_in_bytes());
  1978   store_klass(top, t1);
  1980   ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_start_offset()));
  1981   subu(t1, top, t1);
  1982   incr_allocated_bytes(thread_reg, t1, 0);
  1984   // refill the tlab with an eden allocation
  1985   bind(do_refill);
  1986   ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
  1987   shl(t1, LogHeapWordSize);
  1988   // add object_size ??
  1989   eden_allocate(top, t1, 0, t2, t3, slow_case);
  1991   // Check that t1 was preserved in eden_allocate.
  1992 #ifdef ASSERT
  1993   if (UseTLAB) {
  1994     Label ok;
  1995     assert_different_registers(thread_reg, t1);
  1996     ld_ptr(AT, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
  1997     shl(AT, LogHeapWordSize);
  1998     beq(AT, t1, ok);
  1999     delayed()->nop();
  2000     stop("assert(t1 != tlab size)");
  2001     should_not_reach_here();
  2003     bind(ok);
  2004   }
  2005 #endif
  2006   st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_start_offset()));
  2007   st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
  2008   add(top, top, t1);
  2009   addi(top, top, - ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
  2010   st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_end_offset()));
  2011   verify_tlab(t1, t2);
  2012   b(retry);
  2013   delayed()->nop();
  2014 }
  2016 void MacroAssembler::incr_allocated_bytes(Register thread,
  2017                                           Register var_size_in_bytes,
  2018                                           int con_size_in_bytes,
  2019                                           Register t1) {
  2020   if (!thread->is_valid()) {
  2021 #ifndef OPT_THREAD
  2022     assert(t1->is_valid(), "need temp reg");
  2023     thread = t1;
  2024     get_thread(thread);
  2025 #else
  2026     thread = TREG;
  2027 #endif
  2028   }
  2030   ld_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset()));
  2031   if (var_size_in_bytes->is_valid()) {
  2032     addu(AT, AT, var_size_in_bytes);
  2033   } else {
  2034     addiu(AT, AT, con_size_in_bytes);
  2035   }
  2036   st_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset()));
  2037 }
  2039 static const double     pi_4 =  0.7853981633974483;
  2041 // The x86 version is too clumsy; we do not need that complexity here. FIXME if this proves wrong.
  2042 // The argument (a double) must be in F12/F13.
  2043 //void MacroAssembler::trigfunc(char trig, bool preserve_cpu_regs, int num_fpu_regs_in_use) {
  2044 // We need to preserve any registers that may be modified during the call.
  2045 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
  2046   // Save all modified registers here.
  2047   // FIXME: the disassembly of trigfunc only uses V0, V1, T9, SP and RA, so saving V0, V1 and T9 would suffice.
  2048   pushad();
  2049   // Reserve stack space before the call.
  2050   addi(SP, SP, -wordSize * 2);
  2051   switch (trig) {
  2052     case 's':
  2053       call(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), relocInfo::runtime_call_type);
  2054       delayed()->nop();
  2055       break;
  2056     case 'c':
  2057       call(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), relocInfo::runtime_call_type);
  2058       delayed()->nop();
  2059       break;
  2060     case 't':
  2061       call(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), relocInfo::runtime_call_type);
  2062       delayed()->nop();
  2063       break;
  2064     default:
  2065       assert(false, "bad intrinsic");
  2066       break;
  2067   }
  2069   addi(SP, SP, wordSize * 2);
  2070   popad();
  2071 }
  2073 #ifdef _LP64
  2074 void MacroAssembler::li(Register rd, long imm) {
  2075   if (imm <= max_jint && imm >= min_jint) {
  2076     li32(rd, (int)imm);
  2077   } else if (julong(imm) <= 0xFFFFFFFF) {
  2078     assert_not_delayed();
  2079     // lui sign-extends, so we can't use that.
  2080     ori(rd, R0, julong(imm) >> 16);
  2081     dsll(rd, rd, 16);
  2082     ori(rd, rd, split_low(imm));
  2083   } else if ((imm > 0) && is_simm16(imm >> 32)) {
  2084     /* A 48-bit address */
  2085     li48(rd, imm);
  2086   } else {
  2087     li64(rd, imm);
  2088   }
  2089 }
  2090 #else
  2091 void MacroAssembler::li(Register rd, long imm) {
  2092   li32(rd, (int)imm);
  2093 }
  2094 #endif
  2096 void MacroAssembler::li32(Register reg, int imm) {
  2097   if (is_simm16(imm)) {
  2098     /* for imm < 0, we should use addi instead of addiu.
  2100      *  java.lang.StringCoding$StringDecoder.decode(jobject, jint, jint)
  2102      *  78 move [int:-1|I] [a0|I]
  2103      *    : daddi a0, zero, 0xffffffff  (correct)
  2104      *    : daddiu a0, zero, 0xffffffff (incorrect)
  2105      */
  2106     if (imm >= 0)
  2107       addiu(reg, R0, imm);
  2108     else
  2109       addi(reg, R0, imm);
  2110   } else {
  2111     lui(reg, split_low(imm >> 16));
  2112     if (split_low(imm))
  2113       ori(reg, reg, split_low(imm));
  2114   }
  2115 }
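       // For example, li32(reg, 0x12345678) emits
       //   lui  reg, 0x1234        # reg = 0x12340000
       //   ori  reg, reg, 0x5678   # reg = 0x12345678
       // while a simm16 value such as -1 takes only the single addi above.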
  2117 #ifdef _LP64
  2118 void MacroAssembler::set64(Register d, jlong value) {
  2119   assert_not_delayed();
  2121   int hi = (int)(value >> 32);
  2122   int lo = (int)(value & ~0);
  2124   if (value == lo) {  // 32-bit integer
  2125     if (is_simm16(value)) {
  2126       daddiu(d, R0, value);
  2127     } else {
  2128       lui(d, split_low(value >> 16));
  2129       if (split_low(value)) {
  2130         ori(d, d, split_low(value));
  2131       }
  2132     }
  2133   } else if (hi == 0) {  // hardware zero-extends to upper 32
  2134       ori(d, R0, julong(value) >> 16);
  2135       dsll(d, d, 16);
  2136       if (split_low(value)) {
  2137         ori(d, d, split_low(value));
  2138       }
  2139   } else if ((value> 0) && is_simm16(value >> 32)) {  // li48
  2140     // 4 insts
  2141     li48(d, value);
  2142   } else {  // li64
  2143     // 6 insts
  2144     li64(d, value);
  2145   }
  2146 }
  2149 int MacroAssembler::insts_for_set64(jlong value) {
  2150   int hi = (int)(value >> 32);
  2151   int lo = (int)(value & ~0);
  2153   int count = 0;
  2155   if (value == lo) {  // 32-bit integer
  2156     if (is_simm16(value)) {
  2157       //daddiu(d, R0, value);
  2158       count++;
  2159     } else {
  2160       //lui(d, split_low(value >> 16));
  2161       count++;
  2162       if (split_low(value)) {
  2163         //ori(d, d, split_low(value));
  2164         count++;
  2165       }
  2166     }
  2167   } else if (hi == 0) {  // hardware zero-extends to upper 32
  2168       //ori(d, R0, julong(value) >> 16);
  2169       //dsll(d, d, 16);
  2170       count += 2;
  2171       if (split_low(value)) {
  2172         //ori(d, d, split_low(value));
  2173         count++;
  2174       }
  2175   } else if ((value> 0) && is_simm16(value >> 32)) {  // li48
  2176     // 4 insts
  2177     //li48(d, value);
  2178     count += 4;
  2179   } else {  // li64
  2180     // 6 insts
  2181     //li64(d, value);
  2182     count += 6;
  2183   }
  2185   return count;
  2186 }
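       // Note: insts_for_set64 must mirror set64 above instruction for instruction,
       // since callers use it to precompute the size of the emitted code.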
  2188 void MacroAssembler::patchable_set48(Register d, jlong value) {
  2189   assert_not_delayed();
  2191   int hi = (int)(value >> 32);
  2192   int lo = (int)(value & ~0);
  2194   int count = 0;
  2196   if (value == lo) {  // 32-bit integer
  2197     if (is_simm16(value)) {
  2198       daddiu(d, R0, value);
  2199       count += 1;
  2200     } else {
  2201       lui(d, split_low(value >> 16));
  2202       count += 1;
  2203       if (split_low(value)) {
  2204         ori(d, d, split_low(value));
  2205         count += 1;
  2206       }
  2207     }
  2208   } else if (hi == 0) {  // hardware zero-extends to upper 32
  2209       ori(d, R0, julong(value) >> 16);
  2210       dsll(d, d, 16);
  2211       count += 2;
  2212       if (split_low(value)) {
  2213         ori(d, d, split_low(value));
  2214         count += 1;
  2215       }
  2216   } else if ((value> 0) && is_simm16(value >> 32)) {  // li48
  2217     // 4 insts
  2218     li48(d, value);
  2219     count += 4;
  2220   } else {  // li64
  2221     tty->print_cr("value = 0x%x", value);
  2222     guarantee(false, "Not supported yet !");
  2225   for (; count < 4; count++) {
  2226     nop();
  2227   }
  2228 }
  2230 void MacroAssembler::patchable_set32(Register d, jlong value) {
  2231   assert_not_delayed();
  2233   int hi = (int)(value >> 32);
  2234   int lo = (int)(value & ~0);
  2236   int count = 0;
  2238   if (value == lo) {  // 32-bit integer
  2239     if (is_simm16(value)) {
  2240       daddiu(d, R0, value);
  2241       count += 1;
  2242     } else {
  2243       lui(d, split_low(value >> 16));
  2244       count += 1;
  2245       if (split_low(value)) {
  2246         ori(d, d, split_low(value));
  2247         count += 1;
  2248       }
  2249     }
  2250   } else if (hi == 0) {  // hardware zero-extends to upper 32
  2251       ori(d, R0, julong(value) >> 16);
  2252       dsll(d, d, 16);
  2253       count += 2;
  2254       if (split_low(value)) {
  2255         ori(d, d, split_low(value));
  2256         count += 1;
  2257       }
  2258   } else {
  2259     tty->print_cr("value = 0x%x", value);
  2260     guarantee(false, "Not supported yet !");
  2263   for (; count < 3; count++) {
  2264     nop();
  2265   }
  2266 }
  2268 void MacroAssembler::patchable_call32(Register d, jlong value) {
  2269   assert_not_delayed();
  2271   int hi = (int)(value >> 32);
  2272   int lo = (int)(value & ~0);
  2274   int count = 0;
  2276   if (value == lo) {  // 32-bit integer
  2277     if (is_simm16(value)) {
  2278       daddiu(d, R0, value);
  2279       count += 1;
  2280     } else {
  2281       lui(d, split_low(value >> 16));
  2282       count += 1;
  2283       if (split_low(value)) {
  2284         ori(d, d, split_low(value));
  2285         count += 1;
  2286       }
  2287     }
  2288   } else {
  2289     tty->print_cr("value = 0x%x", value);
  2290     guarantee(false, "Not supported yet !");
  2293   for (; count < 2; count++) {
  2294     nop();
  2295   }
  2296 }
  2298 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
  2299   assert(UseCompressedClassPointers, "should only be used for compressed header");
  2300   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
  2302   int klass_index = oop_recorder()->find_index(k);
  2303   RelocationHolder rspec = metadata_Relocation::spec(klass_index);
  2304   long narrowKlass = (long)Klass::encode_klass(k);
  2306   relocate(rspec, Assembler::narrow_oop_operand);
  2307   patchable_set48(dst, narrowKlass);
  2308 }
  2311 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
  2312   assert(UseCompressedOops, "should only be used for compressed header");
  2313   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
  2315   int oop_index = oop_recorder()->find_index(obj);
  2316   RelocationHolder rspec = oop_Relocation::spec(oop_index);
  2318   relocate(rspec, Assembler::narrow_oop_operand);
  2319   patchable_set48(dst, oop_index);
  2320 }
  2322 void MacroAssembler::li64(Register rd, long imm) {
  2323   assert_not_delayed();
  2324   lui(rd, imm >> 48);
  2325   ori(rd, rd, split_low(imm >> 32));
  2326   dsll(rd, rd, 16);
  2327   ori(rd, rd, split_low(imm >> 16));
  2328   dsll(rd, rd, 16);
  2329   ori(rd, rd, split_low(imm));
  2330 }
  2332 void MacroAssembler::li48(Register rd, long imm) {
  2333   assert_not_delayed();
  2334   assert(is_simm16(imm >> 32), "Not a 48-bit address");
  2335   lui(rd, imm >> 32);
  2336   ori(rd, rd, split_low(imm >> 16));
  2337   dsll(rd, rd, 16);
  2338   ori(rd, rd, split_low(imm));
  2339 }
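       // For example, li48(rd, 0x123456789ABC) emits
       //   lui  rd, 0x1234         # rd = 0x0000000012340000
       //   ori  rd, rd, 0x5678     # rd = 0x0000000012345678
       //   dsll rd, rd, 16         # rd = 0x0000123456780000
       //   ori  rd, rd, 0x9abc     # rd = 0x0000123456789abc
       // The is_simm16(imm >> 32) assert keeps lui's sign extension harmless.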
  2340 #endif
  2341 // NOTE: unlike the i486 version, we do not push EAX here;
  2342 // x86 saves EAX because it uses EAX as the jump register.
  2343 void MacroAssembler::verify_oop(Register reg, const char* s) {
  2344   /*
  2345      if (!VerifyOops) return;
  2347   // Pass register number to verify_oop_subroutine
  2348   char* b = new char[strlen(s) + 50];
  2349   sprintf(b, "verify_oop: %s: %s", reg->name(), s);
  2350   push(rax);                          // save rax,
  2351   push(reg);                          // pass register argument
  2352   ExternalAddress buffer((address) b);
  2353   // avoid using pushptr, as it modifies scratch registers
  2354   // and our contract is not to modify anything
  2355   movptr(rax, buffer.addr());
  2356   push(rax);
  2357   // call indirectly to solve generation ordering problem
  2358   movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  2359   call(rax);
  2360    */
  2361   if (!VerifyOops) return;
  2362   const char * b = NULL;
  2363   stringStream ss;
  2364   ss.print("verify_oop: %s: %s", reg->name(), s);
  2365   b = code_string(ss.as_string());
  2366 #ifdef _LP64
  2367   pushad();
  2368   move(A1, reg);
  2369   li(A0, (long)b);
  2370   li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
  2371   ld(T9, AT, 0);
  2372   jalr(T9);
  2373   delayed()->nop();
  2374   popad();
  2375 #else
  2376   // Pass register number to verify_oop_subroutine
  2377   sw(T0, SP, - wordSize);
  2378   sw(T1, SP, - 2*wordSize);
  2379   sw(RA, SP, - 3*wordSize);
  2380   sw(A0, SP, - 4*wordSize);
  2381   sw(A1, SP, - 5*wordSize);
  2382   sw(AT, SP, - 6*wordSize);
  2383   sw(T9, SP, - 7*wordSize);
  2384   addiu(SP, SP, - 7 * wordSize);
  2385   move(A1, reg);
  2386   li(A0, (long)b);
  2387   // call indirectly to solve generation ordering problem
  2388   li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
  2389   lw(T9, AT, 0);
  2390   jalr(T9);
  2391   delayed()->nop();
  2392   lw(T0, SP, 6* wordSize);
  2393   lw(T1, SP, 5* wordSize);
  2394   lw(RA, SP, 4* wordSize);
  2395   lw(A0, SP, 3* wordSize);
  2396   lw(A1, SP, 2* wordSize);
  2397   lw(AT, SP, 1* wordSize);
  2398   lw(T9, SP, 0* wordSize);
  2399   addiu(SP, SP, 7 * wordSize);
  2400 #endif
  2401 }
  2404 void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
  2405   if (!VerifyOops) {
  2406     nop();
  2407     return;
  2408   }
  2409   // Pass register number to verify_oop_subroutine
  2410   const char * b = NULL;
  2411   stringStream ss;
  2412   ss.print("verify_oop_addr: %s",  s);
  2413   b = code_string(ss.as_string());
  2415   st_ptr(T0, SP, - wordSize);
  2416   st_ptr(T1, SP, - 2*wordSize);
  2417   st_ptr(RA, SP, - 3*wordSize);
  2418   st_ptr(A0, SP, - 4*wordSize);
  2419   st_ptr(A1, SP, - 5*wordSize);
  2420   st_ptr(AT, SP, - 6*wordSize);
  2421   st_ptr(T9, SP, - 7*wordSize);
  2422   ld_ptr(A1, addr);   // addr may use SP, so load from it before changing SP
  2423   addiu(SP, SP, - 7 * wordSize);
  2425   li(A0, (long)b);
  2426   // call indirectly to solve generation ordering problem
  2427   li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
  2428   ld_ptr(T9, AT, 0);
  2429   jalr(T9);
  2430   delayed()->nop();
  2431   ld_ptr(T0, SP, 6* wordSize);
  2432   ld_ptr(T1, SP, 5* wordSize);
  2433   ld_ptr(RA, SP, 4* wordSize);
  2434   ld_ptr(A0, SP, 3* wordSize);
  2435   ld_ptr(A1, SP, 2* wordSize);
  2436   ld_ptr(AT, SP, 1* wordSize);
  2437   ld_ptr(T9, SP, 0* wordSize);
  2438   addiu(SP, SP, 7 * wordSize);
  2439 }
  2441 // used registers :  T0, T1
  2442 void MacroAssembler::verify_oop_subroutine() {
  2443   // RA: ra
  2444   // A0: char* error message
  2445   // A1: oop   object to verify
  2447   Label exit, error;
  2448   // increment counter
  2449   li(T0, (long)StubRoutines::verify_oop_count_addr());
  2450   lw(AT, T0, 0);
  2451 #ifdef _LP64
  2452   daddi(AT, AT, 1);
  2453 #else
  2454   addi(AT, AT, 1);
  2455 #endif
  2456   sw(AT, T0, 0);
  2458   // make sure object is 'reasonable'
  2459   beq(A1, R0, exit);         // if obj is NULL it is ok
  2460   delayed()->nop();
  2462   // Check if the oop is in the right area of memory
  2463   //const int oop_mask = Universe::verify_oop_mask();
  2464   //const int oop_bits = Universe::verify_oop_bits();
  2465   const uintptr_t oop_mask = Universe::verify_oop_mask();
  2466   const uintptr_t oop_bits = Universe::verify_oop_bits();
  2467   li(AT, oop_mask);
  2468   andr(T0, A1, AT);
  2469   li(AT, oop_bits);
  2470   bne(T0, AT, error);
  2471   delayed()->nop();
  2473   // make sure klass is 'reasonable'
  2474   // reinit_heapbase() and load_klass() were added for compressed oops
  2475   reinit_heapbase();
  2477   load_klass(T0, A1);
  2478   beq(T0, R0, error);                        // if klass is NULL it is broken
  2479   delayed()->nop();
  2480   #if 0
  2481   //FIXME:wuhui.
  2482   // Check if the klass is in the right area of memory
  2483   //const int klass_mask = Universe::verify_klass_mask();
  2484   //const int klass_bits = Universe::verify_klass_bits();
  2485   const uintptr_t klass_mask = Universe::verify_klass_mask();
  2486   const uintptr_t klass_bits = Universe::verify_klass_bits();
  2488   li(AT, klass_mask);
  2489   andr(T1, T0, AT);
  2490   li(AT, klass_bits);
  2491   bne(T1, AT, error);
  2492   delayed()->nop();
  2493   // make sure klass' klass is 'reasonable'
  2494   //add for compressedoops
  2495   load_klass(T0, T0);
  2496   beq(T0, R0, error);  // if klass' klass is NULL it is broken
  2497   delayed()->nop();
  2499   li(AT, klass_mask);
  2500   andr(T1, T0, AT);
  2501   li(AT, klass_bits);
  2502   bne(T1, AT, error);
  2503   delayed()->nop();     // if klass not in right area of memory it is broken too.
  2504 #endif
  2505   // return if everything seems ok
  2506   bind(exit);
  2508   jr(RA);
  2509   delayed()->nop();
  2511   // handle errors
  2512   bind(error);
  2513   pushad();
  2514 #ifndef _LP64
  2515   addi(SP, SP, (-1) * wordSize);
  2516 #endif
  2517   call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  2518   delayed()->nop();
  2519 #ifndef _LP64
  2520   addiu(SP, SP, 1 * wordSize);
  2521 #endif
  2522   popad();
  2523   jr(RA);
  2524   delayed()->nop();
  2525 }
  2527 void MacroAssembler::verify_tlab(Register t1, Register t2) {
  2528 #ifdef ASSERT
  2529   assert_different_registers(t1, t2, AT);
  2530   if (UseTLAB && VerifyOops) {
  2531     Label next, ok;
  2533     get_thread(t1);
  2535     ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset()));
  2536     ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset()));
  2537     sltu(AT, t2, AT);
  2538     beq(AT, R0, next);
  2539     delayed()->nop();
  2541     stop("assert(top >= start)");
  2543     bind(next);
  2544     ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset()));
  2545     sltu(AT, AT, t2);
  2546     beq(AT, R0, ok);
  2547     delayed()->nop();
  2549     stop("assert(top <= end)");
  2551     bind(ok);
  2552   }
  2554 #endif
  2555 }
  2556  RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
  2557                                                        Register tmp,
  2558                                                        int offset) {
  2559    intptr_t value = *delayed_value_addr;
  2560    if (value != 0)
  2561      return RegisterOrConstant(value + offset);
  2562    AddressLiteral a(delayed_value_addr);
  2563    // load indirectly to solve generation ordering problem
  2564    //movptr(tmp, ExternalAddress((address) delayed_value_addr));
  2565    //ld(tmp, a);
  2566    if (offset != 0)
  2567      daddi(tmp,tmp, offset);
  2569    return RegisterOrConstant(tmp);
  2570  }
  2572 void MacroAssembler::hswap(Register reg) {
  2573   //short
  2574   //andi(reg, reg, 0xffff);
  2575   srl(AT, reg, 8);
  2576   sll(reg, reg, 24);
  2577   sra(reg, reg, 16);
  2578   orr(reg, reg, AT);
  2579 }
  2581 void MacroAssembler::huswap(Register reg) {
  2582 #ifdef _LP64
  2583   dsrl(AT, reg, 8);
  2584   dsll(reg, reg, 24);
  2585   dsrl(reg, reg, 16);
  2586   orr(reg, reg, AT);
  2587   andi(reg, reg, 0xffff);
  2588 #else
  2589   //andi(reg, reg, 0xffff);
  2590   srl(AT, reg, 8);
  2591   sll(reg, reg, 24);
  2592   srl(reg, reg, 16);
  2593   orr(reg, reg, AT);
  2594 #endif
  2595 }
  2597 // Byte-swap a 32-bit value; the trick below needs only one
  2598 // extra register (AT).
  2599 void MacroAssembler::swap(Register reg) {
  2600   srl(AT, reg, 8);
  2601   sll(reg, reg, 24);
  2602   orr(reg, reg, AT);
  2603   //reg : 4 1 2 3
  2604   srl(AT, AT, 16);
  2605   xorr(AT, AT, reg);
  2606   andi(AT, AT, 0xff);
  2607   //AT : 0 0 0 1^3);
  2608   xorr(reg, reg, AT);
  2609   //reg : 4 1 2 1
  2610   sll(AT, AT, 16);
  2611   xorr(reg, reg, AT);
  2612   //reg : 4 3 2 1
  2613 }
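       // Equivalent C sketch of the trick above, with the bytes of reg labelled
       // [1 2 3 4] from most to least significant (illustrative only):
       //
       //   uint32_t swap32(uint32_t x) {     // x = [1 2 3 4]
       //     uint32_t t = x >> 8;            // t = [0 1 2 3]
       //     x = (x << 24) | t;              // x = [4 1 2 3]
       //     t = ((t >> 16) ^ x) & 0xff;     // t = [0 0 0 1^3]
       //     x ^= t | (t << 16);             // x = [4 3 2 1]
       //     return x;
       //   }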
  2615 #ifdef _LP64
  2617 /* do 32-bit CAS using MIPS64 lld/scd
  2619   cas_int should only compare the low 32 bits of the memory value.
  2620   However, lld/scd perform a 64-bit operation, which violates the intention of cas_int.
  2621   To simulate a 32-bit atomic operation, the value loaded with LLD is split into
  2622   two halves, and only the low 32 bits are compared. If they are equal, the low 32 bits
  2623   of newval, combined with the high 32 bits of the memory value, are stored together with SCD.
  2625 Example:
  2627       double d = 3.1415926;
  2628       System.err.println("hello" + d);
  2630   sun.misc.FloatingDecimal$1.<init>()
  2632    `- java.util.concurrent.atomic.AtomicInteger::compareAndSet()
  2634   38 cas_int [a7a7|J] [a0|I] [a6|I]
  2635 // a0: 0xffffffffe8ea9f63 pc: 0x55647f3354
  2636 // a6: 0x4ab325aa
  2638 again:
  2639    0x00000055647f3c5c: lld at, 0x0(a7)                          ; 64-bit load, "0xe8ea9f63"
  2641    0x00000055647f3c60: sll t9, at, 0                            ; t9: low-32 bits (sign extended)
  2642    0x00000055647f3c64: dsrl32 t8, at, 0                         ; t8: high-32 bits
  2643    0x00000055647f3c68: dsll32 t8, t8, 0
  2644    0x00000055647f3c6c: bne t9, a0, 0x00000055647f3c9c           ; goto nequal
  2645    0x00000055647f3c70: sll zero, zero, 0
  2647    0x00000055647f3c74: ori v1, zero, 0xffffffff                 ; v1: low-32 bits of newval (sign unextended)
  2648    0x00000055647f3c78: dsll v1, v1, 16                          ; v1 = a6 & 0xFFFFFFFF;
  2649    0x00000055647f3c7c: ori v1, v1, 0xffffffff
  2650    0x00000055647f3c80: and v1, a6, v1
  2651    0x00000055647f3c84: or at, t8, v1
  2652    0x00000055647f3c88: scd at, 0x0(a7)
  2653    0x00000055647f3c8c: beq at, zero, 0x00000055647f3c5c         ; goto again
  2654    0x00000055647f3c90: sll zero, zero, 0
  2655    0x00000055647f3c94: beq zero, zero, 0x00000055647f45ac       ; goto done
  2656    0x00000055647f3c98: sll zero, zero, 0
  2657 nequal:
  2658    0x00000055647f45a4: dadd a0, t9, zero
  2659    0x00000055647f45a8: dadd at, zero, zero
  2660 done:
  2661 */
  2663 void MacroAssembler::cmpxchg32(Register x_reg, Address dest, Register c_reg) {
  2664   /* MIPS64 can use ll/sc for 32-bit atomic memory access */
  2665   Label done, again, nequal;
  2667   bind(again);
  2669   if(UseSyncLevel >= 3000 || UseSyncLevel < 2000) sync();
  2670   ll(AT, dest);
  2671   bne(AT, c_reg, nequal);
  2672   delayed()->nop();
  2674   move(AT, x_reg);
  2675   sc(AT, dest);
  2676   beq(AT, R0, again);
  2677   delayed()->nop();
  2678   b(done);
  2679   delayed()->nop();
  2681   // not xchged
  2682   bind(nequal);
  2683   sync();
  2684   move(c_reg, AT);
  2685   move(AT, R0);
  2687   bind(done);
  2688 }
  2689 #endif  // cmpxchg32
  2691 void MacroAssembler::cmpxchg(Register x_reg, Address dest, Register c_reg) {
  2692   Label done, again, nequal;
  2694   bind(again);
  2695   if(UseSyncLevel >= 3000 || UseSyncLevel < 2000) sync();
  2696 #ifdef _LP64
  2697   lld(AT, dest);
  2698 #else
  2699   ll(AT, dest);
  2700 #endif
  2701   bne(AT, c_reg, nequal);
  2702   delayed()->nop();
  2704   move(AT, x_reg);
  2705 #ifdef _LP64
  2706   scd(AT, dest);
  2707 #else
  2708   sc(AT, dest);
  2709 #endif
  2710   beq(AT, R0, again);
  2711   delayed()->nop();
  2712   b(done);
  2713   delayed()->nop();
  2715   // not xchged
  2716   bind(nequal);
  2717   sync();
  2718   move(c_reg, AT);
  2719   move(AT, R0);
  2721   bind(done);
  2722 }
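       // The contract of cmpxchg in C-like pseudocode (illustrative only):
       //
       //   for (;;) {
       //     AT = load_linked(dest);             // ll/lld
       //     if (AT != c_reg) {                  // compare failed:
       //       c_reg = AT;                       //   expose the observed value
       //       AT = 0;                           //   and report failure
       //       break;
       //     }
       //     AT = x_reg;
       //     if (store_conditional(dest, AT)) {  // sc/scd
       //       AT = 1;                           // success
       //       break;
       //     }
       //   }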
  2724 void MacroAssembler::cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi) {
  2725   Label done, again, nequal;
  2727   Register x_reg = x_regLo;
  2728   dsll32(x_regHi, x_regHi, 0);
  2729   dsll32(x_regLo, x_regLo, 0);
  2730   dsrl32(x_regLo, x_regLo, 0);
  2731   orr(x_reg, x_regLo, x_regHi);
  2733   Register c_reg = c_regLo;
  2734   dsll32(c_regHi, c_regHi, 0);
  2735   dsll32(c_regLo, c_regLo, 0);
  2736   dsrl32(c_regLo, c_regLo, 0);
  2737   orr(c_reg, c_regLo, c_regHi);
  2739   bind(again);
  2741   if(UseSyncLevel >= 3000 || UseSyncLevel < 2000) sync();
  2742   lld(AT, dest);
  2743   bne(AT, c_reg, nequal);
  2744   delayed()->nop();
  2746   //move(AT, x_reg);
  2747   dadd(AT, x_reg, R0);
  2748   scd(AT, dest);
  2749   beq(AT, R0, again);
  2750   delayed()->nop();
  2751   b(done);
  2752   delayed()->nop();
  2754   // not xchged
  2755   bind(nequal);
  2756   sync();
  2757   //move(c_reg, AT);
  2758   //move(AT, R0);
  2759   dadd(c_reg, AT, R0);
  2760   dadd(AT, R0, R0);
  2761   bind(done);
  2762 }
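       // The dsll32/dsrl32/orr prologue above simply packs each lo/hi pair into
       // one 64-bit value so a single lld/scd pair can be used; in C terms:
       //
       //   uint64_t packed = ((uint64_t)hi << 32) | (uint32_t)lo;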
  2764 // Make sure the three registers are different.
  2765 void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) {
  2766   assert_different_registers(tmp, fs, ft);
  2767   div_s(tmp, fs, ft);
  2768   trunc_l_s(tmp, tmp);
  2769   cvt_s_l(tmp, tmp);
  2770   mul_s(tmp, tmp, ft);
  2771   sub_s(fd, fs, tmp);
  2772 }
  2774 // Make sure the three registers are different.
  2775 void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) {
  2776   assert_different_registers(tmp, fs, ft);
  2777   div_d(tmp, fs, ft);
  2778   trunc_l_d(tmp, tmp);
  2779   cvt_d_l(tmp, tmp);
  2780   mul_d(tmp, tmp, ft);
  2781   sub_d(fd, fs, tmp);
  2782 }
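       // Both helpers compute fd = fs - trunc(fs / ft) * ft, i.e. the remainder of
       // a division truncated toward zero; e.g. rem_d(5.5, 2.0) = 5.5 - 2.0 * 2.0 = 1.5.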
  2784 // Fast_Lock and Fast_Unlock used by C2
  2786 // Because the transitions from emitted code to the runtime
  2787 // monitorenter/exit helper stubs are so slow it's critical that
  2788 // we inline both the stack-locking fast-path and the inflated fast path.
  2789 //
  2790 // See also: cmpFastLock and cmpFastUnlock.
  2791 //
  2792 // What follows is a specialized inline transliteration of the code
  2793 // in slow_enter() and slow_exit().  If we're concerned about I$ bloat
  2794 // another option would be to emit TrySlowEnter and TrySlowExit methods
  2795 // at startup-time.  These methods would accept arguments as
  2796 // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
  2797 // indications in the icc.ZFlag.  Fast_Lock and Fast_Unlock would simply
  2798 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
  2799 // In practice, however, the # of lock sites is bounded and is usually small.
  2800 // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
  2801 // if the processor uses simple bimodal branch predictors keyed by EIP
  2802 // Since the helper routines would be called from multiple synchronization
  2803 // sites.
  2804 //
  2805 // An even better approach would be write "MonitorEnter()" and "MonitorExit()"
  2806 // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
  2807 // to those specialized methods.  That'd give us a mostly platform-independent
  2808 // implementation that the JITs could optimize and inline at their pleasure.
  2809 // Done correctly, the only time we'd need to cross to native code would be
  2810 // to park() or unpark() threads.  We'd also need a few more unsafe operators
  2811 // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
  2812 // (b) explicit barriers or fence operations.
  2813 //
  2814 // TODO:
  2815 //
  2816 // *  Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
  2817 //    This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
  2818 //    Given TLAB allocation, Self is usually manifested in a register, so passing it into
  2819 //    the lock operators would typically be faster than reifying Self.
  2820 //
  2821 // *  Ideally I'd define the primitives as:
  2822 //       fast_lock   (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
  2823 //       fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
  2824 //    Unfortunately ADLC bugs prevent us from expressing the ideal form.
  2825 //    Instead, we're stuck with a rather awkward and brittle register assignments below.
  2826 //    Furthermore the register assignments are overconstrained, possibly resulting in
  2827 //    sub-optimal code near the synchronization site.
  2828 //
  2829 // *  Eliminate the sp-proximity tests and just use "== Self" tests instead.
  2830 //    Alternately, use a better sp-proximity test.
  2831 //
  2832 // *  Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
  2833 //    Either one is sufficient to uniquely identify a thread.
  2834 //    TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
  2835 //
  2836 // *  Intrinsify notify() and notifyAll() for the common cases where the
  2837 //    object is locked by the calling thread but the waitlist is empty.
  2838 //    avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
  2839 //
  2840 // *  use jccb and jmpb instead of jcc and jmp to improve code density.
  2841 //    But beware of excessive branch density on AMD Opterons.
  2842 //
  2843 // *  Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
  2844 //    or failure of the fast-path.  If the fast-path fails then we pass
  2845 //    control to the slow-path, typically in C.  In Fast_Lock and
  2846 //    Fast_Unlock we often branch to DONE_LABEL, just to find that C2
  2847 //    will emit a conditional branch immediately after the node.
  2848 //    So we have branches to branches and lots of ICC.ZF games.
  2849 //    Instead, it might be better to have C2 pass a "FailureLabel"
  2850 //    into Fast_Lock and Fast_Unlock.  In the case of success, control
  2851 //    will drop through the node.  ICC.ZF is undefined at exit.
  2852 //    In the case of failure, the node will branch directly to the
  2853 //    FailureLabel
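       // Rough pseudocode of the fast path emitted below (illustrative only, not
       // the exact instruction sequence):
       //
       //   mark = obj->mark();
       //   if (mark & monitor_value) {                  // already inflated
       //     succeed iff CAS(&monitor->_owner, NULL, Self) succeeds;
       //   } else {                                     // try stack-locking
       //     box->displaced_header = mark | unlocked_value;
       //     if (CAS(&obj->mark, mark | unlocked_value, box)) succeed;
       //     else if ((mark - SP) fits in the current page) {
       //       box->displaced_header = 0;               // recursive stack-lock
       //       succeed;
       //     } else fail;
       //   }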
  2856 // obj: object to lock
  2857 // box: on-stack box address (displaced header location) - KILLED
  2858 // rax,: tmp -- KILLED
  2859 // scr: tmp -- KILLED
  2860 void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, Register scrReg) {
  2862   // Ensure the register assignments are disjoint
  2863   guarantee (objReg != boxReg, "") ;
  2864   guarantee (objReg != tmpReg, "") ;
  2865   guarantee (objReg != scrReg, "") ;
  2866   guarantee (boxReg != tmpReg, "") ;
  2867   guarantee (boxReg != scrReg, "") ;
  2870   block_comment("FastLock");
  2871   /*
  2872      move(AT, 0x0);
  2873      return;
  2874      */
  2875   if (PrintBiasedLockingStatistics) {
  2876     push(tmpReg);
  2877     atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, AT, tmpReg);
  2878     pop(tmpReg);
  2879   }
  2881   if (EmitSync & 1) {
  2882     move(AT, 0x0);
  2883     return;
  2884   } else
  2885     if (EmitSync & 2) {
  2886       Label DONE_LABEL ;
  2887       if (UseBiasedLocking) {
  2888         // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
  2889         biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
  2890       }
  2892       ld(tmpReg, Address(objReg, 0)) ;          // fetch markword
  2893       ori(tmpReg, tmpReg, 0x1);
  2894       sd(tmpReg, Address(boxReg, 0));           // Anticipate successful CAS
  2896       cmpxchg(boxReg, Address(objReg, 0), tmpReg);          // Updates tmpReg
  2897       bne(AT, R0, DONE_LABEL);
  2898       delayed()->nop();
  2900       // Recursive locking
  2901       dsubu(tmpReg, tmpReg, SP);
  2902       li(AT, (7 - os::vm_page_size() ));
  2903       andr(tmpReg, tmpReg, AT);
  2904       sd(tmpReg, Address(boxReg, 0));
  2905       bind(DONE_LABEL) ;
  2906     } else {
  2907       // Possible cases that we'll encounter in fast_lock
  2908       // ------------------------------------------------
  2909       // * Inflated
  2910       //    -- unlocked
  2911       //    -- Locked
  2912       //       = by self
  2913       //       = by other
  2914       // * biased
  2915       //    -- by Self
  2916       //    -- by other
  2917       // * neutral
  2918       // * stack-locked
  2919       //    -- by self
  2920       //       = sp-proximity test hits
  2921       //       = sp-proximity test generates false-negative
  2922       //    -- by other
  2923       //
  2925       Label IsInflated, DONE_LABEL, PopDone ;
  2927       // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
  2928       // order to reduce the number of conditional branches in the most common cases.
  2929       // Beware -- there's a subtle invariant that fetch of the markword
  2930       // at [FETCH], below, will never observe a biased encoding (*101b).
  2931       // If this invariant is not held we risk exclusion (safety) failure.
  2932       if (UseBiasedLocking && !UseOptoBiasInlining) {
  2933         biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
  2934       }
  2936       ld(tmpReg, Address(objReg, 0)) ;         //Fetch the markword of the object.
  2937       andi(AT, tmpReg, markOopDesc::monitor_value);
  2938       bne(AT, R0, IsInflated);                      // inflated vs stack-locked|neutral|bias
  2939       delayed()->nop();
  2941       // Attempt stack-locking ...
  2942       ori (tmpReg, tmpReg, markOopDesc::unlocked_value);
  2943       sd(tmpReg, Address(boxReg, 0));          // Anticipate successful CAS
  2944       //if (os::is_MP()) {
  2945       //  sync();
  2946       //}
  2948       cmpxchg(boxReg, Address(objReg, 0), tmpReg);           // Updates tmpReg
  2949       //AT == 1: unlocked
  2951       if (PrintBiasedLockingStatistics) {
  2952         Label L;
  2953         beq(AT, R0, L);
  2954         delayed()->nop();
  2955         push(T0);
  2956         push(T1);
  2957         atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
  2958         pop(T1);
  2959         pop(T0);
  2960         bind(L);
  2961       }
  2962       bne(AT, R0, DONE_LABEL);
  2963       delayed()->nop();
  2965       // Recursive locking
  2966       // The object is stack-locked: markword contains stack pointer to BasicLock.
  2967       // Locked by current thread if difference with current SP is less than one page.
  2968       dsubu(tmpReg, tmpReg, SP);
  2969       li(AT, 7 - os::vm_page_size() );
  2970       andr(tmpReg, tmpReg, AT);
  2971       sd(tmpReg, Address(boxReg, 0));
  2972       if (PrintBiasedLockingStatistics) {
  2973         Label L;
  2974         // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++
  2975         bne(tmpReg, R0, L);
  2976         delayed()->nop();
  2977         push(T0);
  2978         push(T1);
  2979         atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
  2980         pop(T1);
  2981         pop(T0);
  2982         bind(L);
  2983       }
  2984       sltiu(AT, tmpReg, 1); /* AT = (tmpReg == 0) ? 1 : 0 */
  2986       b(DONE_LABEL) ;
  2987       delayed()->nop();
  2989       bind(IsInflated) ;
  2990       // The object's monitor m is unlocked iff m->owner == NULL,
  2991       // otherwise m->owner may contain a thread or a stack address.
  2993       // TODO: someday avoid the ST-before-CAS penalty by
  2994       // relocating (deferring) the following ST.
  2995       // We should also think about trying a CAS without having
  2996       // fetched _owner.  If the CAS is successful we may
  2997       // avoid an RTO->RTS upgrade on the $line.
  2998       // Without cast to int32_t a movptr will destroy r10 which is typically obj
  2999       li(AT, (int32_t)intptr_t(markOopDesc::unused_mark()));
  3000       sd(AT, Address(boxReg, 0));
  3002       move(boxReg, tmpReg) ;
  3003       ld(tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
  3004       // if (m->owner != 0) => AT = 0, goto slow path.
  3005       move(AT, R0);
  3006       bne(tmpReg, R0, DONE_LABEL);
  3007       delayed()->nop();
  3009 #ifndef OPT_THREAD
  3010       get_thread (TREG) ;
  3011 #endif
  3012       // It's inflated and appears unlocked
  3013       //if (os::is_MP()) {
  3014       //  sync();
  3015       //}
  3016       cmpxchg(TREG, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), tmpReg) ;
  3017       // Intentional fall-through into DONE_LABEL ...
  3020       // DONE_LABEL is a hot target - we'd really like to place it at the
  3021       // start of cache line by padding with NOPs.
  3022       // See the AMD and Intel software optimization manuals for the
  3023       // most efficient "long" NOP encodings.
  3024       // Unfortunately none of our alignment mechanisms suffice.
  3025       bind(DONE_LABEL);
  3027       // At DONE_LABEL the AT is set as follows ...
  3028       // Fast_Unlock uses the same protocol.
  3029       // AT == 1 -> Success
  3030       // AT == 0 -> Failure - force control through the slow-path
  3032       // Avoid branch-to-branch on AMD processors
  3033       // This appears to be superstition.
  3034       if (EmitSync & 32) nop() ;
  3035     }
  3036 }
  3039 // obj: object to unlock
  3040 // box: box address (displaced header location), killed.  Must be EAX.
  3041 // rbx,: killed tmp; cannot be obj nor box.
  3042 //
  3043 // Some commentary on balanced locking:
  3044 //
  3045 // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
  3046 // Methods that don't have provably balanced locking are forced to run in the
  3047 // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
  3048 // The interpreter provides two properties:
  3049 // I1:  At return-time the interpreter automatically and quietly unlocks any
  3050 //      objects acquired by the current activation (frame).  Recall that the
  3051 //      interpreter maintains an on-stack list of locks currently held by
  3052 //      a frame.
  3053 // I2:  If a method attempts to unlock an object that is not held by the
  3054 //      frame, the interpreter throws IMSX.
  3055 //
  3056 // Lets say A(), which has provably balanced locking, acquires O and then calls B().
  3057 // B() doesn't have provably balanced locking so it runs in the interpreter.
  3058 // Control returns to A() and A() unlocks O.  By I1 and I2, above, we know that O
  3059 // is still locked by A().
  3060 //
  3061 // The only other source of unbalanced locking would be JNI.  The "Java Native Interface:
  3062 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
  3063 // should not be unlocked by "normal" java-level locking and vice-versa.  The specification
  3064 // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
  3066 void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg) {
  3068   guarantee (objReg != boxReg, "") ;
  3069   guarantee (objReg != tmpReg, "") ;
  3070   guarantee (boxReg != tmpReg, "") ;
  3074   block_comment("FastUnlock");
  3077   if (EmitSync & 4) {
  3078     // Disable - inhibit all inlining.  Force control through the slow-path
  3079     move(AT, 0x0);
  3080     return;
  3081   } else
  3082     if (EmitSync & 8) {
  3083       Label DONE_LABEL ;
  3084       if (UseBiasedLocking) {
  3085         biased_locking_exit(objReg, tmpReg, DONE_LABEL);
  3086       }
  3087       // classic stack-locking code ...
  3088       ld(tmpReg, Address(boxReg, 0)) ;
  3089       beq(tmpReg, R0, DONE_LABEL) ;
  3090       move(AT, 0x1);  // delay slot
  3092       cmpxchg(tmpReg, Address(objReg, 0), boxReg);          // Uses EAX which is box
  3093       bind(DONE_LABEL);
  3094     } else {
  3095       Label DONE_LABEL, Stacked, CheckSucc, Inflated ;
  3097       // Critically, the biased locking test must have precedence over
  3098       // and appear before the (box->dhw == 0) recursive stack-lock test.
  3099       if (UseBiasedLocking && !UseOptoBiasInlining) {
  3100         biased_locking_exit(objReg, tmpReg, DONE_LABEL);
  3101       }
  3103       ld(AT, Address(boxReg, 0)) ;            // Examine the displaced header
  3104       beq(AT, R0, DONE_LABEL) ;      // 0 indicates recursive stack-lock
  3105       delayed()->daddiu(AT, R0, 0x1);
  3107       ld(tmpReg, Address(objReg, 0)) ;       // Examine the object's markword
  3108       andi(AT, tmpReg, markOopDesc::monitor_value) ;                     // Inflated?
  3109       beq(AT, R0, Stacked) ;                     // not inflated -> stack-locked
  3110       delayed()->nop();
  3112       bind(Inflated) ;
  3113       // It's inflated.
  3114       // Despite our balanced locking property we still check that m->_owner == Self
  3115       // as java routines or native JNI code called by this thread might
  3116       // have released the lock.
  3117       // Refer to the comments in synchronizer.cpp for how we might encode extra
  3118       // state in _succ so we can avoid fetching EntryList|cxq.
  3119       //
  3120       // I'd like to add more cases in fast_lock() and fast_unlock() --
  3121       // such as recursive enter and exit -- but we have to be wary of
  3122       // I$ bloat, T$ effects and BP$ effects.
  3123       //
  3124       // If there's no contention try a 1-0 exit.  That is, exit without
  3125       // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
  3126       // we detect and recover from the race that the 1-0 exit admits.
  3127       //
  3128       // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
  3129       // before it STs null into _owner, releasing the lock.  Updates
  3130       // to data protected by the critical section must be visible before
  3131       // we drop the lock (and thus before any other thread could acquire
  3132       // the lock and observe the fields protected by the lock).
  3133       // IA32's memory-model is SPO, so STs are ordered with respect to
  3134       // each other and there's no need for an explicit barrier (fence).
  3135       // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
  3136 #ifndef OPT_THREAD
  3137       get_thread (TREG) ;
  3138 #endif
  3140       // It's inflated
  3141       ld(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
  3142       xorr(boxReg, boxReg, TREG);
  3144       ld(AT, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
  3145       orr(boxReg, boxReg, AT);
  3147       move(AT, R0);
  3148       bne(boxReg, R0, DONE_LABEL);
  3149       delayed()->nop();
  3151       ld(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
  3152       ld(AT, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
  3153       orr(boxReg, boxReg, AT);
  3155       move(AT, R0);
  3156       bne(boxReg, R0, DONE_LABEL);
  3157       delayed()->nop();
  3159       sync();
  3160       sd(R0, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
  3161       move(AT, 0x1);
  3162       b(DONE_LABEL);
  3163       delayed()->nop();
  3165       bind  (Stacked);
  3166       ld(tmpReg, Address(boxReg, 0)) ;
  3167       //if (os::is_MP()) { sync(); }
  3168       cmpxchg(tmpReg, Address(objReg, 0), boxReg);
  3170       if (EmitSync & 65536) {
  3171         bind (CheckSucc);
  3172       }
  3174       bind(DONE_LABEL);
  3176       // Avoid branch to branch on AMD processors
  3177       if (EmitSync & 32768) { nop() ; }
  3178     }
  3179 }
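// The inflated exit above is the 1-0 path described in the comments: sync()
// provides the release barrier and a plain store of R0 into _owner drops the
// lock, so the uncontended exit needs no CAS.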
  3181 void MacroAssembler::align(int modulus) {
  3182   while (offset() % modulus != 0) nop();
  3183 }
  3186 void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
  3187   //Unimplemented();
  3188 }
  3190 #ifdef _LP64
  3191 Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
  3192 Register caller_saved_registers_except_v0[] = {AT, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
  3194 // On MIPS64, F0~F23 are all caller-saved, but only F0, F12 and F13 are preserved here
  3195 FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13};
  3196 #else
  3197 Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, T4, T5, T6, T7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
  3198 Register caller_saved_registers_except_v0[] = {AT, V1, A0, A1, A2, A3, T4, T5, T6, T7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
  3200 FloatRegister caller_saved_fpu_registers[] = {};
  3201 #endif
  3203 // We preserve all caller-saved registers
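// Layout sketch: pushad drops SP by len * wordSize and stores
// caller_saved_registers[i] at SP + (len - i - 1) * wordSize, so the first
// table entry lands at the highest address; the FPU registers go into a
// second, lower block, and popad unwinds the two blocks in mirror order.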
  3204 void  MacroAssembler::pushad(){
  3205   int i;
  3207   /* Fixed-point registers */
  3208   int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
  3209   daddi(SP, SP, -1 * len * wordSize);
  3210   for (i = 0; i < len; i++)
  3211   {
  3212 #ifdef _LP64
  3213     sd(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
  3214 #else
  3215     sw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
  3216 #endif
  3217   }
  3219   /* Floating-point registers */
  3220   len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
  3221   daddi(SP, SP, -1 * len * wordSize);
  3222   for (i = 0; i < len; i++)
  3223   {
  3224 #ifdef _LP64
  3225     sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
  3226 #else
  3227     swc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
  3228 #endif
  3229   }
  3230 };
  3232 void  MacroAssembler::popad(){
  3233   int i;
  3235   /* Floating-point registers */
  3236   int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
  3237   for (i = 0; i < len; i++)
  3238   {
  3239 #ifdef _LP64
  3240     ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
  3241 #else
  3242     lwc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
  3243 #endif
  3244   }
  3245   daddi(SP, SP, len * wordSize);
  3247   /* Fixed-point registers */
  3248   len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
  3249   for (i = 0; i < len; i++)
  3250   {
  3251 #ifdef _LP64
  3252     ld(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
  3253 #else
  3254     lw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
  3255 #endif
  3256   }
  3257   daddi(SP, SP, len * wordSize);
  3258 };
  3260 // We preserve all caller-saved register except V0
  3261 void MacroAssembler::pushad_except_v0() {
  3262   int i;
  3264   /* Fixed-point registers */
  3265   int len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]);
  3266   daddi(SP, SP, -1 * len * wordSize);
  3267   for (i = 0; i < len; i++) {
  3268 #ifdef _LP64
  3269     sd(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize);
  3270 #else
  3271     sw(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize);
  3272 #endif
  3273   }
  3275   /* Floating-point registers */
  3276   len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
  3277   daddi(SP, SP, -1 * len * wordSize);
  3278   for (i = 0; i < len; i++) {
  3279 #ifdef _LP64
  3280     sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
  3281 #else
  3282     swc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
  3283 #endif
  3284   }
  3285 }
  3287 void MacroAssembler::popad_except_v0() {
  3288   int i;
  3290   /* Floating-point registers */
  3291   int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
  3292   for (i = 0; i < len; i++) {
  3293 #ifdef _LP64
  3294     ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
  3295 #else
  3296     lwc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
  3297 #endif
  3298   }
  3299   daddi(SP, SP, len * wordSize);
  3301   /* Fixed-point registers */
  3302   len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]);
  3303   for (i = 0; i < len; i++) {
  3304 #ifdef _LP64
  3305     ld(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize);
  3306 #else
  3307     lw(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize);
  3308 #endif
  3309   }
  3310   daddi(SP, SP, len * wordSize);
  3311 }
  3313 void MacroAssembler::push2(Register reg1, Register reg2) {
  3314 #ifdef _LP64
  3315   daddi(SP, SP, -16);
  3316   sd(reg2, SP, 0);
  3317   sd(reg1, SP, 8);
  3318 #else
  3319   addi(SP, SP, -8);
  3320   sw(reg2, SP, 0);
  3321   sw(reg1, SP, 4);
  3322 #endif
  3323 }
  3325 void MacroAssembler::pop2(Register reg1, Register reg2) {
  3326 #ifdef _LP64
  3327   ld(reg1, SP, 0);
  3328   ld(reg2, SP, 8);
  3329   daddi(SP, SP, 16);
  3330 #else
  3331   lw(reg1, SP, 0);
  3332   lw(reg2, SP, 4);
  3333   addi(SP, SP, 8);
  3334 #endif
  3335 }
  3337 // for the UseCompressedOops option
  3338 void MacroAssembler::load_klass(Register dst, Register src) {
  3339 #ifdef _LP64
  3340   if(UseCompressedClassPointers){
  3341     lwu(dst, Address(src, oopDesc::klass_offset_in_bytes()));
  3342     decode_klass_not_null(dst);
  3343   } else
  3344 #endif
  3345   ld(dst, src, oopDesc::klass_offset_in_bytes());
  3346 }
  3348 void MacroAssembler::store_klass(Register dst, Register src) {
  3349 #ifdef _LP64
  3350   if(UseCompressedClassPointers){
  3351     encode_klass_not_null(src);
  3352     sw(src, dst, oopDesc::klass_offset_in_bytes());
  3353   } else
  3354 #endif
  3355     sd(src, dst, oopDesc::klass_offset_in_bytes());
  3356 }
  3359 void MacroAssembler::load_prototype_header(Register dst, Register src) {
  3360   load_klass(dst, src);
  3361   ld(dst, Address(dst, Klass::prototype_header_offset()));
  3362 }
  3364 #ifdef _LP64
  3365 void MacroAssembler::store_klass_gap(Register dst, Register src) {
  3366   if (UseCompressedClassPointers) {
  3367     sw(src, dst, oopDesc::klass_gap_offset_in_bytes());
  3368   }
  3369 }
  3371 void MacroAssembler::load_heap_oop(Register dst, Address src) {
  3372   if(UseCompressedOops){
  3373     lwu(dst, src);
  3374     decode_heap_oop(dst);
  3375   } else {
  3376     ld(dst, src);
  3377   }
  3378 }
  3380 void MacroAssembler::store_heap_oop(Address dst, Register src){
  3381   if(UseCompressedOops){
  3382     assert(!dst.uses(src), "not enough registers");
  3383     encode_heap_oop(src);
  3384     sw(src, dst);
  3385   } else {
  3386     sd(src, dst);
  3387   }
  3388 }
  3390 void MacroAssembler::store_heap_oop_null(Address dst){
  3391   if(UseCompressedOops){
  3392     sw(R0, dst);
  3393   } else {
  3394     sd(R0, dst);
  3395   }
  3396 }
  3398 #ifdef ASSERT
  3399 void MacroAssembler::verify_heapbase(const char* msg) {
  3400   assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed");
  3401   assert (Universe::heap() != NULL, "java heap should be initialized");
  3402 }
  3403 #endif
  3406 // Algorithm must match oop.inline.hpp encode_heap_oop.
  3407 void MacroAssembler::encode_heap_oop(Register r) {
  3408 #ifdef ASSERT
  3409   verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
  3410 #endif
  3411   verify_oop(r, "broken oop in encode_heap_oop");
  3412   if (Universe::narrow_oop_base() == NULL) {
  3413     if (Universe::narrow_oop_shift() != 0) {
  3414       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3415       shr(r, LogMinObjAlignmentInBytes);
  3416     }
  3417     return;
  3418   }
  3420   movz(r, S5_heapbase, r);
  3421   dsub(r, r, S5_heapbase);
  3422   if (Universe::narrow_oop_shift() != 0) {
  3423     assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3424     shr(r, LogMinObjAlignmentInBytes);
  3425   }
  3426 }
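// In effect, with heap base B and shift s == LogMinObjAlignmentInBytes:
//   narrow(r) = (r == NULL) ? 0 : (r - B) >> s
// The movz above first maps a NULL oop to B itself so the subtraction yields 0.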
  3428 void MacroAssembler::encode_heap_oop(Register dst, Register src) {
  3429 #ifdef ASSERT
  3430   verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
  3431 #endif
  3432   verify_oop(src, "broken oop in encode_heap_oop");
  3433   if (Universe::narrow_oop_base() == NULL) {
  3434     if (Universe::narrow_oop_shift() != 0) {
  3435       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3436       dsrl(dst, src, LogMinObjAlignmentInBytes);
  3437     } else {
  3438       if (dst != src) move(dst, src);
  3439     }
  3440   } else {
  3441     if (dst == src) {
  3442       movz(dst, S5_heapbase, dst);
  3443       dsub(dst, dst, S5_heapbase);
  3444       if (Universe::narrow_oop_shift() != 0) {
  3445         assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3446         shr(dst, LogMinObjAlignmentInBytes);
  3447       }
  3448     } else {
  3449       dsub(dst, src, S5_heapbase);
  3450       if (Universe::narrow_oop_shift() != 0) {
  3451         assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3452         shr(dst, LogMinObjAlignmentInBytes);
  3453       }
  3454       movz(dst, R0, src);
  3455     }
  3456   }
  3457 }
  3459 void MacroAssembler::encode_heap_oop_not_null(Register r) {
  3460   assert (UseCompressedOops, "should be compressed");
  3461 #ifdef ASSERT
  3462   if (CheckCompressedOops) {
  3463     Label ok;
  3464     bne(r, R0, ok);
  3465     delayed()->nop();
  3466     stop("null oop passed to encode_heap_oop_not_null");
  3467     bind(ok);
  3468   }
  3469 #endif
  3470   verify_oop(r, "broken oop in encode_heap_oop_not_null");
  3471   if (Universe::narrow_oop_base() != NULL) {
  3472     dsub(r, r, S5_heapbase);
  3473   }
  3474   if (Universe::narrow_oop_shift() != 0) {
  3475     assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3476     shr(r, LogMinObjAlignmentInBytes);
  3477   }
  3478 }
  3481 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
  3482   assert (UseCompressedOops, "should be compressed");
  3483 #ifdef ASSERT
  3484   if (CheckCompressedOops) {
  3485     Label ok;
  3486     bne(src, R0, ok);
  3487     delayed()->nop();
  3488     stop("null oop passed to encode_heap_oop_not_null2");
  3489     bind(ok);
  3490   }
  3491 #endif
  3492   verify_oop(src, "broken oop in encode_heap_oop_not_null2");
  3494   if (Universe::narrow_oop_base() != NULL) {
  3495     dsub(dst, src, S5_heapbase);
  3496     if (Universe::narrow_oop_shift() != 0) {
  3497       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3498       shr(dst, LogMinObjAlignmentInBytes);
  3499     }
  3500   } else {
  3501     if (Universe::narrow_oop_shift() != 0) {
  3502       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3503       dsrl(dst, src, LogMinObjAlignmentInBytes);
  3504     } else {
  3505       if (dst != src) move(dst, src);
  3506     }
  3507   }
  3508 }
  3510 void  MacroAssembler::decode_heap_oop(Register r) {
  3511 #ifdef ASSERT
  3512   verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
  3513 #endif
  3514   if (Universe::narrow_oop_base() == NULL) {
  3515     if (Universe::narrow_oop_shift() != 0) {
  3516       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3517       shl(r, LogMinObjAlignmentInBytes);
  3518     }
  3519   } else {
  3520     move(AT, r);
  3521     if (Universe::narrow_oop_shift() != 0) {
  3522       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3523       shl(r, LogMinObjAlignmentInBytes);
  3524     }
  3525     dadd(r, r, S5_heapbase);
  3526     movz(r, R0, AT);
  3527   }
  3528   verify_oop(r, "broken oop in decode_heap_oop");
  3529 }
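// The inverse mapping: oop(n) = (n == 0) ? NULL : B + (n << s). AT keeps the
// original narrow value so the final movz can restore NULL.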
  3531 void  MacroAssembler::decode_heap_oop(Register dst, Register src) {
  3532 #ifdef ASSERT
  3533   verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
  3534 #endif
  3535   if (Universe::narrow_oop_base() == NULL) {
  3536     if (Universe::narrow_oop_shift() != 0) {
  3537       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3538       if (dst != src) nop(); // DON'T DELETE THIS GUY.
  3539       dsll(dst, src, LogMinObjAlignmentInBytes);
  3540     } else {
  3541       if (dst != src) move(dst, src);
  3542     }
  3543   } else {
  3544     if (dst == src) {
  3545       move(AT, dst);
  3546       if (Universe::narrow_oop_shift() != 0) {
  3547         assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3548         shl(dst, LogMinObjAlignmentInBytes);
  3549       }
  3550       dadd(dst, dst, S5_heapbase);
  3551       movz(dst, R0, AT);
  3552     } else {
  3553       if (Universe::narrow_oop_shift() != 0) {
  3554         assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3555         dsll(dst, src, LogMinObjAlignmentInBytes);
  3556         daddu(dst, dst, S5_heapbase);
  3557       } else {
  3558         daddu(dst, src, S5_heapbase);
  3559       }
  3560       movz(dst, R0, src);
  3561     }
  3562   }
  3563   verify_oop(dst, "broken oop in decode_heap_oop");
  3564 }
  3566 void  MacroAssembler::decode_heap_oop_not_null(Register r) {
  3567   // Note: it will change flags
  3568   assert (UseCompressedOops, "should only be used for compressed headers");
  3569   assert (Universe::heap() != NULL, "java heap should be initialized");
  3570   // Cannot assert, unverified entry point counts instructions (see .ad file)
  3571   // vtableStubs also counts instructions in pd_code_size_limit.
  3572   // Also do not verify_oop as this is called by verify_oop.
  3573   if (Universe::narrow_oop_shift() != 0) {
  3574     assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3575     shl(r, LogMinObjAlignmentInBytes);
  3576     if (Universe::narrow_oop_base() != NULL) {
  3577       daddu(r, r, S5_heapbase);
  3578     }
  3579   } else {
  3580     assert (Universe::narrow_oop_base() == NULL, "sanity");
  3581   }
  3582 }
  3584 void  MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
  3585   assert (UseCompressedOops, "should only be used for compressed headers");
  3586   assert (Universe::heap() != NULL, "java heap should be initialized");
  3588   // Cannot assert, unverified entry point counts instructions (see .ad file)
  3589   // vtableStubs also counts instructions in pd_code_size_limit.
  3590   // Also do not verify_oop as this is called by verify_oop.
  3591   //lea(dst, Address(S5_heapbase, src, Address::times_8, 0));
  3592   if (Universe::narrow_oop_shift() != 0) {
  3593     assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3594     if (LogMinObjAlignmentInBytes == Address::times_8) {
  3595       dsll(dst, src, LogMinObjAlignmentInBytes);
  3596       daddu(dst, dst, S5_heapbase);
  3597     } else {
  3598       dsll(dst, src, LogMinObjAlignmentInBytes);
  3599       if (Universe::narrow_oop_base() != NULL) {
  3600         daddu(dst, dst, S5_heapbase);
  3601       }
  3602     }
  3603   } else {
  3604     assert (Universe::narrow_oop_base() == NULL, "sanity");
  3605     if (dst != src) {
  3606       move(dst, src);
  3607     }
  3608   }
  3609 }
  3611 void MacroAssembler::encode_klass_not_null(Register r) {
  3612   if (Universe::narrow_klass_base() != NULL) {
  3613     assert(r != AT, "Encoding a klass in AT");
  3614     set64(AT, (int64_t)Universe::narrow_klass_base());
  3615     dsub(r, r, AT);
  3616   }
  3617   if (Universe::narrow_klass_shift() != 0) {
  3618     assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
  3619     shr(r, LogKlassAlignmentInBytes);
  3620   }
  3621 }
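// In effect: narrow_klass = (klass - narrow_klass_base) >> LogKlassAlignmentInBytes.
// Klass pointers are never NULL on this path, so no movz guard is needed.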
  3623 void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
  3624   if (dst == src) {
  3625     encode_klass_not_null(src);
  3626   } else {
  3627     if (Universe::narrow_klass_base() != NULL) {
  3628       set64(dst, (int64_t)Universe::narrow_klass_base());
  3629       dsub(dst, src, dst);
  3630       if (Universe::narrow_klass_shift() != 0) {
  3631         assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
  3632         shr(dst, LogKlassAlignmentInBytes);
  3633       }
  3634     } else {
  3635       if (Universe::narrow_klass_shift() != 0) {
  3636         assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
  3637         dsrl(dst, src, LogKlassAlignmentInBytes);
  3638       } else {
  3639         move(dst, src);
  3640       }
  3641     }
  3642   }
  3643 }
  3645 // Function instr_size_for_decode_klass_not_null() counts the instructions
  3646 // generated by decode_klass_not_null(register r) and reinit_heapbase(),
  3647 // when (Universe::heap() != NULL).  Hence, if the instructions they
  3648 // generate change, then this method needs to be updated.
  3649 int MacroAssembler::instr_size_for_decode_klass_not_null() {
  3650   assert (UseCompressedClassPointers, "only for compressed klass ptrs");
  3651   if (Universe::narrow_klass_base() != NULL) {
  3652     // mov64 + addq + shlq? + mov64  (for reinit_heapbase()).
  3653     return (Universe::narrow_klass_shift() == 0 ? 4 * 9 : 4 * 10);
  3654   } else {
  3655     // longest load decode klass function, mov64, leaq
  3656     return (Universe::narrow_klass_shift() == 0 ? 4 * 0 : 4 * 1);
  3657   }
  3658 }
  3660 void  MacroAssembler::decode_klass_not_null(Register r) {
  3661   assert (UseCompressedClassPointers, "should only be used for compressed headers");
  3662   assert(r != AT, "Decoding a klass in AT");
  3663   // Cannot assert, unverified entry point counts instructions (see .ad file)
  3664   // vtableStubs also counts instructions in pd_code_size_limit.
  3665   // Also do not verify_oop as this is called by verify_oop.
  3666   if (Universe::narrow_klass_shift() != 0) {
  3667     assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
  3668     shl(r, LogKlassAlignmentInBytes);
  3669   }
  3670   if (Universe::narrow_klass_base() != NULL) {
  3671     set64(AT, (int64_t)Universe::narrow_klass_base());
  3672     daddu(r, r, AT);
  3673     // Not necessary on MIPS:
  3674     //reinit_heapbase();
  3675   }
  3676 }
  3678 void  MacroAssembler::decode_klass_not_null(Register dst, Register src) {
  3679   assert (UseCompressedClassPointers, "should only be used for compressed headers");
  3681   if (dst == src) {
  3682     decode_klass_not_null(dst);
  3683   } else {
  3684     // Cannot assert, unverified entry point counts instructions (see .ad file)
  3685     // vtableStubs also counts instructions in pd_code_size_limit.
  3686     // Also do not verify_oop as this is called by verify_oop.
  3687     set64(dst, (int64_t)Universe::narrow_klass_base());
  3688     if (Universe::narrow_klass_shift() != 0) {
  3689       assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
  3690       assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
  3691       dsll(AT, src, Address::times_8);
  3692       daddu(dst, dst, AT);
  3693     } else {
  3694       daddu(dst, src, dst);
  3695     }
  3696   }
  3697 }
  3699 void MacroAssembler::incrementl(Register reg, int value) {
  3700   if (value == min_jint) {
  3701      move(AT, value);
  3702      LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
  3703      return;
  3704   }
  3705   if (value <  0) { decrementl(reg, -value); return; }
  3706   if (value == 0) {                        ; return; }
  3708   if(Assembler::is_simm16(value)) {
  3709      NOT_LP64(addiu(reg, reg, value));
  3710      LP64_ONLY(move(AT, value); addu32(reg, reg, AT));
  3711   } else {
  3712      move(AT, value);
  3713      LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
  3714   }
  3715 }
  3717 void MacroAssembler::decrementl(Register reg, int value) {
  3718   if (value == min_jint) {
  3719      move(AT, value);
  3720      LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
  3721      return;
  3722   }
  3723   if (value <  0) { incrementl(reg, -value); return; }
  3724   if (value == 0) {                        ; return; }
  3726   if (Assembler::is_simm16(value)) {
  3727      NOT_LP64(addiu(reg, reg, -value));
  3728      LP64_ONLY(move(AT, value); subu32(reg, reg, AT));
  3729   } else {
  3730      move(AT, value);
  3731      LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
  3732   }
  3733 }
  3735 void MacroAssembler::reinit_heapbase() {
  3736   if (UseCompressedOops || UseCompressedClassPointers) {
  3737     if (Universe::heap() != NULL) {
  3738       if (Universe::narrow_oop_base() == NULL) {
  3739         move(S5_heapbase, R0);
  3740       } else {
  3741         set64(S5_heapbase, (int64_t)Universe::narrow_ptrs_base());
  3742       }
  3743     } else {
  3744       set64(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr());
  3745       ld(S5_heapbase, S5_heapbase, 0);
  3746     }
  3747   }
  3748 }
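// S5_heapbase caches Universe::narrow_ptrs_base(); before the heap exists the
// value is reloaded indirectly through narrow_ptrs_base_addr().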
  3749 #endif // _LP64
  3751 void MacroAssembler::check_klass_subtype(Register sub_klass,
  3752                            Register super_klass,
  3753                            Register temp_reg,
  3754                            Label& L_success) {
  3756 // cf. gen_subtype_check in the interpreter
  3756   Label L_failure;
  3757   check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg,        &L_success, &L_failure, NULL);
  3758   check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
  3759   bind(L_failure);
  3760 }
  3762 SkipIfEqual::SkipIfEqual(
  3763     MacroAssembler* masm, const bool* flag_addr, bool value) {
  3764   _masm = masm;
  3765   _masm->li(AT, (address)flag_addr);
  3766   _masm->lb(AT, AT, 0);
  3767   _masm->addi(AT, AT, -value);
  3768   _masm->beq(AT, R0, _label);
  3769   _masm->delayed()->nop();
  3770 }
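// SkipIfEqual brackets code that should run only when *flag_addr != value:
// the constructor branches to _label (bound by the destructor) when the byte
// at flag_addr equals value.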
  3771 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
  3772                                                    Register super_klass,
  3773                                                    Register temp_reg,
  3774                                                    Label* L_success,
  3775                                                    Label* L_failure,
  3776                                                    Label* L_slow_path,
  3777                                         RegisterOrConstant super_check_offset) {
  3778   assert_different_registers(sub_klass, super_klass, temp_reg);
  3779   bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
  3780   if (super_check_offset.is_register()) {
  3781     assert_different_registers(sub_klass, super_klass,
  3782                                super_check_offset.as_register());
  3783   } else if (must_load_sco) {
  3784     assert(temp_reg != noreg, "supply either a temp or a register offset");
  3785   }
  3787   Label L_fallthrough;
  3788   int label_nulls = 0;
  3789   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  3790   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  3791   if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
  3792   assert(label_nulls <= 1, "at most one NULL in the batch");
  3794   int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  3795   int sco_offset = in_bytes(Klass::super_check_offset_offset());
  3796   // If the pointers are equal, we are done (e.g., String[] elements).
  3797   // This self-check enables sharing of secondary supertype arrays among
  3798   // non-primary types such as array-of-interface.  Otherwise, each such
  3799   // type would need its own customized SSA.
  3800   // We move this check to the front of the fast path because many
  3801   // type checks are in fact trivially successful in this manner,
  3802   // so we get a nicely predicted branch right at the start of the check.
  3803   beq(sub_klass, super_klass, *L_success);
  3804   delayed()->nop();
  3805   // Check the supertype display:
  3806   if (must_load_sco) {
  3807     // Positive movl does right thing on LP64.
  3808     lwu(temp_reg, super_klass, sco_offset);
  3809     super_check_offset = RegisterOrConstant(temp_reg);
  3810   }
  3811   dsll(AT, super_check_offset.register_or_noreg(), Address::times_1);
  3812   daddu(AT, sub_klass, AT);
  3813   ld(AT, AT, super_check_offset.constant_or_zero()*Address::times_1);
  3815   // This check has worked decisively for primary supers.
  3816   // Secondary supers are sought in the super_cache ('super_cache_addr').
  3817   // (Secondary supers are interfaces and very deeply nested subtypes.)
  3818   // This works in the same check above because of a tricky aliasing
  3819   // between the super_cache and the primary super display elements.
  3820   // (The 'super_check_addr' can address either, as the case requires.)
  3821   // Note that the cache is updated below if it does not help us find
  3822   // what we need immediately.
  3823   // So if it was a primary super, we can just fail immediately.
  3824   // Otherwise, it's the slow path for us (no success at this point).
  3826   if (super_check_offset.is_register()) {
  3827     beq(super_klass, AT, *L_success);
  3828     delayed()->nop();
  3829     addi(AT, super_check_offset.as_register(), -sc_offset);
  3830     if (L_failure == &L_fallthrough) {
  3831       beq(AT, R0, *L_slow_path);
  3832       delayed()->nop();
  3833     } else {
  3834       bne_far(AT, R0, *L_failure);
  3835       delayed()->nop();
  3836       b(*L_slow_path);
  3837       delayed()->nop();
  3838     }
  3839   } else if (super_check_offset.as_constant() == sc_offset) {
  3840     // Need a slow path; fast failure is impossible.
  3841     if (L_slow_path == &L_fallthrough) {
  3842       beq(super_klass, AT, *L_success);
  3843       delayed()->nop();
  3844     } else {
  3845       bne(super_klass, AT, *L_slow_path);
  3846       delayed()->nop();
  3847       b(*L_success);
  3848       delayed()->nop();
  3849     }
  3850   } else {
  3851     // No slow path; it's a fast decision.
  3852     if (L_failure == &L_fallthrough) {
  3853       beq(super_klass, AT, *L_success);
  3854       delayed()->nop();
  3855     } else {
  3856       bne_far(super_klass, AT, *L_failure);
  3857       delayed()->nop();
  3858       b(*L_success);
  3859       delayed()->nop();
  3860     }
  3861   }
  3863   bind(L_fallthrough);
  3864 }
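// At most one of L_success/L_failure/L_slow_path may be NULL; a NULL label
// simply falls through to whatever is emitted after the check (L_fallthrough).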
  3868 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
  3869                                                    Register super_klass,
  3870                                                    Register temp_reg,
  3871                                                    Register temp2_reg,
  3872                                                    Label* L_success,
  3873                                                    Label* L_failure,
  3874                                                    bool set_cond_codes) {
  3875   assert_different_registers(sub_klass, super_klass, temp_reg);
  3876   if (temp2_reg != noreg)
  3877     assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
  3878   else
  3879     temp2_reg = T9;
  3880 #define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)
  3882   Label L_fallthrough;
  3883   int label_nulls = 0;
  3884   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  3885   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  3886   assert(label_nulls <= 1, "at most one NULL in the batch");
  3888   // a couple of useful fields in sub_klass:
  3889   int ss_offset = in_bytes(Klass::secondary_supers_offset());
  3890   int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  3891   Address secondary_supers_addr(sub_klass, ss_offset);
  3892   Address super_cache_addr(     sub_klass, sc_offset);
  3894   // Do a linear scan of the secondary super-klass chain.
  3895   // This code is rarely used, so simplicity is a virtue here.
  3896   // The repne_scan instruction uses fixed registers, which we must spill.
  3897   // Don't worry too much about pre-existing connections with the input regs.
  3899   // Get super_klass value into rax (even if it was in rdi or rcx).
  3900 #ifndef PRODUCT
  3901   int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
  3902   ExternalAddress pst_counter_addr((address) pst_counter);
  3903   NOT_LP64(  incrementl(pst_counter_addr) );
  3904 #endif //PRODUCT
  3906   // We will consult the secondary-super array.
  3907   ld(temp_reg, secondary_supers_addr);
  3908   // Load the array length.  (Positive movl does right thing on LP64.)
  3909   lw(temp2_reg, Address(temp_reg, Array<Klass*>::length_offset_in_bytes()));
  3910   // Skip to start of data.
  3911   daddiu(temp_reg, temp_reg, Array<Klass*>::base_offset_in_bytes());
  3913   // Scan RCX words at [RDI] for an occurrence of RAX.
  3914   // Set NZ/Z based on last compare.
  3915   // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does
  3916   // not change flags (only scas instruction which is repeated sets flags).
  3917   // Set Z = 0 (not equal) before 'repne' to indicate that class was not found.
  3919   // OpenJDK8 never compresses klass pointers in secondary-super array.
  3920   Label Loop, subtype;
  3921   bind(Loop);
  3922   beq(temp2_reg, R0, *L_failure);
  3923   delayed()->nop();
  3924   ld(AT, temp_reg, 0);
  3925   beq(AT, super_klass, subtype);
  3926   delayed()->daddi(temp_reg, temp_reg, 1 * wordSize);
  3927   b(Loop);
  3928   delayed()->daddi(temp2_reg, temp2_reg, -1);
  3930   bind(subtype);
  3931   sd(super_klass, super_cache_addr);
  3932   if (L_success != &L_fallthrough) {
  3933     b(*L_success);
  3934     delayed()->nop();
  3935   }
  3937   // Success.  Cache the super we found and proceed in triumph.
  3938 #undef IS_A_TEMP
  3940   bind(L_fallthrough);
  3941 }
  3943 void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
  3944   ld(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
  3945   sd(R0, Address(java_thread, JavaThread::vm_result_offset()));
  3946   verify_oop(oop_result, "broken oop in call_VM_base");
  3947 }
  3949 void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
  3950   ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
  3951   sd(R0, Address(java_thread, JavaThread::vm_result_2_offset()));
  3952 }
  3954 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
  3955                                          int extra_slot_offset) {
  3956   // cf. TemplateTable::prepare_invoke(), if (load_receiver).
  3957   int stackElementSize = Interpreter::stackElementSize;
  3958   int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
  3959 #ifdef ASSERT
  3960   int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
  3961   assert(offset1 - offset == stackElementSize, "correct arithmetic");
  3962 #endif
  3963   Register             scale_reg    = NOREG;
  3964   Address::ScaleFactor scale_factor = Address::no_scale;
  3965   if (arg_slot.is_constant()) {
  3966     offset += arg_slot.as_constant() * stackElementSize;
  3967   } else {
  3968     scale_reg    = arg_slot.as_register();
  3969     scale_factor = Address::times_8;
  3970   }
  3971   // We don't push RA on stack in prepare_invoke.
  3972   //  offset += wordSize;           // return PC is on stack
  3973   if (scale_reg == NOREG) return Address(SP, offset);
  3974   else {
  3975     dsll(scale_reg, scale_reg, scale_factor);
  3976     daddu(scale_reg, SP, scale_reg);
  3977     return Address(scale_reg, offset);
  3978   }
  3979 }
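// Example: argument_address(2) yields SP + expr_offset_in_bytes(0)
// + 2 * Interpreter::stackElementSize; for a register slot the index is
// instead scaled by 8 (Address::times_8) at run time.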
  3981 SkipIfEqual::~SkipIfEqual() {
  3982   _masm->bind(_label);
  3983 }
  3985 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
  3986   switch (size_in_bytes) {
  3987 #ifndef _LP64
  3988   case  8:
  3989     assert(dst2 != noreg, "second dest register required");
  3990     lw(dst,  src);
  3991     lw(dst2, src.plus_disp(BytesPerInt));
  3992     break;
  3993 #else
  3994   case  8:  ld(dst, src); break;
  3995 #endif
  3996   case  4:  lw(dst, src); break;
  3997   case  2:  is_signed ? lh(dst, src) : lhu(dst, src); break;
  3998   case  1:  is_signed ? lb(dst, src) : lbu(dst, src); break;
  3999   default:  ShouldNotReachHere();
  4000   }
  4001 }
  4003 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
  4004   switch (size_in_bytes) {
  4005 #ifndef _LP64
  4006   case  8:
  4007     assert(src2 != noreg, "second source register required");
  4008     sw(src, dst);
  4009     sw(src2, dst.plus_disp(BytesPerInt));
  4010     break;
  4011 #else
  4012   case  8:  sd(src, dst); break;
  4013 #endif
  4014   case  4:  sw(src, dst); break;
  4015   case  2:  sh(src, dst); break;
  4016   case  1:  sb(src, dst); break;
  4017   default:  ShouldNotReachHere();
  4018   }
  4019 }
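// These two helpers give callers a single entry point for 1-, 2-, 4- and
// 8-byte accesses; on 32-bit targets a 64-bit value is split across the
// dst2/src2 register pair.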
  4021 // Look up the method for a megamorphic invokeinterface call.
  4022 // The target method is determined by <intf_klass, itable_index>.
  4023 // The receiver klass is in recv_klass.
  4024 // On success, the result will be in method_result, and execution falls through.
  4025 // On failure, execution transfers to the given label.
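// In outline (a sketch of the code below):
//   scan = recv_klass + vtable_start + vtable_length * wordSize, 8-aligned;
//   while (scan->interface() != intf_klass) {
//     if (scan->interface() == NULL) goto L_no_such_interface;
//     scan += itableOffsetEntry::size() * wordSize;
//   }
//   method_result = *(recv_klass + scan->offset()
//                     + itable_index * wordSize + itentry_off);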
  4026 void MacroAssembler::lookup_interface_method(Register recv_klass,
  4027                                              Register intf_klass,
  4028                                              RegisterOrConstant itable_index,
  4029                                              Register method_result,
  4030                                              Register scan_temp,
  4031                                              Label& L_no_such_interface,
  4032                                              bool return_method) {
  4033   assert_different_registers(recv_klass, intf_klass, scan_temp, AT);
  4034   assert_different_registers(method_result, intf_klass, scan_temp, AT);
  4035   assert(recv_klass != method_result || !return_method,
  4036          "recv_klass can be destroyed when method isn't needed");
  4038   assert(itable_index.is_constant() || itable_index.as_register() == method_result,
  4039          "caller must use same register for non-constant itable index as for method");
  4041   // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  4042   int vtable_base = InstanceKlass::vtable_start_offset() * wordSize;
  4043   int itentry_off = itableMethodEntry::method_offset_in_bytes();
  4044   int scan_step   = itableOffsetEntry::size() * wordSize;
  4045   int vte_size    = vtableEntry::size() * wordSize;
  4046   Address::ScaleFactor times_vte_scale = Address::times_ptr;
  4047   assert(vte_size == wordSize, "else adjust times_vte_scale");
  4049   lw(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize));
  4051   // %%% Could store the aligned, prescaled offset in the klassoop.
  4052   dsll(scan_temp, scan_temp, times_vte_scale);
  4053   daddu(scan_temp, recv_klass, scan_temp);
  4054   daddiu(scan_temp, scan_temp, vtable_base);
  4055   if (HeapWordsPerLong > 1) {
  4056     // Round up to align_object_offset boundary
  4057     // see code for InstanceKlass::start_of_itable!
  4058     round_to(scan_temp, BytesPerLong);
  4059   }
  4061   if (return_method) {
  4062     // Adjust recv_klass by scaled itable_index, so we can free itable_index.
  4063     assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
  4064     if (itable_index.is_constant()) {
  4065       set64(AT, (int)itable_index.as_constant());
  4066       dsll(AT, AT, (int)Address::times_ptr);
  4067     } else {
  4068       dsll(AT, itable_index.as_register(), (int)Address::times_ptr);
  4070     daddu(AT, AT, recv_klass);
  4071     daddiu(recv_klass, AT, itentry_off);
  4072   }
  4074   Label search, found_method;
  4076   for (int peel = 1; peel >= 0; peel--) {
  4077     ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
  4079     if (peel) {
  4080       beq(intf_klass, method_result, found_method);
  4081       delayed()->nop();
  4082     } else {
  4083       bne(intf_klass, method_result, search);
  4084       delayed()->nop();
  4085       // (invert the test to fall through to found_method...)
  4086     }
  4088     if (!peel)  break;
  4090     bind(search);
  4092     // Check that the previous entry is non-null.  A null entry means that
  4093     // the receiver class doesn't implement the interface, and wasn't the
  4094     // same as when the caller was compiled.
  4095     beq(method_result, R0, L_no_such_interface);
  4096     delayed()->nop();
  4097     daddiu(scan_temp, scan_temp, scan_step);
  4098   }
  4100   bind(found_method);
  4102   if (return_method) {
  4103     // Got a hit.
  4104     lw(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
  4105     if(UseLoongsonISA) {
  4106       gsldx(method_result, recv_klass, scan_temp, 0);
  4107     } else {
  4108       daddu(AT, recv_klass, scan_temp);
  4109       ld(method_result, AT);
  4110     }
  4111   }
  4112 }
  4114 // virtual method calling
  4115 void MacroAssembler::lookup_virtual_method(Register recv_klass,
  4116                                            RegisterOrConstant vtable_index,
  4117                                            Register method_result) {
  4118   Register tmp = GP;
  4119   push(tmp);
  4121   if (vtable_index.is_constant()) {
  4122     assert_different_registers(recv_klass, method_result, tmp);
  4123   } else {
  4124     assert_different_registers(recv_klass, method_result, vtable_index.as_register(), tmp);
  4125   }
  4126   const int base = InstanceKlass::vtable_start_offset() * wordSize;
  4127   assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below");
  4128 /*
  4129   Address vtable_entry_addr(recv_klass,
  4130                             vtable_index, Address::times_ptr,
  4131                             base + vtableEntry::method_offset_in_bytes());
  4132 */
  4133   if (vtable_index.is_constant()) {
  4134     set64(AT, vtable_index.as_constant());
  4135     dsll(AT, AT, (int)Address::times_ptr);
  4136   } else {
  4137     dsll(AT, vtable_index.as_register(), (int)Address::times_ptr);
  4138   }
  4139   set64(tmp, base + vtableEntry::method_offset_in_bytes());
  4140   daddu(tmp, tmp, AT);
  4141   daddu(tmp, tmp, recv_klass);
  4142   ld(method_result, tmp, 0);
  4144   pop(tmp);
  4145 }
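// In effect: method_result = *(recv_klass + base + vtable_index * wordSize
//                              + vtableEntry::method_offset_in_bytes()).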
  4147 void MacroAssembler::store_for_type_by_register(Register src_reg, Register tmp_reg, int disp, BasicType type, bool wide) {
  4148   switch (type) {
  4149     case T_LONG:
  4150       st_ptr(src_reg, tmp_reg, disp);
  4151       break;
  4152     case T_ARRAY:
  4153     case T_OBJECT:
  4154       if (UseCompressedOops && !wide) {
  4155         sw(src_reg, tmp_reg, disp);
  4156       } else {
  4157         st_ptr(src_reg, tmp_reg, disp);
  4159       break;
  4160     case T_ADDRESS:
  4161       st_ptr(src_reg, tmp_reg, disp);
  4162       break;
  4163     case T_INT:
  4164       sw(src_reg, tmp_reg, disp);
  4165       break;
  4166     case T_CHAR:
  4167     case T_SHORT:
  4168       sh(src_reg, tmp_reg, disp);
  4169       break;
  4170     case T_BYTE:
  4171     case T_BOOLEAN:
  4172       sb(src_reg, tmp_reg, disp);
  4173       break;
  4174     default:
  4175       ShouldNotReachHere();
  4176   }
  4177 }
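// MIPS loads and stores take a signed 16-bit (simm16) displacement. The
// *_for_type helpers below fold the scaled index, and, when the displacement
// does not fit in simm16, the displacement itself, into a temporary register
// before issuing the access.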
  4179 void MacroAssembler::store_for_type(Register src_reg, Address addr, BasicType type, bool wide) {
  4180   Register tmp_reg = T9;
  4181   Register index_reg = addr.index();
  4182   if (index_reg == NOREG) {
  4183     tmp_reg = NOREG;
  4184   }
  4186   int scale = addr.scale();
  4187   if (tmp_reg != NOREG && scale >= 0) {
  4188     dsll(tmp_reg, index_reg, scale);
  4189   }
  4191   int disp = addr.disp();
  4192   bool disp_is_simm16 = true;
  4193   if (!Assembler::is_simm16(disp)) {
  4194     disp_is_simm16 = false;
  4195   }
  4197   Register base_reg = addr.base();
  4198   if (tmp_reg != NOREG) {
  4199     assert_different_registers(tmp_reg, base_reg, index_reg);
  4200   }
  4202   if (tmp_reg != NOREG) {
  4203     daddu(tmp_reg, base_reg, tmp_reg);
  4204     if (!disp_is_simm16) {
  4205       move(AT, disp);              // disp does not fit in simm16: add it via AT,
  4206       daddu(tmp_reg, tmp_reg, AT); // keeping the scaled index already in tmp_reg
  4207     }
  4208     store_for_type_by_register(src_reg, tmp_reg, disp_is_simm16 ? disp : 0, type, wide);
  4209   } else {
  4210     if (!disp_is_simm16) {
  4211       tmp_reg = T9;
  4212       assert_different_registers(tmp_reg, base_reg);
  4213       move(tmp_reg, disp);
  4214       daddu(tmp_reg, base_reg, tmp_reg);
  4215     }
  4216     store_for_type_by_register(src_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type, wide);
  4217   }
  4218 }
  4220 void MacroAssembler::store_for_type_by_register(FloatRegister src_reg, Register tmp_reg, int disp, BasicType type) {
  4221   switch (type) {
  4222     case T_DOUBLE:
  4223       sdc1(src_reg, tmp_reg, disp);
  4224       break;
  4225     case T_FLOAT:
  4226       swc1(src_reg, tmp_reg, disp);
  4227       break;
  4228     default:
  4229       ShouldNotReachHere();
  4230   }
  4231 }
  4233 void MacroAssembler::store_for_type(FloatRegister src_reg, Address addr, BasicType type) {
  4234   Register tmp_reg = T9;
  4235   Register index_reg = addr.index();
  4236   if (index_reg == NOREG) {
  4237     tmp_reg = NOREG;
  4238   }
  4240   int scale = addr.scale();
  4241   if (tmp_reg != NOREG && scale >= 0) {
  4242     dsll(tmp_reg, index_reg, scale);
  4243   }
  4245   int disp = addr.disp();
  4246   bool disp_is_simm16 = true;
  4247   if (!Assembler::is_simm16(disp)) {
  4248     disp_is_simm16 = false;
  4249   }
  4251   Register base_reg = addr.base();
  4252   if (tmp_reg != NOREG) {
  4253     assert_different_registers(tmp_reg, base_reg, index_reg);
  4254   }
  4256   if (tmp_reg != NOREG) {
  4257     daddu(tmp_reg, base_reg, tmp_reg);
  4258     if (!disp_is_simm16) {
  4259       move(AT, disp);              // disp does not fit in simm16: add it via AT,
  4260       daddu(tmp_reg, tmp_reg, AT); // keeping the scaled index already in tmp_reg
  4261     }
  4262     store_for_type_by_register(src_reg, tmp_reg, disp_is_simm16 ? disp : 0, type);
  4263   } else {
  4264     if (!disp_is_simm16) {
  4265       tmp_reg = T9;
  4266       assert_different_registers(tmp_reg, base_reg);
  4267       move(tmp_reg, disp);
  4268       daddu(tmp_reg, base_reg, tmp_reg);
  4269     }
  4270     store_for_type_by_register(src_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type);
  4271   }
  4272 }
  4274 void MacroAssembler::load_for_type_by_register(Register dst_reg, Register tmp_reg, int disp, BasicType type, bool wide) {
  4275   switch (type) {
  4276     case T_LONG:
  4277       ld_ptr(dst_reg, tmp_reg, disp);
  4278       break;
  4279     case T_ARRAY:
  4280     case T_OBJECT:
  4281       if (UseCompressedOops && !wide) {
  4282         lwu(dst_reg, tmp_reg, disp);
  4283       } else {
  4284         ld_ptr(dst_reg, tmp_reg, disp);
  4286       break;
  4287     case T_ADDRESS:
  4288       if (UseCompressedClassPointers && disp == oopDesc::klass_offset_in_bytes()) {
  4289         lwu(dst_reg, tmp_reg, disp);
  4290       } else {
  4291         ld_ptr(dst_reg, tmp_reg, disp);
  4293       break;
  4294     case T_INT:
  4295       lw(dst_reg, tmp_reg, disp);
  4296       break;
  4297     case T_CHAR:
  4298       lhu(dst_reg, tmp_reg, disp);
  4299       break;
  4300     case T_SHORT:
  4301       lh(dst_reg, tmp_reg, disp);
  4302       break;
  4303     case T_BYTE:
  4304     case T_BOOLEAN:
  4305       lb(dst_reg, tmp_reg, disp);
  4306       break;
  4307     default:
  4308       ShouldNotReachHere();
  4309   }
  4310 }
  4312 int MacroAssembler::load_for_type(Register dst_reg, Address addr, BasicType type, bool wide) {
  4313   int code_offset = 0;
  4314   Register tmp_reg = T9;
  4315   Register index_reg = addr.index();
  4316   if (index_reg == NOREG) {
  4317     tmp_reg = NOREG;
  4318   }
  4320   int scale = addr.scale();
  4321   if (tmp_reg != NOREG && scale >= 0) {
  4322     dsll(tmp_reg, index_reg, scale);
  4323   }
  4325   int disp = addr.disp();
  4326   bool disp_is_simm16 = true;
  4327   if (!Assembler::is_simm16(disp)) {
  4328     disp_is_simm16 = false;
  4329   }
  4331   Register base_reg = addr.base();
  4332   if (tmp_reg != NOREG) {
  4333     assert_different_registers(tmp_reg, base_reg, index_reg);
  4334   }
  4336   if (tmp_reg != NOREG) {
  4337     daddu(tmp_reg, base_reg, tmp_reg);
  4338     if (!disp_is_simm16) {
  4339       move(AT, disp);              // disp does not fit in simm16: add it via AT,
  4340       daddu(tmp_reg, tmp_reg, AT); // keeping the scaled index already in tmp_reg
  4341     }
  4342     code_offset = offset();
  4343     load_for_type_by_register(dst_reg, tmp_reg, disp_is_simm16 ? disp : 0, type, wide);
  4344   } else {
  4345     if (!disp_is_simm16) {
  4346       tmp_reg = T9;
  4347       assert_different_registers(tmp_reg, base_reg);
  4348       move(tmp_reg, disp);
  4349       daddu(tmp_reg, base_reg, tmp_reg);
  4350     }
  4351     code_offset = offset();
  4352     load_for_type_by_register(dst_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type, wide);
  4353   }
  4355   return code_offset;
  4356 }
  4358 void MacroAssembler::load_for_type_by_register(FloatRegister dst_reg, Register tmp_reg, int disp, BasicType type) {
  4359   switch (type) {
  4360     case T_DOUBLE:
  4361       ldc1(dst_reg, tmp_reg, disp);
  4362       break;
  4363     case T_FLOAT:
  4364       lwc1(dst_reg, tmp_reg, disp);
  4365       break;
  4366     default:
  4367       ShouldNotReachHere();
  4368   }
  4369 }
  4371 int MacroAssembler::load_for_type(FloatRegister dst_reg, Address addr, BasicType type) {
  4372   int code_offset = 0;
  4373   Register tmp_reg = T9;
  4374   Register index_reg = addr.index();
  4375   if (index_reg == NOREG) {
  4376     tmp_reg = NOREG;
  4377   }
  4379   int scale = addr.scale();
  4380   if (tmp_reg != NOREG && scale >= 0) {
  4381     dsll(tmp_reg, index_reg, scale);
  4382   }
  4384   int disp = addr.disp();
  4385   bool disp_is_simm16 = true;
  4386   if (!Assembler::is_simm16(disp)) {
  4387     disp_is_simm16 = false;
  4388   }
  4390   Register base_reg = addr.base();
  4391   if (tmp_reg != NOREG) {
  4392     assert_different_registers(tmp_reg, base_reg, index_reg);
  4393   }
  4395   if (tmp_reg != NOREG) {
  4396     daddu(tmp_reg, base_reg, tmp_reg);
  4397     if (!disp_is_simm16) {
  4398       move(AT, disp);              // disp does not fit in simm16: add it via AT,
  4399       daddu(tmp_reg, tmp_reg, AT); // keeping the scaled index already in tmp_reg
  4400     }
  4401     code_offset = offset();
  4402     load_for_type_by_register(dst_reg, tmp_reg, disp_is_simm16 ? disp : 0, type);
  4403   } else {
  4404     if (!disp_is_simm16) {
  4405       tmp_reg = T9;
  4406       assert_different_registers(tmp_reg, base_reg);
  4407       move(tmp_reg, disp);
  4408       daddu(tmp_reg, base_reg, tmp_reg);
  4409     }
  4410     code_offset = offset();
  4411     load_for_type_by_register(dst_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type);
  4412   }
  4414   return code_offset;
  4415 }
