src/cpu/mips/vm/macroAssembler_mips.cpp

author: aoqi
date:   Sat, 07 Nov 2020 10:30:02 +0800
changeset 10026:8c95980d0b66 (parent 9932:86ea9a02a717)
/*
 * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2017, 2020, Loongson Technology. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */
#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "compiler/disassembler.hpp"
#include "gc_interface/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/cardTableModRefBS.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/macros.hpp"
#if INCLUDE_ALL_GCS
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
#include "gc_implementation/g1/heapRegion.hpp"
#endif // INCLUDE_ALL_GCS
#define A0 RA0
#define A1 RA1
#define A2 RA2
#define A3 RA3
#define A4 RA4
#define A5 RA5
#define A6 RA6
#define A7 RA7
#define T0 RT0
#define T1 RT1
#define T2 RT2
#define T3 RT3
#define T8 RT8
#define T9 RT9

// Implementation of MacroAssembler
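
// The static arrays i[] and f[] below serve as a scratch area into which
// save_registers()/restore_registers() dump and reload the 32 integer and
// 32 floating-point registers for debugging. i_offset()/f_offset() compute
// the member offsets with the classic null-object idiom (this is how the
// code does it; HotSpot's offset_of macro would be the usual alternative).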
intptr_t MacroAssembler::i[32] = {0};
float MacroAssembler::f[32] = {0.0};

void MacroAssembler::print(outputStream *s) {
  unsigned int k;
  for(k=0; k<sizeof(i)/sizeof(i[0]); k++) {
    s->print_cr("i%d = 0x%.16lx", k, i[k]);
  }
  s->cr();

  for(k=0; k<sizeof(f)/sizeof(f[0]); k++) {
    s->print_cr("f%d = %f", k, f[k]);
  }
  s->cr();
}

int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; }
int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; }
void MacroAssembler::save_registers(MacroAssembler *masm) {
#define __ masm->
  for(int k=0; k<32; k++) {
    __ sw (as_Register(k), A0, i_offset(k));
  }

  for(int k=0; k<32; k++) {
    __ swc1 (as_FloatRegister(k), A0, f_offset(k));
  }
#undef __
}

void MacroAssembler::restore_registers(MacroAssembler *masm) {
#define __ masm->
  for(int k=0; k<32; k++) {
    __ lw (as_Register(k), A0, i_offset(k));
  }

  for(int k=0; k<32; k++) {
    __ lwc1 (as_FloatRegister(k), A0, f_offset(k));
  }
#undef __
}
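
// pd_patch_instruction() below back-patches a previously emitted forward
// branch once its target becomes known. It appears to distinguish three cases
// by decoding the instruction words at the patch site: the 8-instruction
// b_far sequence (recognized by its leading dadd/move), the jmp_far sequence
// (patchable_set48 + jr), and, failing both, a plain conditional branch whose
// 16-bit offset field is rewritten via patched_branch().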
void MacroAssembler::pd_patch_instruction(address branch, address target) {
  jint& stub_inst = *(jint*) branch;
  jint *pc = (jint *)branch;

  if((opcode(stub_inst) == special_op) && (special(stub_inst) == dadd_op)) {
    //b_far:
    //  move(AT, RA); // dadd
    //  emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
    //  nop();
    //  lui(T9, 0); // to be patched
    //  ori(T9, 0);
    //  daddu(T9, T9, RA);
    //  move(RA, AT);
    //  jr(T9);

    assert(opcode(pc[3]) == lui_op
        && opcode(pc[4]) == ori_op
        && special(pc[5]) == daddu_op, "Not a branch label patch");
    if(!(opcode(pc[3]) == lui_op
          && opcode(pc[4]) == ori_op
          && special(pc[5]) == daddu_op)) { tty->print_cr("Not a branch label patch"); }

    int offset = target - branch;
    if (!is_simm16(offset)) {
      pc[3] = (pc[3] & 0xffff0000) | high16(offset - 12);
      pc[4] = (pc[4] & 0xffff0000) | low16(offset - 12);
    } else {
      // revert to "beq + nop"
      CodeBuffer cb(branch, 4 * 10);
      MacroAssembler masm(&cb);
#define __ masm.
      __ b(target);
      __ delayed()->nop();
      __ nop();
      __ nop();
      __ nop();
      __ nop();
      __ nop();
      __ nop();
    }
    return;
  } else if (special(pc[4]) == jr_op
             && opcode(pc[4]) == special_op
             && (((opcode(pc[0]) == lui_op) || opcode(pc[0]) == daddiu_op) || (opcode(pc[0]) == ori_op))) {
    //jmp_far:
    //  patchable_set48(T9, target);
    //  jr(T9);
    //  nop();

    CodeBuffer cb(branch, 4 * 4);
    MacroAssembler masm(&cb);
    masm.patchable_set48(T9, (long)(target));
    return;
  }

#ifndef PRODUCT
  if (!is_simm16((target - branch - 4) >> 2)) {
    tty->print_cr("Illegal patching: branch = 0x%lx, target = 0x%lx", branch, target);
    tty->print_cr("======= Start decoding at branch = 0x%lx =======", branch);
    Disassembler::decode(branch - 4 * 16, branch + 4 * 16, tty);
    tty->print_cr("======= End of decoding =======");
  }
#endif

  stub_inst = patched_branch(target - branch, stub_inst, 0);
}
static inline address first_cache_address() {
  return CodeCache::low_bound() + sizeof(HeapBlock::Header);
}

static inline address last_cache_address() {
  return CodeCache::high_bound() - Assembler::InstructionSize;
}

int MacroAssembler::call_size(address target, bool far, bool patchable) {
  if (patchable) return 6 << Assembler::LogInstructionSize;
  if (!far) return 2 << Assembler::LogInstructionSize; // jal + nop
  return (insts_for_set64((jlong)target) + 2) << Assembler::LogInstructionSize;
}

// Can we reach target using jal/j from anywhere
// in the code cache (because code can be relocated)?
bool MacroAssembler::reachable_from_cache(address target) {
  address cl = first_cache_address();
  address ch = last_cache_address();

  return (cl <= target) && (target <= ch) && fit_in_jal(cl, ch);
}
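
// The jump/call emitters below pick between two strategies: when the target
// lies in the same 256MB-aligned region that the MIPS j/jal instructions can
// reach from anywhere in the code cache (what fit_in_jal above presumably
// verifies), a single j/jal plus delay-slot nop suffices; otherwise the
// 48-bit target is materialized in T9 and jr/jalr is used. The patchable_*
// variants pad the short form with four leading nops so that either form
// occupies exactly 6 instructions and the site can later be repatched in
// place (call_size() reports the same fixed length for patchable sites).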
void MacroAssembler::general_jump(address target) {
  if (reachable_from_cache(target)) {
    j(target);
    delayed()->nop();
  } else {
    set64(T9, (long)target);
    jr(T9);
    delayed()->nop();
  }
}

int MacroAssembler::insts_for_general_jump(address target) {
  if (reachable_from_cache(target)) {
    //j(target);
    //nop();
    return 2;
  } else {
    //set64(T9, (long)target);
    //jr(T9);
    //nop();
    return insts_for_set64((jlong)target) + 2;
  }
}

void MacroAssembler::patchable_jump(address target) {
  if (reachable_from_cache(target)) {
    nop();
    nop();
    nop();
    nop();
    j(target);
    delayed()->nop();
  } else {
    patchable_set48(T9, (long)target);
    jr(T9);
    delayed()->nop();
  }
}

int MacroAssembler::insts_for_patchable_jump(address target) {
  return 6;
}

void MacroAssembler::general_call(address target) {
  if (reachable_from_cache(target)) {
    jal(target);
    delayed()->nop();
  } else {
    set64(T9, (long)target);
    jalr(T9);
    delayed()->nop();
  }
}

int MacroAssembler::insts_for_general_call(address target) {
  if (reachable_from_cache(target)) {
    //jal(target);
    //nop();
    return 2;
  } else {
    //set64(T9, (long)target);
    //jalr(T9);
    //nop();
    return insts_for_set64((jlong)target) + 2;
  }
}

void MacroAssembler::patchable_call(address target) {
  if (reachable_from_cache(target)) {
    nop();
    nop();
    nop();
    nop();
    jal(target);
    delayed()->nop();
  } else {
    patchable_set48(T9, (long)target);
    jalr(T9);
    delayed()->nop();
  }
}

int MacroAssembler::insts_for_patchable_call(address target) {
  return 6;
}
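
// The *_far branch helpers below work around the limited reach of the MIPS
// 16-bit branch offset (a signed word count, i.e. +/-128KB): when the target
// is out of range, the condition is inverted so the code branches around an
// unconditional b_far() to the real target.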
void MacroAssembler::beq_far(Register rs, Register rt, address entry) {
  u_char * cur_pc = pc();

  // Near/Far jump
  if(is_simm16((entry - pc() - 4) / 4)) {
    Assembler::beq(rs, rt, offset(entry));
  } else {
    Label not_jump;

    bne(rs, rt, not_jump);
    delayed()->nop();

    b_far(entry);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}

void MacroAssembler::beq_far(Register rs, Register rt, Label& L) {
  if (L.is_bound()) {
    beq_far(rs, rt, target(L));
  } else {
    u_char * cur_pc = pc();
    Label not_jump;

    bne(rs, rt, not_jump);
    delayed()->nop();

    b_far(L);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}

void MacroAssembler::bne_far(Register rs, Register rt, address entry) {
  u_char * cur_pc = pc();

  // Near/Far jump
  if(is_simm16((entry - pc() - 4) / 4)) {
    Assembler::bne(rs, rt, offset(entry));
  } else {
    Label not_jump;

    beq(rs, rt, not_jump);
    delayed()->nop();

    b_far(entry);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}

void MacroAssembler::bne_far(Register rs, Register rt, Label& L) {
  if (L.is_bound()) {
    bne_far(rs, rt, target(L));
  } else {
    u_char * cur_pc = pc();
    Label not_jump;

    beq(rs, rt, not_jump);
    delayed()->nop();

    b_far(L);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}

void MacroAssembler::beq_long(Register rs, Register rt, Label& L) {
  Label not_taken;

  bne(rs, rt, not_taken);
  delayed()->nop();

  jmp_far(L);

  bind(not_taken);
}

void MacroAssembler::bne_long(Register rs, Register rt, Label& L) {
  Label not_taken;

  beq(rs, rt, not_taken);
  delayed()->nop();

  jmp_far(L);

  bind(not_taken);
}

void MacroAssembler::bc1t_long(Label& L) {
  Label not_taken;

  bc1f(not_taken);
  delayed()->nop();

  jmp_far(L);

  bind(not_taken);
}

void MacroAssembler::bc1f_long(Label& L) {
  Label not_taken;

  bc1t(not_taken);
  delayed()->nop();

  jmp_far(L);

  bind(not_taken);
}
void MacroAssembler::b_far(Label& L) {
  if (L.is_bound()) {
    b_far(target(L));
  } else {
    volatile address dest = target(L);
//
// MacroAssembler::pd_patch_instruction branch=55651ed514, target=55651ef6d8
//   0x00000055651ed514: dadd at, ra, zero
//   0x00000055651ed518: [4110001]bgezal zero, 0x00000055651ed520
//
//   0x00000055651ed51c: sll zero, zero, 0
//   0x00000055651ed520: lui t9, 0x0
//   0x00000055651ed524: ori t9, t9, 0x21b8
//   0x00000055651ed528: daddu t9, t9, ra
//   0x00000055651ed52c: dadd ra, at, zero
//   0x00000055651ed530: jr t9
//   0x00000055651ed534: sll zero, zero, 0
//
    move(AT, RA);
    emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
    nop();
    lui(T9, 0); // to be patched
    ori(T9, T9, 0);
    daddu(T9, T9, RA);
    move(RA, AT);
    jr(T9);
  }
}

void MacroAssembler::b_far(address entry) {
  u_char * cur_pc = pc();

  // Near/Far jump
  if(is_simm16((entry - pc() - 4) / 4)) {
    b(offset(entry));
  } else {
    // address must be bounded
    move(AT, RA);
    emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
    nop();
    li32(T9, entry - pc());
    daddu(T9, T9, RA);
    move(RA, AT);
    jr(T9);
  }
}

void MacroAssembler::ld_ptr(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  ld_ptr(rt, 0, AT);
}

void MacroAssembler::st_ptr(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  st_ptr(rt, 0, AT);
}

void MacroAssembler::ld_long(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  ld_long(rt, 0, AT);
}

void MacroAssembler::st_long(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  st_long(rt, 0, AT);
}

Address MacroAssembler::as_Address(AddressLiteral adr) {
  return Address(adr.target(), adr.rspec());
}

Address MacroAssembler::as_Address(ArrayAddress adr) {
  return Address::make_array(adr);
}
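
// A sketch of the LL/SC retry loop emitted by atomic_inc32() below: ll loads
// the counter and opens a link on its cache line, sc stores the incremented
// value only if the line was not written in between (leaving 0 in the source
// register on failure), and the beq loops until the store succeeds. The
// leading sync is gated by UseSyncLevel for processors that need a memory
// barrier here (the exact UseSyncLevel encoding is Loongson-specific).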
// tmp_reg1 and tmp_reg2 are clobbered; being caller-saved, they must be
// preserved outside of atomic_inc32 if their values are still needed.
void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) {
  Label again;

  li(tmp_reg1, counter_addr);
  bind(again);
  if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync();
  ll(tmp_reg2, tmp_reg1, 0);
  addi(tmp_reg2, tmp_reg2, inc);
  sc(tmp_reg2, tmp_reg1, 0);
  beq(tmp_reg2, R0, again);
  delayed()->nop();
}
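
// biased_locking_enter() below is the MIPS rendition of the usual HotSpot
// biased-locking fast path: test the mark word for the bias pattern, then
// xor together the prototype header, the current thread, and the mark so a
// single comparison tells whether the bias owner and epoch are both current;
// the remaining paths (acquire an anonymous bias, rebias after an epoch
// bump, or revoke a stale bias) all funnel through cmpxchg on the mark word.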
int MacroAssembler::biased_locking_enter(Register lock_reg,
                                         Register obj_reg,
                                         Register swap_reg,
                                         Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Label& done,
                                         Label* slow_case,
                                         BiasedLockingCounters* counters) {
  assert(UseBiasedLocking, "why call this otherwise?");
  bool need_tmp_reg = false;
  if (tmp_reg == noreg) {
    need_tmp_reg = true;
    tmp_reg = T9;
  }
  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT);
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  Address saved_mark_addr(lock_reg, 0);

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;
  int null_check_offset = -1;
  if (!swap_reg_contains_mark) {
    null_check_offset = offset();
    ld_ptr(swap_reg, mark_addr);
  }

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  move(tmp_reg, swap_reg);
  andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place);
#ifdef _LP64
  daddi(AT, R0, markOopDesc::biased_lock_pattern);
  dsub(AT, AT, tmp_reg);
#else
  addi(AT, R0, markOopDesc::biased_lock_pattern);
  sub(AT, AT, tmp_reg);
#endif
  if (need_tmp_reg) {
    pop(tmp_reg);
  }

  bne(AT, R0, cas_label);
  delayed()->nop();

  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  // Note that because there is no current thread register on MIPS we
  // need to store off the mark word we read out of the object to
  // avoid reloading it and needing to recheck invariants below. This
  // store is unfortunate but it makes the overall code shorter and
  // simpler.
  st_ptr(swap_reg, saved_mark_addr);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  if (swap_reg_contains_mark) {
    null_check_offset = offset();
  }
  load_prototype_header(tmp_reg, obj_reg);
  xorr(tmp_reg, tmp_reg, swap_reg);
  get_thread(swap_reg);
  xorr(swap_reg, swap_reg, tmp_reg);

  move(AT, ~((int) markOopDesc::age_mask_in_place));
  andr(swap_reg, swap_reg, AT);

  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(swap_reg, R0, L);
    delayed()->nop();
    push(tmp_reg);
    push(A0);
    atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg);
    pop(A0);
    pop(tmp_reg);
    bind(L);
  }
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  beq(swap_reg, R0, done);
  delayed()->nop();
  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.

  move(AT, markOopDesc::biased_lock_mask_in_place);
  andr(AT, swap_reg, AT);
  bne(AT, R0, try_revoke_bias);
  delayed()->nop();
  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.

  move(AT, markOopDesc::epoch_mask_in_place);
  andr(AT, swap_reg, AT);
  bne(AT, R0, try_rebias);
  delayed()->nop();
  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go in to the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.

  ld_ptr(swap_reg, saved_mark_addr);

  move(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  andr(swap_reg, swap_reg, AT);

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  orr(tmp_reg, tmp_reg, swap_reg);
  //if (os::is_MP()) {
  //  sync();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(tmp_reg);
    push(A0);
    atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg);
    pop(A0);
    pop(tmp_reg);
    bind(L);
  }
  if (slow_case != NULL) {
    beq_far(AT, R0, *slow_case);
    delayed()->nop();
  }
  b(done);
  delayed()->nop();

  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  load_prototype_header(tmp_reg, obj_reg);
  get_thread(swap_reg);
  orr(tmp_reg, tmp_reg, swap_reg);
  ld_ptr(swap_reg, saved_mark_addr);

  //if (os::is_MP()) {
  //  sync();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(AT);
    push(tmp_reg);
    atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg);
    pop(tmp_reg);
    pop(AT);
    bind(L);
  }
  if (slow_case != NULL) {
    beq_far(AT, R0, *slow_case);
    delayed()->nop();
  }

  b(done);
  delayed()->nop();
  bind(try_revoke_bias);
  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  ld_ptr(swap_reg, saved_mark_addr);

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  load_prototype_header(tmp_reg, obj_reg);
  //if (os::is_MP()) {
  //  lock();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(AT);
    push(tmp_reg);
    atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg);
    pop(tmp_reg);
    pop(AT);
    bind(L);
  }

  bind(cas_label);
  return null_check_offset;
}
void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
  assert(UseBiasedLocking, "why call this otherwise?");

  // Check for biased locking unlock case, which is a no-op
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
#ifdef _LP64
  ld(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
  daddi(AT, R0, markOopDesc::biased_lock_pattern);
#else
  lw(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
  addi(AT, R0, markOopDesc::biased_lock_pattern);
#endif

  beq(AT, temp_reg, done);
  delayed()->nop();
}
// The stack pointer adjustment is needed here; see
// InterpreterMacroAssembler::super_call_VM_leaf. This method handles the
// stack alignment itself, so the caller does not need to reserve stack
// space for the arguments.
void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments) {
  Label L, E;

  assert(number_of_arguments <= 4, "just check");

  // If SP is not 16-byte aligned, move it down by 8 bytes around the call.
  andi(AT, SP, 0xf);
  beq(AT, R0, L);
  delayed()->nop();
  daddi(SP, SP, -8);
  call(entry_point, relocInfo::runtime_call_type);
  delayed()->nop();
  daddi(SP, SP, 8);
  b(E);
  delayed()->nop();

  bind(L);
  call(entry_point, relocInfo::runtime_call_type);
  delayed()->nop();
  bind(E);
}
void MacroAssembler::jmp(address entry) {
  patchable_set48(T9, (long)entry);
  jr(T9);
}

void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) {
  switch (rtype) {
    case relocInfo::runtime_call_type:
    case relocInfo::none:
      jmp(entry);
      break;
    default:
      {
        InstructionMark im(this);
        relocate(rtype);
        patchable_set48(T9, (long)entry);
        jr(T9);
      }
      break;
  }
}

void MacroAssembler::jmp_far(Label& L) {
  if (L.is_bound()) {
    address entry = target(L);
    assert(entry != NULL, "jmp most probably wrong");
    InstructionMark im(this);

    relocate(relocInfo::internal_word_type);
    patchable_set48(T9, (long)entry);
  } else {
    InstructionMark im(this);
    L.add_patch_at(code(), locator());

    relocate(relocInfo::internal_word_type);
    patchable_set48(T9, (long)pc());
  }

  jr(T9);
  delayed()->nop();
}

void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
  int oop_index;
  if (obj) {
    oop_index = oop_recorder()->find_index(obj);
  } else {
    oop_index = oop_recorder()->allocate_metadata_index(obj);
  }
  relocate(metadata_Relocation::spec(oop_index));
  patchable_set48(AT, (long)obj);
  sd(AT, dst);
}

void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
  int oop_index;
  if (obj) {
    oop_index = oop_recorder()->find_index(obj);
  } else {
    oop_index = oop_recorder()->allocate_metadata_index(obj);
  }
  relocate(metadata_Relocation::spec(oop_index));
  patchable_set48(dst, (long)obj);
}
void MacroAssembler::call(address entry) {
  // C/C++ code assumes T9 holds the entry point, so we always move entry
  // into T9. Maybe there is a more graceful way to handle this. FIXME
  // For more info, see class NativeCall.
#ifndef _LP64
  move(T9, (int)entry);
#else
  patchable_set48(T9, (long)entry);
#endif
  jalr(T9);
}

void MacroAssembler::call(address entry, relocInfo::relocType rtype) {
  switch (rtype) {
    case relocInfo::runtime_call_type:
    case relocInfo::none:
      call(entry);
      break;
    default:
      {
        InstructionMark im(this);
        relocate(rtype);
        call(entry);
      }
      break;
  }
}

void MacroAssembler::call(address entry, RelocationHolder& rh)
{
  switch (rh.type()) {
    case relocInfo::runtime_call_type:
    case relocInfo::none:
      call(entry);
      break;
    default:
      {
        InstructionMark im(this);
        relocate(rh);
        call(entry);
      }
      break;
  }
}
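
// ic_call() below emits an inline-cache call site: IC_Klass is preloaded
// with Universe::non_oop_word() as the "unset" cached-klass placeholder
// that the inline-cache machinery later patches, and the virtual_call
// relocation records the site so it can be found and repatched.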
void MacroAssembler::ic_call(address entry) {
  RelocationHolder rh = virtual_call_Relocation::spec(pc());
  patchable_set48(IC_Klass, (long)Universe::non_oop_word());
  assert(entry != NULL, "call most probably wrong");
  InstructionMark im(this);
  relocate(rh);
  patchable_call(entry);
}

void MacroAssembler::c2bool(Register r) {
  Label L;
  Assembler::beq(r, R0, L);
  delayed()->nop();
  move(r, 1);
  bind(L);
}
#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif

void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) {
  if (ShowMessageBoxOnError) {
    JavaThreadState saved_state = JavaThread::current()->thread_state();
    JavaThread::current()->set_thread_state(_thread_in_vm);
    {
      // In order to make locks work, we need to fake an in_VM state
      ttyLocker ttyl;
      ::tty->print_cr("EXECUTION STOPPED: %s\n", msg);
      if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
        BytecodeCounter::print();
      }
    }
    ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state);
  } else {
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
  }
}
void MacroAssembler::stop(const char* msg) {
  li(A0, (long)msg);
#ifndef _LP64
  // reserve space for the argument
  addiu(SP, SP, -1 * wordSize);
#endif
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
#ifndef _LP64
  // restore space for the argument
  addiu(SP, SP, 1 * wordSize);
#endif
  brk(17);
}
void MacroAssembler::warn(const char* msg) {
#ifdef _LP64
  pushad();
  li(A0, (long)msg);
  push(S2);
  move(AT, -(StackAlignmentInBytes));
  move(S2, SP);     // use S2 as a sender SP holder
  andr(SP, SP, AT); // align stack as required by ABI
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, S2);     // use S2 as a sender SP holder
  pop(S2);
  popad();
#else
  pushad();
  addi(SP, SP, -4);
  sw(A0, SP, -1 * wordSize);
  li(A0, (long)msg);
  addi(SP, SP, -1 * wordSize);
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
  addi(SP, SP, 1 * wordSize);
  lw(A0, SP, -1 * wordSize);
  addi(SP, SP, 4);
  popad();
#endif
}
void MacroAssembler::print_reg(Register reg) {
  void * cur_pc = pc();
  pushad();
  NOT_LP64(push(FP);)

  li(A0, (long)reg->name());
  if (reg == SP)
    addiu(A1, SP, wordSize * 23); // 23 registers are saved in pushad()
  else if (reg == A0)
    ld(A1, SP, wordSize * 19); // A0 has been modified by li(A0, (long)reg->name()). Ugly Code!
  else
    move(A1, reg);
  li(A2, (long)cur_pc);
  push(S2);
  move(AT, -(StackAlignmentInBytes));
  move(S2, SP);     // use S2 as a sender SP holder
  andr(SP, SP, AT); // align stack as required by ABI
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_reg_with_pc), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, S2);     // use S2 as a sender SP holder
  pop(S2);
  NOT_LP64(pop(FP);)
  popad();
}

void MacroAssembler::print_reg(FloatRegister reg) {
  void * cur_pc = pc();
  pushad();
  NOT_LP64(push(FP);)
  li(A0, (long)reg->name());
  push(S2);
  move(AT, -(StackAlignmentInBytes));
  move(S2, SP);     // use S2 as a sender SP holder
  andr(SP, SP, AT); // align stack as required by ABI
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, S2);     // use S2 as a sender SP holder
  pop(S2);
  NOT_LP64(pop(FP);)
  popad();

  pushad();
  NOT_LP64(push(FP);)
  move(FP, SP);
  move(AT, -(StackAlignmentInBytes));
  andr(SP, SP, AT);
  mov_d(F12, reg);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_double), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, FP);
  NOT_LP64(pop(FP);)
  popad();
}
void MacroAssembler::increment(Register reg, int imm) {
  if (!imm) return;
  if (is_simm16(imm)) {
#ifdef _LP64
    daddiu(reg, reg, imm);
#else
    addiu(reg, reg, imm);
#endif
  } else {
    move(AT, imm);
#ifdef _LP64
    daddu(reg, reg, AT);
#else
    addu(reg, reg, AT);
#endif
  }
}

void MacroAssembler::decrement(Register reg, int imm) {
  increment(reg, -imm);
}
void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             bool check_exceptions) {
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2);
  assert(arg_2 != A1, "smashed argument");
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             int number_of_arguments,
                             bool check_exceptions) {
  call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}
void MacroAssembler::call_VM_base(Register oop_result,
                                  Register java_thread,
                                  Register last_java_sp,
                                  address  entry_point,
                                  int      number_of_arguments,
                                  bool     check_exceptions) {

  address before_call_pc;
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifndef OPT_THREAD
    java_thread = T2;
    get_thread(java_thread);
#else
    java_thread = TREG;
#endif
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = SP;
  }
  // debugging support
  assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
  assert(number_of_arguments <= 4   , "cannot have more than 4 arguments");
  assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");

  assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save fp");

  // set last Java frame before call
  before_call_pc = (address)pc();
  set_last_Java_frame(java_thread, last_java_sp, FP, before_call_pc);

  // do the call
  move(A0, java_thread);
  call(entry_point, relocInfo::runtime_call_type);
  delayed()->nop();

  // restore the thread (cannot use the pushed argument since arguments
  // may be overwritten by C code generated by an optimizing compiler);
  // however can use the register value directly if it is callee saved.
#ifndef OPT_THREAD
  get_thread(java_thread);
#else
#ifdef ASSERT
  {
    Label L;
    get_thread(AT);
    beq(java_thread, AT, L);
    delayed()->nop();
    stop("MacroAssembler::call_VM_base: TREG not callee saved?");
    bind(L);
  }
#endif
#endif

  // discard thread and arguments
  ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
  // reset last Java frame
  reset_last_Java_frame(java_thread, false);

  check_and_handle_popframe(java_thread);
  check_and_handle_earlyret(java_thread);
  if (check_exceptions) {
    // check for pending exceptions (java_thread is set upon return)
    Label L;
#ifdef _LP64
    ld(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
#else
    lw(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
#endif
    beq(AT, R0, L);
    delayed()->nop();
    li(AT, before_call_pc);
    push(AT);
    jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
    delayed()->nop();
    bind(L);
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
#ifdef _LP64
    ld(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset()));
    sd(R0, java_thread, in_bytes(JavaThread::vm_result_offset()));
#else
    lw(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset()));
    sw(R0, java_thread, in_bytes(JavaThread::vm_result_offset()));
#endif
    verify_oop(oop_result);
  }
}
void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
  move(V0, SP);
  // we also reserve space for java_thread here
#ifndef _LP64
  daddi(SP, SP, (1 + number_of_arguments) * (- wordSize));
#endif
  move(AT, -(StackAlignmentInBytes));
  andr(SP, SP, AT);
  call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions);
}
void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  call_VM_leaf_base(entry_point, number_of_arguments);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  if (arg_0 != A0) move(A0, arg_0);
  call_VM_leaf(entry_point, 1);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
  if (arg_0 != A0) move(A0, arg_0);
  if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument");
  call_VM_leaf(entry_point, 2);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  if (arg_0 != A0) move(A0, arg_0);
  if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument");
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument");
  call_VM_leaf(entry_point, 3);
}

void MacroAssembler::super_call_VM_leaf(address entry_point) {
  MacroAssembler::call_VM_leaf_base(entry_point, 0);
}

void MacroAssembler::super_call_VM_leaf(address entry_point,
                                        Register arg_1) {
  if (arg_1 != A0) move(A0, arg_1);
  MacroAssembler::call_VM_leaf_base(entry_point, 1);
}

void MacroAssembler::super_call_VM_leaf(address entry_point,
                                        Register arg_1,
                                        Register arg_2) {
  if (arg_1 != A0) move(A0, arg_1);
  if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument");
  MacroAssembler::call_VM_leaf_base(entry_point, 2);
}

void MacroAssembler::super_call_VM_leaf(address entry_point,
                                        Register arg_1,
                                        Register arg_2,
                                        Register arg_3) {
  if (arg_1 != A0) move(A0, arg_1);
  if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument");
  if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument");
  MacroAssembler::call_VM_leaf_base(entry_point, 3);
}
void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
}

void MacroAssembler::check_and_handle_popframe(Register java_thread) {
}

void MacroAssembler::null_check(Register reg, int offset) {
  if (needs_explicit_null_check(offset)) {
    // provoke OS NULL exception if reg = NULL by
    // accessing M[reg] w/o changing any (non-CC) registers
    // NOTE: cmpl is plenty here to provoke a segv
    lw(AT, reg, 0);
  } else {
    // nothing to do, (later) access of M[reg + offset]
    // will provoke OS NULL exception if reg = NULL
  }
}
void MacroAssembler::enter() {
  push2(RA, FP);
  move(FP, SP);
}

void MacroAssembler::leave() {
#ifndef _LP64
  addi(SP, FP, 2 * wordSize);
  lw(RA, SP, - 1 * wordSize);
  lw(FP, SP, - 2 * wordSize);
#else
  daddi(SP, FP, 2 * wordSize);
  ld(RA, SP, - 1 * wordSize);
  ld(FP, SP, - 2 * wordSize);
#endif
}
void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifndef OPT_THREAD
    java_thread = T1;
    get_thread(java_thread);
#else
    java_thread = TREG;
#endif
  }
  // we must set sp to zero to clear frame
  st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
  // must clear fp, so that compiled frames are not confused; it is possible
  // that we need it only for debugging
  if(clear_fp) {
    st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));
  }
  // Always clear the pc because it could have been set by make_walkable()
  st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
}

void MacroAssembler::reset_last_Java_frame(bool clear_fp) {
  Register thread = TREG;
#ifndef OPT_THREAD
  get_thread(thread);
#endif
  // we must set sp to zero to clear frame
  sd(R0, Address(thread, JavaThread::last_Java_sp_offset()));
  // must clear fp, so that compiled frames are not confused; it is
  // possible that we need it only for debugging
  if (clear_fp) {
    sd(R0, Address(thread, JavaThread::last_Java_fp_offset()));
  }
  // Always clear the pc because it could have been set by make_walkable()
  sd(R0, Address(thread, JavaThread::last_Java_pc_offset()));
}
// Write serialization page so VM thread can do a pseudo remote membar.
// We use the current thread pointer to calculate a thread specific
// offset to write to within the page. This minimizes bus traffic
// due to cache line collision.
void MacroAssembler::serialize_memory(Register thread, Register tmp) {
  move(tmp, thread);
  srl(tmp, tmp, os::get_serialize_page_shift_count());
  move(AT, (os::vm_page_size() - sizeof(int)));
  andr(tmp, tmp, AT);
  sw(tmp, Address(tmp, (intptr_t)os::get_memory_serialize_page()));
}
// Calls to C land
//
// When entering C land, the fp & sp of the last Java frame have to be recorded
// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
// has to be reset to 0. This is required to allow proper stack traversal.
void MacroAssembler::set_last_Java_frame(Register java_thread,
                                         Register last_java_sp,
                                         Register last_java_fp,
                                         address  last_java_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifndef OPT_THREAD
    java_thread = T2;
    get_thread(java_thread);
#else
    java_thread = TREG;
#endif
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = SP;
  }

  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));
  }

  // last_java_pc is optional
  if (last_java_pc != NULL) {
    relocate(relocInfo::internal_word_type);
    patchable_set48(AT, (long)last_java_pc);
    st_ptr(AT, java_thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
  }
  st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
}

void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         address  last_java_pc) {
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = SP;
  }

  Register thread = TREG;
#ifndef OPT_THREAD
  get_thread(thread);
#endif
  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    sd(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset()));
  }

  // last_java_pc is optional
  if (last_java_pc != NULL) {
    relocate(relocInfo::internal_word_type);
    patchable_set48(AT, (long)last_java_pc);
    st_ptr(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
  }

  sd(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset()));
}
//////////////////////////////////////////////////////////////////////////////////
#if INCLUDE_ALL_GCS
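
// g1_write_barrier_pre() below implements G1's SATB pre-barrier: while
// concurrent marking is active, the value a field held *before* the store
// (pre_val) is logged into the thread-local SATB queue. The queue index
// counts down toward zero; when the buffer is full (index == 0) the slow
// path hands the value to the runtime via SharedRuntime::g1_wb_pre.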
void MacroAssembler::g1_write_barrier_pre(Register obj,
                                          Register pre_val,
                                          Register thread,
                                          Register tmp,
                                          bool tosca_live,
                                          bool expand_call) {

  // If expand_call is true then we expand the call_VM_leaf macro
  // directly to skip generating the check by
  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.

#ifdef _LP64
  assert(thread == TREG, "must be");
#endif // _LP64

  Label done;
  Label runtime;

  assert(pre_val != noreg, "check this code");

  if (obj != noreg) {
    assert_different_registers(obj, pre_val, tmp);
    assert(pre_val != V0, "check this code");
  }

  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_active()));
  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_buf()));

  // Is marking active?
  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
    lw(AT, in_progress);
  } else {
    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
    lb(AT, in_progress);
  }
  beq(AT, R0, done);
  delayed()->nop();

  // Do we need to load the previous value?
  if (obj != noreg) {
    load_heap_oop(pre_val, Address(obj, 0));
  }

  // Is the previous value null?
  beq(pre_val, R0, done);
  delayed()->nop();

  // Can we store original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)

  ld(tmp, index);
  beq(tmp, R0, runtime);
  delayed()->nop();

  daddiu(tmp, tmp, -1 * wordSize);
  sd(tmp, index);
  ld(AT, buffer);
  daddu(tmp, tmp, AT);

  // Record the previous value
  sd(pre_val, tmp, 0);
  beq(R0, R0, done);
  delayed()->nop();

  bind(runtime);
  // save the live input values
  if (tosca_live) push(V0);

  if (obj != noreg && obj != V0) push(obj);

  if (pre_val != V0) push(pre_val);

  // Calling the runtime using the regular call_VM_leaf mechanism generates
  // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
  // that checks that the *(fp+frame::interpreter_frame_last_sp) == NULL.
  //
  // If we are generating the pre-barrier without a frame (e.g. in the
  // intrinsified Reference.get() routine) then fp might be pointing to
  // the caller frame and so this check will most likely fail at runtime.
  //
  // Expanding the call directly bypasses the generation of the check.
  // So when we do not have a full interpreter frame on the stack
  // expand_call should be passed true.

  NOT_LP64( push(thread); )

  if (expand_call) {
    LP64_ONLY( assert(pre_val != A1, "smashed arg"); )
    if (thread != A1) move(A1, thread);
    if (pre_val != A0) move(A0, pre_val);
    MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
  } else {
    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
  }

  NOT_LP64( pop(thread); )

  // restore the live input values
  if (pre_val != V0)
    pop(pre_val);

  if (obj != noreg && obj != V0)
    pop(obj);

  if (tosca_live) pop(V0);

  bind(done);
}
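
// g1_write_barrier_post() below is G1's card-marking post-barrier. The fast
// exits: the store stays within one heap region (the xor of the two
// addresses shifted by LogOfHRGrainBytes is zero), stores NULL, hits a
// young-gen card, or hits an already-dirty card. Otherwise the card is
// dirtied and its address is enqueued in the thread's dirty-card queue,
// falling back to SharedRuntime::g1_wb_post when the queue buffer is full.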
  1570 void MacroAssembler::g1_write_barrier_post(Register store_addr,
  1571                                            Register new_val,
  1572                                            Register thread,
  1573                                            Register tmp,
  1574                                            Register tmp2) {
  1575   assert(tmp  != AT, "must be");
  1576   assert(tmp2 != AT, "must be");
  1577 #ifdef _LP64
  1578   assert(thread == TREG, "must be");
  1579 #endif // _LP64
  1581   Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
  1582                                        PtrQueue::byte_offset_of_index()));
  1583   Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
  1584                                        PtrQueue::byte_offset_of_buf()));
  1586   BarrierSet* bs = Universe::heap()->barrier_set();
  1587   CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  1588   assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
  1590   Label done;
  1591   Label runtime;
  1593   // Does store cross heap regions?
  1594   xorr(AT, store_addr, new_val);
  1595   dsrl(AT, AT, HeapRegion::LogOfHRGrainBytes);
  1596   beq(AT, R0, done);
  1597   delayed()->nop();
  1600   // crosses regions, storing NULL?
  1601   beq(new_val, R0, done);
  1602   delayed()->nop();
  1604   // storing region crossing non-NULL, is card already dirty?
  1605   const Register card_addr = tmp;
  1606   const Register cardtable = tmp2;
  1608   move(card_addr, store_addr);
  1609   dsrl(card_addr, card_addr, CardTableModRefBS::card_shift);
  1610   // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT
  1611   // a valid address and therefore is not properly handled by the relocation code.
  1612   set64(cardtable, (intptr_t)ct->byte_map_base);
  1613   daddu(card_addr, card_addr, cardtable);
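       // card_addr now points at &byte_map_base[store_addr >> card_shift],
       // i.e. the byte covering the card for the updated field.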
  1615   lb(AT, card_addr, 0);
  1616   daddiu(AT, AT, -1 * (int)G1SATBCardTableModRefBS::g1_young_card_val());
  1617   beq(AT, R0, done);
  1618   delayed()->nop();
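       // Memory barrier before re-reading the card (this plays the role of the
       // StoreLoad fence in G1's post-barrier on other platforms).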
  1620   sync();
  1621   lb(AT, card_addr, 0);
  1622   daddiu(AT, AT, -1 * (int)CardTableModRefBS::dirty_card_val());
  1623   beq(AT, R0, done);
  1624   delayed()->nop();
  1627   // storing a region crossing, non-NULL oop, card is clean.
  1628   // dirty card and log.
  1629   move(AT, (int)CardTableModRefBS::dirty_card_val());
  1630   sb(AT, card_addr, 0);
  1632   lw(AT, queue_index);
  1633   beq(AT, R0, runtime);
  1634   delayed()->nop();
  1635   daddiu(AT, AT, -1 * wordSize);
  1636   sw(AT, queue_index);
  1637   ld(tmp2, buffer);
  1638 #ifdef _LP64
  1639   ld(AT, queue_index);
  1640   daddu(tmp2, tmp2, AT);
  1641   sd(card_addr, tmp2, 0);
  1642 #else
  1643   lw(AT, queue_index);
  1644   addu32(tmp2, tmp2, AT);
  1645   sw(card_addr, tmp2, 0);
  1646 #endif
  1647   beq(R0, R0, done);
  1648   delayed()->nop();
  1650   bind(runtime);
  1651   // save the live input values
  1652   push(store_addr);
  1653   push(new_val);
  1654 #ifdef _LP64
  1655   call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, TREG);
  1656 #else
  1657   push(thread);
  1658   call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
  1659   pop(thread);
  1660 #endif
  1661   pop(new_val);
  1662   pop(store_addr);
  1664   bind(done);
  1667 #endif // INCLUDE_ALL_GCS
  1668 //////////////////////////////////////////////////////////////////////////////////
  1671 void MacroAssembler::store_check(Register obj) {
  1672   // Does a store check for the oop in register obj. The content of
  1673   // register obj is destroyed afterwards.
  1674   store_check_part_1(obj);
  1675   store_check_part_2(obj);
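       // Net effect of the two parts, as a sketch in C-like terms (assuming
       // dirty_card_val() == 0, which is why part 2 can simply store R0):
       //   ct->byte_map_base[obj >> CardTableModRefBS::card_shift] = 0;  // dirty the card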
  1678 void MacroAssembler::store_check(Register obj, Address dst) {
  1679   store_check(obj);
  1683 // split the store check operation so that other instructions can be scheduled in between
  1684 void MacroAssembler::store_check_part_1(Register obj) {
  1685   BarrierSet* bs = Universe::heap()->barrier_set();
  1686   assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  1687 #ifdef _LP64
  1688   dsrl(obj, obj, CardTableModRefBS::card_shift);
  1689 #else
  1690   shr(obj, CardTableModRefBS::card_shift);
  1691 #endif
  1694 void MacroAssembler::store_check_part_2(Register obj) {
  1695   BarrierSet* bs = Universe::heap()->barrier_set();
  1696   assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  1697   CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  1698   assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
  1700   set64(AT, (long)ct->byte_map_base);
  1701 #ifdef _LP64
  1702   dadd(AT, AT, obj);
  1703 #else
  1704   add(AT, AT, obj);
  1705 #endif
  1706   if (UseConcMarkSweepGC) sync();
  1707   sb(R0, AT, 0);
  1710 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
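       // A sketch of the fast path below (C-like pseudo-code, illustration only):
       //   obj = thread->tlab_top;
       //   end = obj + size;                            // size = var or constant
       //   if (thread->tlab_end < end) goto slow_case;  // does not fit
       //   thread->tlab_top = end;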
  1711 void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
  1712                                    Register t1, Register t2, Label& slow_case) {
  1713   assert_different_registers(obj, var_size_in_bytes, t1, t2, AT);
  1715   Register end = t2;
  1716 #ifndef OPT_THREAD
  1717   Register thread = t1;
  1718   get_thread(thread);
  1719 #else
  1720   Register thread = TREG;
  1721 #endif
  1722   verify_tlab(t1, t2); // blows t1 & t2
  1724   ld_ptr(obj, thread, in_bytes(JavaThread::tlab_top_offset()));
  1726   if (var_size_in_bytes == NOREG) {
  1727     set64(AT, con_size_in_bytes);
  1728     add(end, obj, AT);
  1729   } else {
  1730     add(end, obj, var_size_in_bytes);
  1733   ld_ptr(AT, thread, in_bytes(JavaThread::tlab_end_offset()));
  1734   sltu(AT, AT, end);
  1735   bne_far(AT, R0, slow_case);
  1736   delayed()->nop();
  1739   // update the tlab top pointer
  1740   st_ptr(end, thread, in_bytes(JavaThread::tlab_top_offset()));
  1742   verify_tlab(t1, t2);
  1745 // Defines obj, preserves var_size_in_bytes
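       // A sketch of the shared-eden fast path below (C-like pseudo-code,
       // illustration only):
       //   obj = *heap_top;
       //   retry:
       //     end = obj + size;
       //     if (end < obj) goto slow_case;              // wrapped: object too big
       //     if (*heap_end < end) goto slow_case;        // past the end of eden
       //     if (!CAS(heap_top, &obj, end)) goto retry;  // a failed CAS refreshes obj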
  1746 void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
  1747                                    Register t1, Register t2, Label& slow_case) {
  1748   assert_different_registers(obj, var_size_in_bytes, t1, AT);
  1749   if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
  1750     // No allocation in the shared eden.
  1751     b_far(slow_case);
  1752     delayed()->nop();
  1753   } else {
  1755 #ifndef _LP64
  1756     Address heap_top(t1, Assembler::split_low((intptr_t)Universe::heap()->top_addr()));
  1757     lui(t1, split_high((intptr_t)Universe::heap()->top_addr()));
  1758 #else
  1759     Address heap_top(t1);
  1760     li(t1, (long)Universe::heap()->top_addr());
  1761 #endif
  1762     ld_ptr(obj, heap_top);
  1764     Register end = t2;
  1765     Label retry;
  1767     bind(retry);
  1768     if (var_size_in_bytes == NOREG) {
  1769       set64(AT, con_size_in_bytes);
  1770       add(end, obj, AT);
  1771     } else {
  1772       add(end, obj, var_size_in_bytes);
  1774     // if end < obj then we wrapped around => object too long => slow case
  1775     sltu(AT, end, obj);
  1776     bne_far(AT, R0, slow_case);
  1777     delayed()->nop();
  1779     li(AT, (long)Universe::heap()->end_addr());
  1780     ld_ptr(AT, AT, 0);
  1781     sltu(AT, AT, end);
  1782     bne_far(AT, R0, slow_case);
  1783     delayed()->nop();
  1784     // Compare obj with the current top, and if still equal, store end (the
  1785     // new top) through the top addr pointer. On MIPS, cmpxchg sets AT to 1
  1786     // on success and to 0 otherwise; there is no ZF or lock prefix here.
  1787     //if (os::is_MP()) {
  1788     //  sync();
  1789     //}
  1791     // if someone beat us on the allocation, try again, otherwise continue
  1792     cmpxchg(end, heap_top, obj);
  1793     beq_far(AT, R0, retry);
  1794     delayed()->nop();
  1798 // C2 doesn't invoke this one.
  1799 void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case) {
  1800   Register top = T0;
  1801   Register t1  = T1;
  1802   Register t2  = T9;
  1803   Register t3  = T3;
  1804   Register thread_reg = T8;
  1805   assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ T2, A4);
  1806   Label do_refill, discard_tlab;
  1808   if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
  1809     // No allocation in the shared eden.
  1810     b(slow_case);
  1811     delayed()->nop();
  1814   get_thread(thread_reg);
  1816   ld_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
  1817   ld_ptr(t1,  thread_reg, in_bytes(JavaThread::tlab_end_offset()));
  1819   // calculate amount of free space
  1820   sub(t1, t1, top);
  1821   shr(t1, LogHeapWordSize);
  1823   // Retain tlab and allocate object in shared space if
  1824   // the amount free in the tlab is too large to discard.
  1825   ld_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
  1826   slt(AT, t2, t1);
  1827   beq(AT, R0, discard_tlab);
  1828   delayed()->nop();
  1830   // Retain
  1831 #ifndef _LP64
  1832   move(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
  1833 #else
  1834   li(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
  1835 #endif
  1836   add(t2, t2, AT);
  1837   st_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
  1839   if (TLABStats) {
  1840     // increment number of slow_allocations
  1841     lw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
  1842     addiu(AT, AT, 1);
  1843     sw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
  1845   b(try_eden);
  1846   delayed()->nop();
  1848   bind(discard_tlab);
  1849   if (TLABStats) {
  1850     // increment number of refills
  1851     lw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
  1852     addi(AT, AT, 1);
  1853     sw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
  1854     // accumulate wastage -- t1 is amount free in tlab
  1855     lw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
  1856     add(AT, AT, t1);
  1857     sw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
  1860   // if tlab is currently allocated (top or end != null) then
  1861   // fill [top, end + alignment_reserve) with array object
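       // (Formatting the dead remainder of the old TLAB as a dummy int[] keeps
       // the heap parseable for heap walkers; the length stored below is chosen
       // so the array exactly fills [top, end + alignment_reserve).)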
  1862   beq(top, R0, do_refill);
  1863   delayed()->nop();
  1865   // set up the mark word
  1866   li(AT, (long)markOopDesc::prototype()->copy_set_hash(0x2));
  1867   st_ptr(AT, top, oopDesc::mark_offset_in_bytes());
  1869   // set the length to the remaining space
  1870   addi(t1, t1, - typeArrayOopDesc::header_size(T_INT));
  1871   addi(t1, t1, ThreadLocalAllocBuffer::alignment_reserve());
  1872   shl(t1, log2_intptr(HeapWordSize/sizeof(jint)));
  1873   sw(t1, top, arrayOopDesc::length_offset_in_bytes());
  1875   // set klass to intArrayKlass
  1876 #ifndef _LP64
  1877   lui(AT, split_high((intptr_t)Universe::intArrayKlassObj_addr()));
  1878   lw(t1, AT, split_low((intptr_t)Universe::intArrayKlassObj_addr()));
  1879 #else
  1880   li(AT, (intptr_t)Universe::intArrayKlassObj_addr());
  1881   ld_ptr(t1, AT, 0);
  1882 #endif
  1883   //st_ptr(t1, top, oopDesc::klass_offset_in_bytes());
  1884   store_klass(top, t1);
  1886   ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_start_offset()));
  1887   subu(t1, top, t1);
  1888   incr_allocated_bytes(thread_reg, t1, 0);
  1890   // refill the tlab with an eden allocation
  1891   bind(do_refill);
  1892   ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
  1893   shl(t1, LogHeapWordSize);
  1894   // add object_size ??
  1895   eden_allocate(top, t1, 0, t2, t3, slow_case);
  1897   // Check that t1 was preserved in eden_allocate.
  1898 #ifdef ASSERT
  1899   if (UseTLAB) {
  1900     Label ok;
  1901     assert_different_registers(thread_reg, t1);
  1902     ld_ptr(AT, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
  1903     shl(AT, LogHeapWordSize);
  1904     beq(AT, t1, ok);
  1905     delayed()->nop();
  1906     stop("assert(t1 != tlab size)");
  1907     should_not_reach_here();
  1909     bind(ok);
  1911 #endif
  1912   st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_start_offset()));
  1913   st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
  1914   add(top, top, t1);
  1915   addi(top, top, - ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
  1916   st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_end_offset()));
  1917   verify_tlab(t1, t2);
  1918   b(retry);
  1919   delayed()->nop();
  1922 void MacroAssembler::incr_allocated_bytes(Register thread,
  1923                                           Register var_size_in_bytes,
  1924                                           int con_size_in_bytes,
  1925                                           Register t1) {
  1926   if (!thread->is_valid()) {
  1927 #ifndef OPT_THREAD
  1928     assert(t1->is_valid(), "need temp reg");
  1929     thread = t1;
  1930     get_thread(thread);
  1931 #else
  1932     thread = TREG;
  1933 #endif
  1936   ld_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset()));
  1937   if (var_size_in_bytes->is_valid()) {
  1938     addu(AT, AT, var_size_in_bytes);
  1939   } else {
  1940     addiu(AT, AT, con_size_in_bytes);
  1942   st_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset()));
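       // Net effect (a sketch): thread->allocated_bytes += size, in either the
       // register or the constant form.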
  1945 static const double     pi_4 =  0.7853981633974483;
  1947 // The argument (a double) must be passed in F12/F13.
  1948 //void MacroAssembler::trigfunc(char trig, bool preserve_cpu_regs, int num_fpu_regs_in_use) {
  1949 //We need to preserve the registers which may be modified during the call.
  1950 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
  1951   // save all modified registers here
  1952   // FIXME: the disassembly of trigfunc only uses V0, V1, T9, SP and RA, so we only need to save V0, V1 and T9
  1953   pushad();
  1954   // reserve outgoing-argument stack space before the call
  1955   addi(SP, SP, -wordSize * 2);
  1956   switch (trig){
  1957     case 's' :
  1958       call( CAST_FROM_FN_PTR(address, SharedRuntime::dsin), relocInfo::runtime_call_type );
  1959       delayed()->nop();
  1960       break;
  1961     case 'c':
  1962       call( CAST_FROM_FN_PTR(address, SharedRuntime::dcos), relocInfo::runtime_call_type );
  1963       delayed()->nop();
  1964       break;
  1965     case 't':
  1966       call( CAST_FROM_FN_PTR(address, SharedRuntime::dtan), relocInfo::runtime_call_type );
  1967       delayed()->nop();
  1968       break;
  1969     default:assert (false, "bad intrinsic");
  1970     break;
  1974   addi(SP, SP, wordSize * 2);
  1975   popad();
  1978 #ifdef _LP64
  1979 void MacroAssembler::li(Register rd, long imm) {
  1980   if (imm <= max_jint && imm >= min_jint) {
  1981     li32(rd, (int)imm);
  1982   } else if (julong(imm) <= 0xFFFFFFFF) {
  1983     assert_not_delayed();
  1984     // lui sign-extends, so we can't use that.
  1985     ori(rd, R0, julong(imm) >> 16);
  1986     dsll(rd, rd, 16);
  1987     ori(rd, rd, split_low(imm));
  1988   } else if ((imm > 0) && is_simm16(imm >> 32)) {
  1989     // A 48-bit address
  1990     li48(rd, imm);
  1991   } else {
  1992     li64(rd, imm);
  1995 #else
  1996 void MacroAssembler::li(Register rd, long imm) {
  1997   li32(rd, (int)imm);
  1999 #endif
  2001 void MacroAssembler::li32(Register reg, int imm) {
  2002   if (is_simm16(imm)) {
  2003     // for imm < 0, we should use addi instead of addiu.
  2004     //
  2005     //  java.lang.StringCoding$StringDecoder.decode(jobject, jint, jint)
  2006     //
  2007     //  78 move [int:-1|I] [a0|I]
  2008     //    : daddi a0, zero, 0xffffffff  (correct)
  2009     //    : daddiu a0, zero, 0xffffffff (incorrect)
  2010     //
  2011     if (imm >= 0)
  2012       addiu(reg, R0, imm);
  2013     else
  2014       addi(reg, R0, imm);
  2015   } else {
  2016     lui(reg, split_low(imm >> 16));
  2017     if (split_low(imm))
  2018       ori(reg, reg, split_low(imm));
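           // Worked example (a sketch): li32(reg, 0x12345678) emits
           //   lui reg, 0x1234        # reg = 0x12340000
           //   ori reg, reg, 0x5678   # reg = 0x12345678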
  2022 #ifdef _LP64
  2023 void MacroAssembler::set64(Register d, jlong value) {
  2024   assert_not_delayed();
  2026   int hi = (int)(value >> 32);
  2027   int lo = (int)(value & ~0);
  2029   if (value == lo) {  // 32-bit integer
  2030     if (is_simm16(value)) {
  2031       daddiu(d, R0, value);
  2032     } else {
  2033       lui(d, split_low(value >> 16));
  2034       if (split_low(value)) {
  2035         ori(d, d, split_low(value));
  2038   } else if (hi == 0) {  // hardware zero-extends to upper 32
  2039       ori(d, R0, julong(value) >> 16);
  2040       dsll(d, d, 16);
  2041       if (split_low(value)) {
  2042         ori(d, d, split_low(value));
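             // Worked example (a sketch): set64(d, 0x0000000089ABCDEF) takes this
             // path (hi == 0; lo sign-extends negative, so value != lo) and emits
             //   ori  d, R0, 0x89AB   # d = 0x00000000000089AB
             //   dsll d, d, 16        # d = 0x0000000089AB0000
             //   ori  d, d, 0xCDEF    # d = 0x0000000089ABCDEF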
  2044   } else if ((value > 0) && is_simm16(value >> 32)) {  // li48
  2045     // 4 insts
  2046     li48(d, value);
  2047   } else {  // li64
  2048     // 6 insts
  2049     li64(d, value);
  2054 int MacroAssembler::insts_for_set64(jlong value) {
  2055   int hi = (int)(value >> 32);
  2056   int lo = (int)(value & ~0);
  2058   int count = 0;
  2060   if (value == lo) {  // 32-bit integer
  2061     if (is_simm16(value)) {
  2062       //daddiu(d, R0, value);
  2063       count++;
  2064     } else {
  2065       //lui(d, split_low(value >> 16));
  2066       count++;
  2067       if (split_low(value)) {
  2068         //ori(d, d, split_low(value));
  2069         count++;
  2072   } else if (hi == 0) {  // hardware zero-extends to upper 32
  2073       //ori(d, R0, julong(value) >> 16);
  2074       //dsll(d, d, 16);
  2075       count += 2;
  2076       if (split_low(value)) {
  2077         //ori(d, d, split_low(value));
  2078         count++;
  2080   } else if ((value > 0) && is_simm16(value >> 32)) {  // li48
  2081     // 4 insts
  2082     //li48(d, value);
  2083     count += 4;
  2084   } else {  // li64
  2085     // 6 insts
  2086     //li64(d, value);
  2087     count += 6;
  2090   return count;
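       // patchable_set48 always occupies exactly 4 instruction slots (padding with
       // nops as needed) so the immediate can later be patched in place without
       // changing the code size.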
  2093 void MacroAssembler::patchable_set48(Register d, jlong value) {
  2094   assert_not_delayed();
  2096   int hi = (int)(value >> 32);
  2097   int lo = (int)(value & ~0);
  2099   int count = 0;
  2101   if (value == lo) {  // 32-bit integer
  2102     if (is_simm16(value)) {
  2103       daddiu(d, R0, value);
  2104       count += 1;
  2105     } else {
  2106       lui(d, split_low(value >> 16));
  2107       count += 1;
  2108       if (split_low(value)) {
  2109         ori(d, d, split_low(value));
  2110         count += 1;
  2113   } else if (hi == 0) {  // hardware zero-extends to upper 32
  2114       ori(d, R0, julong(value) >> 16);
  2115       dsll(d, d, 16);
  2116       count += 2;
  2117       if (split_low(value)) {
  2118         ori(d, d, split_low(value));
  2119         count += 1;
  2121   } else if ((value > 0) && is_simm16(value >> 32)) {  // li48
  2122     // 4 insts
  2123     li48(d, value);
  2124     count += 4;
  2125   } else {  // li64
  2126     tty->print_cr("value = 0x%lx", value);
  2127     guarantee(false, "Not supported yet !");
  2130   while (count < 4) {
  2131     nop();
  2132     count++;
  2136 void MacroAssembler::patchable_set32(Register d, jlong value) {
  2137   assert_not_delayed();
  2139   int hi = (int)(value >> 32);
  2140   int lo = (int)(value & ~0);
  2142   int count = 0;
  2144   if (value == lo) {  // 32-bit integer
  2145     if (is_simm16(value)) {
  2146       daddiu(d, R0, value);
  2147       count += 1;
  2148     } else {
  2149       lui(d, split_low(value >> 16));
  2150       count += 1;
  2151       if (split_low(value)) {
  2152         ori(d, d, split_low(value));
  2153         count += 1;
  2156   } else if (hi == 0) {  // hardware zero-extends to upper 32
  2157       ori(d, R0, julong(value) >> 16);
  2158       dsll(d, d, 16);
  2159       count += 2;
  2160       if (split_low(value)) {
  2161         ori(d, d, split_low(value));
  2162         count += 1;
  2164   } else {
  2165     tty->print_cr("value = 0x%lx", value);
  2166     guarantee(false, "Not supported yet !");
  2169   while (count < 3) {
  2170     nop();
  2171     count++;
  2175 void MacroAssembler::patchable_call32(Register d, jlong value) {
  2176   assert_not_delayed();
  2178   int hi = (int)(value >> 32);
  2179   int lo = (int)(value & ~0);
  2181   int count = 0;
  2183   if (value == lo) {  // 32-bit integer
  2184     if (is_simm16(value)) {
  2185       daddiu(d, R0, value);
  2186       count += 1;
  2187     } else {
  2188       lui(d, split_low(value >> 16));
  2189       count += 1;
  2190       if (split_low(value)) {
  2191         ori(d, d, split_low(value));
  2192         count += 1;
  2195   } else {
  2196     tty->print_cr("value = 0x%lx", value);
  2197     guarantee(false, "Not supported yet !");
  2200   while (count < 2) {
  2201     nop();
  2202     count++;
  2206 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
  2207   assert(UseCompressedClassPointers, "should only be used for compressed header");
  2208   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
  2210   int klass_index = oop_recorder()->find_index(k);
  2211   RelocationHolder rspec = metadata_Relocation::spec(klass_index);
  2212   long narrowKlass = (long)Klass::encode_klass(k);
  2214   relocate(rspec, Assembler::narrow_oop_operand);
  2215   patchable_set48(dst, narrowKlass);
  2219 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
  2220   assert(UseCompressedOops, "should only be used for compressed header");
  2221   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
  2223   int oop_index = oop_recorder()->find_index(obj);
  2224   RelocationHolder rspec = oop_Relocation::spec(oop_index);
  2226   relocate(rspec, Assembler::narrow_oop_operand);
  2227   patchable_set48(dst, oop_index);
  2230 void MacroAssembler::li64(Register rd, long imm) {
  2231   assert_not_delayed();
  2232   lui(rd, split_low(imm >> 48));
  2233   ori(rd, rd, split_low(imm >> 32));
  2234   dsll(rd, rd, 16);
  2235   ori(rd, rd, split_low(imm >> 16));
  2236   dsll(rd, rd, 16);
  2237   ori(rd, rd, split_low(imm));
  2240 void MacroAssembler::li48(Register rd, long imm) {
  2241   assert_not_delayed();
  2242   assert(is_simm16(imm >> 32), "Not a 48-bit address");
  2243   lui(rd, imm >> 32);
  2244   ori(rd, rd, split_low(imm >> 16));
  2245   dsll(rd, rd, 16);
  2246   ori(rd, rd, split_low(imm));
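       // Worked example (a sketch): li48(rd, 0x0000123456789ABC) emits
       //   lui  rd, 0x1234        # rd = 0x0000000012340000
       //   ori  rd, rd, 0x5678    # rd = 0x0000000012345678
       //   dsll rd, rd, 16        # rd = 0x0000123456780000
       //   ori  rd, rd, 0x9ABC    # rd = 0x0000123456789ABC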
  2248 #endif
  2250 void MacroAssembler::verify_oop(Register reg, const char* s) {
  2251   if (!VerifyOops) return;
  2252   const char * b = NULL;
  2253   stringStream ss;
  2254   ss.print("verify_oop: %s: %s", reg->name(), s);
  2255   b = code_string(ss.as_string());
  2256 #ifdef _LP64
  2257   pushad();
  2258   move(A1, reg);
  2259   li(A0, (long)b);
  2260   li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
  2261   ld(T9, AT, 0);
  2262   jalr(T9);
  2263   delayed()->nop();
  2264   popad();
  2265 #else
  2266   // Pass register number to verify_oop_subroutine
  2267   sw(T0, SP, - wordSize);
  2268   sw(T1, SP, - 2*wordSize);
  2269   sw(RA, SP, - 3*wordSize);
  2270   sw(A0, SP ,- 4*wordSize);
  2271   sw(A1, SP ,- 5*wordSize);
  2272   sw(AT, SP ,- 6*wordSize);
  2273   sw(T9, SP ,- 7*wordSize);
  2274   addiu(SP, SP, - 7 * wordSize);
  2275   move(A1, reg);
  2276   li(A0, (long)b);
  2277   // call indirectly to solve generation ordering problem
  2278   li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
  2279   lw(T9, AT, 0);
  2280   jalr(T9);
  2281   delayed()->nop();
  2282   lw(T0, SP, 6* wordSize);
  2283   lw(T1, SP, 5* wordSize);
  2284   lw(RA, SP, 4* wordSize);
  2285   lw(A0, SP, 3* wordSize);
  2286   lw(A1, SP, 2* wordSize);
  2287   lw(AT, SP, 1* wordSize);
  2288   lw(T9, SP, 0* wordSize);
  2289   addiu(SP, SP, 7 * wordSize);
  2290 #endif
  2294 void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
  2295   if (!VerifyOops) {
  2296     nop();
  2297     return;
  2299   // Pass register number to verify_oop_subroutine
  2300   const char * b = NULL;
  2301   stringStream ss;
  2302   ss.print("verify_oop_addr: %s",  s);
  2303   b = code_string(ss.as_string());
  2305   st_ptr(T0, SP, - wordSize);
  2306   st_ptr(T1, SP, - 2*wordSize);
  2307   st_ptr(RA, SP, - 3*wordSize);
  2308   st_ptr(A0, SP, - 4*wordSize);
  2309   st_ptr(A1, SP, - 5*wordSize);
  2310   st_ptr(AT, SP, - 6*wordSize);
  2311   st_ptr(T9, SP, - 7*wordSize);
  2312   ld_ptr(A1, addr);   // addr may use SP, so load from it before change SP
  2313   addiu(SP, SP, - 7 * wordSize);
  2315   li(A0, (long)b);
  2316   // call indirectly to solve generation ordering problem
  2317   li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
  2318   ld_ptr(T9, AT, 0);
  2319   jalr(T9);
  2320   delayed()->nop();
  2321   ld_ptr(T0, SP, 6* wordSize);
  2322   ld_ptr(T1, SP, 5* wordSize);
  2323   ld_ptr(RA, SP, 4* wordSize);
  2324   ld_ptr(A0, SP, 3* wordSize);
  2325   ld_ptr(A1, SP, 2* wordSize);
  2326   ld_ptr(AT, SP, 1* wordSize);
  2327   ld_ptr(T9, SP, 0* wordSize);
  2328   addiu(SP, SP, 7 * wordSize);
  2331 // used registers: T0, T1
  2332 void MacroAssembler::verify_oop_subroutine() {
  2333   // RA: ra
  2334   // A0: char* error message
  2335   // A1: oop   object to verify
  2337   Label exit, error;
  2338   // increment counter
  2339   li(T0, (long)StubRoutines::verify_oop_count_addr());
  2340   lw(AT, T0, 0);
  2341 #ifdef _LP64
  2342   daddi(AT, AT, 1);
  2343 #else
  2344   addi(AT, AT, 1);
  2345 #endif
  2346   sw(AT, T0, 0);
  2348   // make sure object is 'reasonable'
  2349   beq(A1, R0, exit);         // if obj is NULL it is ok
  2350   delayed()->nop();
  2352   // Check if the oop is in the right area of memory
  2353   // const int oop_mask = Universe::verify_oop_mask();
  2354   // const int oop_bits = Universe::verify_oop_bits();
  2355   const uintptr_t oop_mask = Universe::verify_oop_mask();
  2356   const uintptr_t oop_bits = Universe::verify_oop_bits();
  2357   li(AT, oop_mask);
  2358   andr(T0, A1, AT);
  2359   li(AT, oop_bits);
  2360   bne(T0, AT, error);
  2361   delayed()->nop();
  2363   // make sure klass is 'reasonable'
  2364   // added for compressed oops
  2365   reinit_heapbase();
  2367   load_klass(T0, A1);
  2368   beq(T0, R0, error);                        // if klass is NULL it is broken
  2369   delayed()->nop();
  2370   // return if everything seems ok
  2371   bind(exit);
  2373   jr(RA);
  2374   delayed()->nop();
  2376   // handle errors
  2377   bind(error);
  2378   pushad();
  2379 #ifndef _LP64
  2380   addi(SP, SP, (-1) * wordSize);
  2381 #endif
  2382   call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  2383   delayed()->nop();
  2384 #ifndef _LP64
  2385   addiu(SP, SP, 1 * wordSize);
  2386 #endif
  2387   popad();
  2388   jr(RA);
  2389   delayed()->nop();
  2392 void MacroAssembler::verify_tlab(Register t1, Register t2) {
  2393 #ifdef ASSERT
  2394   assert_different_registers(t1, t2, AT);
  2395   if (UseTLAB && VerifyOops) {
  2396     Label next, ok;
  2398     get_thread(t1);
  2400     ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset()));
  2401     ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset()));
  2402     sltu(AT, t2, AT);
  2403     beq(AT, R0, next);
  2404     delayed()->nop();
  2406     stop("assert(top >= start)");
  2408     bind(next);
  2409     ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset()));
  2410     sltu(AT, AT, t2);
  2411     beq(AT, R0, ok);
  2412     delayed()->nop();
  2414     stop("assert(top <= end)");
  2416     bind(ok);
  2419 #endif
  2422 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
  2423                                                        Register tmp,
  2424                                                        int offset) {
  2425   intptr_t value = *delayed_value_addr;
  2426   if (value != 0)
  2427     return RegisterOrConstant(value + offset);
  2428   AddressLiteral a(delayed_value_addr);
  2429   // load indirectly to solve generation ordering problem
  2430   //movptr(tmp, ExternalAddress((address) delayed_value_addr));
  2431   //ld(tmp, a);
  2432   if (offset != 0)
  2433     daddi(tmp,tmp, offset);
  2435   return RegisterOrConstant(tmp);
  2438 void MacroAssembler::hswap(Register reg) {
  2439   // swap the two bytes of a halfword; the result is sign-extended
  2440   //andi(reg, reg, 0xffff);
  2441   srl(AT, reg, 8);
  2442   sll(reg, reg, 24);
  2443   sra(reg, reg, 16);
  2444   orr(reg, reg, AT);
  2447 void MacroAssembler::huswap(Register reg) {
  2448 #ifdef _LP64
  2449   dsrl(AT, reg, 8);
  2450   dsll(reg, reg, 24);
  2451   dsrl(reg, reg, 16);
  2452   orr(reg, reg, AT);
  2453   andi(reg, reg, 0xffff);
  2454 #else
  2455   //andi(reg, reg, 0xffff);
  2456   srl(AT, reg, 8);
  2457   sll(reg, reg, 24);
  2458   srl(reg, reg, 16);
  2459   orr(reg, reg, AT);
  2460 #endif
  2463 // A byte swap that needs only one extra register (AT)
  2464 // 32 bits
  2465 void MacroAssembler::swap(Register reg) {
  2466   srl(AT, reg, 8);
  2467   sll(reg, reg, 24);
  2468   orr(reg, reg, AT);
  2469   //reg : 4 1 2 3
  2470   srl(AT, AT, 16);
  2471   xorr(AT, AT, reg);
  2472   andi(AT, AT, 0xff);
  2473   //AT : 0 0 0 1^3
  2474   xorr(reg, reg, AT);
  2475   //reg : 4 1 2 1
  2476   sll(AT, AT, 16);
  2477   xorr(reg, reg, AT);
  2478   //reg : 4 3 2 1
  2481 #ifdef _LP64
  2483 // do 32-bit CAS using MIPS64 lld/scd
  2484 //
  2485 //  cas_int should only compare the low 32 bits of the memory value.
  2486 //  However, lld/scd perform a 64-bit operation, which violates the intention of cas_int.
  2487 //  To simulate a 32-bit atomic operation, the value loaded with LLD should be split into
  2488 //  two halves, and only the low 32 bits are compared. If they are equal, the low 32 bits
  2489 //  of newval, plus the high 32 bits of the memory value, are stored together with SCD.
  2490 //
  2491 //Example:
  2492 //
  2493 //      double d = 3.1415926;
  2494 //      System.err.println("hello" + d);
  2495 //
  2496 //  sun.misc.FloatingDecimal$1.<init>()
  2497 //   |
  2498 //   `- java.util.concurrent.atomic.AtomicInteger::compareAndSet()
  2499 //
  2500 //  38 cas_int [a7a7|J] [a0|I] [a6|I]
  2501 //   a0: 0xffffffffe8ea9f63 pc: 0x55647f3354
  2502 //   a6: 0x4ab325aa
  2503 //
  2504 //again:
  2505 //   0x00000055647f3c5c: lld at, 0x0(a7)                          ; 64-bit load, "0xe8ea9f63"
  2506 //
  2507 //   0x00000055647f3c60: sll t9, at, 0                            ; t9: low-32 bits (sign extended)
  2508 //   0x00000055647f3c64: dsrl32 t8, at, 0                         ; t8: high-32 bits
  2509 //   0x00000055647f3c68: dsll32 t8, t8, 0
  2510 //   0x00000055647f3c6c: bne t9, a0, 0x00000055647f3c9c           ; goto nequal
  2511 //   0x00000055647f3c70: sll zero, zero, 0
  2512 //
  2513 //   0x00000055647f3c74: ori v1, zero, 0xffffffff                 ; v1: low-32 bits of newval (sign unextended)
  2514 //   0x00000055647f3c78: dsll v1, v1, 16                          ; v1 = a6 & 0xFFFFFFFF;
  2515 //   0x00000055647f3c7c: ori v1, v1, 0xffffffff
  2516 //   0x00000055647f3c80: and v1, a6, v1
  2517 //   0x00000055647f3c84: or at, t8, v1
  2518 //   0x00000055647f3c88: scd at, 0x0(a7)
  2519 //   0x00000055647f3c8c: beq at, zero, 0x00000055647f3c5c         ; goto again
  2520 //   0x00000055647f3c90: sll zero, zero, 0
  2521 //   0x00000055647f3c94: beq zero, zero, 0x00000055647f45ac       ; goto done
  2522 //   0x00000055647f3c98: sll zero, zero, 0
  2523 //nequal:
  2524 //   0x00000055647f45a4: dadd a0, t9, zero
  2525 //   0x00000055647f45a8: dadd at, zero, zero
  2526 //done:
  2527 //
  2529 void MacroAssembler::cmpxchg32(Register x_reg, Address dest, Register c_reg) {
  2530   // MIPS64 can use ll/sc for 32-bit atomic memory access
  2531   Label done, again, nequal;
  2533   bind(again);
  2535   if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync();
  2536   ll(AT, dest);
  2537   bne(AT, c_reg, nequal);
  2538   delayed()->nop();
  2540   move(AT, x_reg);
  2541   sc(AT, dest);
  2542   beq(AT, R0, again);
  2543   delayed()->nop();
  2544   b(done);
  2545   delayed()->nop();
  2547   // not exchanged
  2548   bind(nequal);
  2549   sync();
  2550   move(c_reg, AT);
  2551   move(AT, R0);
  2553   bind(done);
  2555 #endif  // cmpxchg32
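       // Full-width compare-and-swap via LL/SC. In C-like pseudo-code
       // (illustration only):
       //   again: old = load_linked(dest);
       //          if (old != c_reg) { c_reg = old; AT = 0; goto done; }
       //          AT = store_conditional(dest, x_reg);   // 1 on success
       //          if (AT == 0) goto again;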
  2557 void MacroAssembler::cmpxchg(Register x_reg, Address dest, Register c_reg) {
  2558   Label done, again, nequal;
  2560   bind(again);
  2561   if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync();
  2562 #ifdef _LP64
  2563   lld(AT, dest);
  2564 #else
  2565   ll(AT, dest);
  2566 #endif
  2567   bne(AT, c_reg, nequal);
  2568   delayed()->nop();
  2570   move(AT, x_reg);
  2571 #ifdef _LP64
  2572   scd(AT, dest);
  2573 #else
  2574   sc(AT, dest);
  2575 #endif
  2576   beq(AT, R0, again);
  2577   delayed()->nop();
  2578   b(done);
  2579   delayed()->nop();
  2581   // not exchanged
  2582   bind(nequal);
  2583   sync();
  2584   move(c_reg, AT);
  2585   move(AT, R0);
  2587   bind(done);
  2590 void MacroAssembler::cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi) {
  2591   Label done, again, nequal;
  2593   Register x_reg = x_regLo;
  2594   dsll32(x_regHi, x_regHi, 0);
  2595   dsll32(x_regLo, x_regLo, 0);
  2596   dsrl32(x_regLo, x_regLo, 0);
  2597   orr(x_reg, x_regLo, x_regHi);
  2599   Register c_reg = c_regLo;
  2600   dsll32(c_regHi, c_regHi, 0);
  2601   dsll32(c_regLo, c_regLo, 0);
  2602   dsrl32(c_regLo, c_regLo, 0);
  2603   orr(c_reg, c_regLo, c_regHi);
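       // x_reg and c_reg now each hold (hi << 32) | zero_extend(lo), so a single
       // 64-bit lld/scd below compares and swaps both halves atomically.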
  2605   bind(again);
  2607   if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync();
  2608   lld(AT, dest);
  2609   bne(AT, c_reg, nequal);
  2610   delayed()->nop();
  2612   //move(AT, x_reg);
  2613   dadd(AT, x_reg, R0);
  2614   scd(AT, dest);
  2615   beq(AT, R0, again);
  2616   delayed()->nop();
  2617   b(done);
  2618   delayed()->nop();
  2620   // not exchanged
  2621   bind(nequal);
  2622   sync();
  2623   //move(c_reg, AT);
  2624   //move(AT, R0);
  2625   dadd(c_reg, AT, R0);
  2626   dadd(AT, R0, R0);
  2627   bind(done);
  2630 // the three registers must be different
  2631 void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) {
  2632   assert_different_registers(tmp, fs, ft);
  2633   div_s(tmp, fs, ft);
  2634   trunc_l_s(tmp, tmp);
  2635   cvt_s_l(tmp, tmp);
  2636   mul_s(tmp, tmp, ft);
  2637   sub_s(fd, fs, tmp);
  2640 // the three registers must be different
  2641 void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) {
  2642   assert_different_registers(tmp, fs, ft);
  2643   div_d(tmp, fs, ft);
  2644   trunc_l_d(tmp, tmp);
  2645   cvt_d_l(tmp, tmp);
  2646   mul_d(tmp, tmp, ft);
  2647   sub_d(fd, fs, tmp);
  2650 // Fast_Lock and Fast_Unlock used by C2
  2652 // Because the transitions from emitted code to the runtime
  2653 // monitorenter/exit helper stubs are so slow it's critical that
  2654 // we inline both the stack-locking fast-path and the inflated fast path.
  2655 //
  2656 // See also: cmpFastLock and cmpFastUnlock.
  2657 //
  2658 // What follows is a specialized inline transliteration of the code
  2659 // in slow_enter() and slow_exit().  If we're concerned about I$ bloat
  2660 // another option would be to emit TrySlowEnter and TrySlowExit methods
  2661 // at startup-time.  These methods would accept arguments as
  2662 // (Obj, Self, box, Scratch) and return success-failure
  2663 // indications in the icc.ZFlag.  Fast_Lock and Fast_Unlock would simply
  2664 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
  2665 // In practice, however, the # of lock sites is bounded and is usually small.
  2666 // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
  2667 // if the processor uses simple bimodal branch predictors keyed by EIP,
  2668 // since the helper routines would be called from multiple synchronization
  2669 // sites.
  2670 //
  2671 // An even better approach would be write "MonitorEnter()" and "MonitorExit()"
  2672 // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
  2673 // to those specialized methods.  That'd give us a mostly platform-independent
  2674 // implementation that the JITs could optimize and inline at their pleasure.
  2675 // Done correctly, the only time we'd need to cross to native code would be
  2676 // to park() or unpark() threads.  We'd also need a few more unsafe operators
  2677 // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
  2678 // (b) explicit barriers or fence operations.
  2679 //
  2680 // TODO:
  2681 //
  2682 // *  Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
  2683 //    This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
  2684 //    Given TLAB allocation, Self is usually manifested in a register, so passing it into
  2685 //    the lock operators would typically be faster than reifying Self.
  2686 //
  2687 // *  Ideally I'd define the primitives as:
  2688 //       fast_lock   (nax Obj, nax box, tmp, nax scr) where box, tmp and scr are KILLED.
  2689 //       fast_unlock (nax Obj, box, nax tmp) where box and tmp are KILLED
  2690 //    Unfortunately ADLC bugs prevent us from expressing the ideal form.
  2691 //    Instead, we're stuck with the rather awkward and brittle register assignments below.
  2692 //    Furthermore the register assignments are overconstrained, possibly resulting in
  2693 //    sub-optimal code near the synchronization site.
  2694 //
  2695 // *  Eliminate the sp-proximity tests and just use "== Self" tests instead.
  2696 //    Alternately, use a better sp-proximity test.
  2697 //
  2698 // *  Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
  2699 //    Either one is sufficient to uniquely identify a thread.
  2700 //    TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
  2701 //
  2702 // *  Intrinsify notify() and notifyAll() for the common cases where the
  2703 //    object is locked by the calling thread but the waitlist is empty.
  2704 //    This avoids the expensive JNI calls to JVM_Notify() and JVM_NotifyAll().
  2705 //
  2706 // *  use jccb and jmpb instead of jcc and jmp to improve code density.
  2707 //    But beware of excessive branch density on AMD Opterons.
  2708 //
  2709 // *  Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
  2710 //    or failure of the fast-path.  If the fast-path fails then we pass
  2711 //    control to the slow-path, typically in C.  In Fast_Lock and
  2712 //    Fast_Unlock we often branch to DONE_LABEL, just to find that C2
  2713 //    will emit a conditional branch immediately after the node.
  2714 //    So we have branches to branches and lots of ICC.ZF games.
  2715 //    Instead, it might be better to have C2 pass a "FailureLabel"
  2716 //    into Fast_Lock and Fast_Unlock.  In the case of success, control
  2717 //    will drop through the node.  ICC.ZF is undefined at exit.
  2718 //    In the case of failure, the node will branch directly to the
  2719 //    FailureLabel
  2722 // obj: object to lock
  2723 // box: on-stack box address (displaced header location) - KILLED
  2724 // tmp: tmp -- KILLED
  2725 // scr: tmp -- KILLED
  2726 void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, Register scrReg) {
  2728   // Ensure the register assignments are disjoint
  2729   guarantee (objReg != boxReg, "") ;
  2730   guarantee (objReg != tmpReg, "") ;
  2731   guarantee (objReg != scrReg, "") ;
  2732   guarantee (boxReg != tmpReg, "") ;
  2733   guarantee (boxReg != scrReg, "") ;
  2736   block_comment("FastLock");
  2737   if (PrintBiasedLockingStatistics) {
  2738     push(tmpReg);
  2739     atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, AT, tmpReg);
  2740     pop(tmpReg);
  2743   if (EmitSync & 1) {
  2744     move(AT, 0x0);
  2745     return;
  2746   } else
  2747     if (EmitSync & 2) {
  2748       Label DONE_LABEL ;
  2749       if (UseBiasedLocking) {
  2750         // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
  2751         biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
  2754       ld(tmpReg, Address(objReg, 0)) ;          // fetch markword
  2755       ori(tmpReg, tmpReg, 0x1);
  2756       sd(tmpReg, Address(boxReg, 0));           // Anticipate successful CAS
  2758       cmpxchg(boxReg, Address(objReg, 0), tmpReg);          // Updates tmpReg
  2759       bne(AT, R0, DONE_LABEL);
  2760       delayed()->nop();
  2762       // Recursive locking
  2763       dsubu(tmpReg, tmpReg, SP);
  2764       li(AT, (7 - os::vm_page_size() ));
  2765       andr(tmpReg, tmpReg, AT);
  2766       sd(tmpReg, Address(boxReg, 0));
  2767       bind(DONE_LABEL) ;
  2768     } else {
  2769       // Possible cases that we'll encounter in fast_lock
  2770       // ------------------------------------------------
  2771       // * Inflated
  2772       //    -- unlocked
  2773       //    -- Locked
  2774       //       = by self
  2775       //       = by other
  2776       // * biased
  2777       //    -- by Self
  2778       //    -- by other
  2779       // * neutral
  2780       // * stack-locked
  2781       //    -- by self
  2782       //       = sp-proximity test hits
  2783       //       = sp-proximity test generates false-negative
  2784       //    -- by other
  2785       //
  2787       Label IsInflated, DONE_LABEL, PopDone ;
  2789       // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
  2790       // order to reduce the number of conditional branches in the most common cases.
  2791       // Beware -- there's a subtle invariant that fetch of the markword
  2792       // at [FETCH], below, will never observe a biased encoding (*101b).
  2793       // If this invariant is not held we risk exclusion (safety) failure.
  2794       if (UseBiasedLocking && !UseOptoBiasInlining) {
  2795         biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
  2798       ld(tmpReg, Address(objReg, 0)) ;         //Fetch the markword of the object.
  2799       andi(AT, tmpReg, markOopDesc::monitor_value);
  2800       bne(AT, R0, IsInflated);                      // inflated vs stack-locked|neutral|bias
  2801       delayed()->nop();
  2803       // Attempt stack-locking ...
  2804       ori (tmpReg, tmpReg, markOopDesc::unlocked_value);
  2805       sd(tmpReg, Address(boxReg, 0));          // Anticipate successful CAS
  2806       //if (os::is_MP()) {
  2807       //  sync();
  2808       //}
  2810       cmpxchg(boxReg, Address(objReg, 0), tmpReg);           // Updates tmpReg
  2811       //AT == 1: unlocked
  2813       if (PrintBiasedLockingStatistics) {
  2814         Label L;
  2815         beq(AT, R0, L);
  2816         delayed()->nop();
  2817         push(T0);
  2818         push(T1);
  2819         atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
  2820         pop(T1);
  2821         pop(T0);
  2822         bind(L);
  2824       bne(AT, R0, DONE_LABEL);
  2825       delayed()->nop();
  2827       // Recursive locking
  2828       // The object is stack-locked: markword contains stack pointer to BasicLock.
  2829       // Locked by current thread if difference with current SP is less than one page.
  2830       dsubu(tmpReg, tmpReg, SP);
  2831       li(AT, 7 - os::vm_page_size() );
  2832       andr(tmpReg, tmpReg, AT);
  2833       sd(tmpReg, Address(boxReg, 0));
  2834       if (PrintBiasedLockingStatistics) {
  2835         Label L;
  2836         // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++
  2837         bne(tmpReg, R0, L);
  2838         delayed()->nop();
  2839         push(T0);
  2840         push(T1);
  2841         atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
  2842         pop(T1);
  2843         pop(T0);
  2844         bind(L);
  2846       sltiu(AT, tmpReg, 1); // AT = (tmpReg == 0) ? 1 : 0
  2848       b(DONE_LABEL) ;
  2849       delayed()->nop();
  2851       bind(IsInflated) ;
  2852       // The object's monitor m is unlocked iff m->owner == NULL,
  2853       // otherwise m->owner may contain a thread or a stack address.
  2855       // TODO: someday avoid the ST-before-CAS penalty by
  2856       // relocating (deferring) the following ST.
  2857       // We should also think about trying a CAS without having
  2858       // fetched _owner.  If the CAS is successful we may
  2859       // avoid an RTO->RTS upgrade on the $line.
  2860       // Without cast to int32_t a movptr will destroy r10 which is typically obj
  2861       li(AT, (int32_t)intptr_t(markOopDesc::unused_mark()));
  2862       sd(AT, Address(boxReg, 0));
  2864       move(boxReg, tmpReg) ;
  2865       ld(tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
  2866       // if (m->owner != 0) => AT = 0, goto slow path.
  2867       move(AT, R0);
  2868       bne(tmpReg, R0, DONE_LABEL);
  2869       delayed()->nop();
  2871 #ifndef OPT_THREAD
  2872       get_thread (TREG) ;
  2873 #endif
  2874       // It's inflated and appears unlocked
  2875       //if (os::is_MP()) {
  2876       //  sync();
  2877       //}
  2878       cmpxchg(TREG, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), tmpReg) ;
  2879       // Intentional fall-through into DONE_LABEL ...
  2882       // DONE_LABEL is a hot target - we'd really like to place it at the
  2883       // start of cache line by padding with NOPs.
  2884       // See the AMD and Intel software optimization manuals for the
  2885       // most efficient "long" NOP encodings.
  2886       // Unfortunately none of our alignment mechanisms suffice.
  2887       bind(DONE_LABEL);
  2889       // At DONE_LABEL the AT is set as follows ...
  2890       // Fast_Unlock uses the same protocol.
  2891       // AT == 1 -> Success
  2892       // AT == 0 -> Failure - force control through the slow-path
  2894       // Avoid branch-to-branch on AMD processors
  2895       // This appears to be superstition.
  2896       if (EmitSync & 32) nop() ;
  2901 // obj: object to unlock
  2902 // box: box address (displaced header location), killed.
  2903 // tmp: killed tmp; cannot be obj nor box.
  2904 //
  2905 // Some commentary on balanced locking:
  2906 //
  2907 // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
  2908 // Methods that don't have provably balanced locking are forced to run in the
  2909 // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
  2910 // The interpreter provides two properties:
  2911 // I1:  At return-time the interpreter automatically and quietly unlocks any
  2912 //      objects acquired by the current activation (frame).  Recall that the
  2913 //      interpreter maintains an on-stack list of locks currently held by
  2914 //      a frame.
  2915 // I2:  If a method attempts to unlock an object that is not held by
  2916 //      the frame, the interpreter throws IMSX.
  2917 //
  2918 // Let's say A(), which has provably balanced locking, acquires O and then calls B().
  2919 // B() doesn't have provably balanced locking so it runs in the interpreter.
  2920 // Control returns to A() and A() unlocks O.  By I1 and I2, above, we know that O
  2921 // is still locked by A().
  2922 //
  2923 // The only other source of unbalanced locking would be JNI.  The "Java Native Interface:
  2924 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
  2925 // should not be unlocked by "normal" java-level locking and vice-versa.  The specification
  2926 // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
  2928 void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg) {
  2930   guarantee (objReg != boxReg, "") ;
  2931   guarantee (objReg != tmpReg, "") ;
  2932   guarantee (boxReg != tmpReg, "") ;
  2934   block_comment("FastUnlock");
  2937   if (EmitSync & 4) {
  2938     // Disable - inhibit all inlining.  Force control through the slow-path
  2939     move(AT, 0x0);
  2940     return;
  2941   } else
  2942     if (EmitSync & 8) {
  2943       Label DONE_LABEL ;
  2944       if (UseBiasedLocking) {
  2945         biased_locking_exit(objReg, tmpReg, DONE_LABEL);
  2947       // classic stack-locking code ...
  2948       ld(tmpReg, Address(boxReg, 0)) ;
  2949       beq(tmpReg, R0, DONE_LABEL) ;
  2950       move(AT, 0x1);  // delay slot
  2952       cmpxchg(tmpReg, Address(objReg, 0), boxReg);
  2953       bind(DONE_LABEL);
  2954     } else {
  2955       Label DONE_LABEL, Stacked, CheckSucc, Inflated ;
  2957       // Critically, the biased locking test must have precedence over
  2958       // and appear before the (box->dhw == 0) recursive stack-lock test.
  2959       if (UseBiasedLocking && !UseOptoBiasInlining) {
  2960         biased_locking_exit(objReg, tmpReg, DONE_LABEL);
  2963       ld(AT, Address(boxReg, 0)) ;            // Examine the displaced header
  2964       beq(AT, R0, DONE_LABEL) ;      // 0 indicates recursive stack-lock
  2965       delayed()->daddiu(AT, R0, 0x1);
  2967       ld(tmpReg, Address(objReg, 0)) ;       // Examine the object's markword
  2968       andi(AT, tmpReg, markOopDesc::monitor_value) ;                     // Inflated?
  2969       beq(AT, R0, Stacked) ;                     // Inflated?
  2970       delayed()->nop();
  2972       bind(Inflated) ;
  2973       // It's inflated.
  2974       // Despite our balanced locking property we still check that m->_owner == Self
  2975       // as java routines or native JNI code called by this thread might
  2976       // have released the lock.
  2977       // Refer to the comments in synchronizer.cpp for how we might encode extra
  2978       // state in _succ so we can avoid fetching EntryList|cxq.
  2979       //
  2980       // I'd like to add more cases in fast_lock() and fast_unlock() --
  2981       // such as recursive enter and exit -- but we have to be wary of
  2982       // I$ bloat, T$ effects and BP$ effects.
  2983       //
  2984       // If there's no contention try a 1-0 exit.  That is, exit without
  2985       // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
  2986       // we detect and recover from the race that the 1-0 exit admits.
  2987       //
  2988       // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
  2989       // before it STs null into _owner, releasing the lock.  Updates
  2990       // to data protected by the critical section must be visible before
  2991       // we drop the lock (and thus before any other thread could acquire
  2992       // the lock and observe the fields protected by the lock).
  2993 #ifndef OPT_THREAD
  2994       get_thread (TREG) ;
  2995 #endif
  2997       // It's inflated
  2998       ld(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
  2999       xorr(boxReg, boxReg, TREG);
  3001       ld(AT, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
  3002       orr(boxReg, boxReg, AT);
  3004       move(AT, R0);
  3005       bne(boxReg, R0, DONE_LABEL);
  3006       delayed()->nop();
  3008       ld(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
  3009       ld(AT, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
  3010       orr(boxReg, boxReg, AT);
  3012       move(AT, R0);
  3013       bne(boxReg, R0, DONE_LABEL);
  3014       delayed()->nop();
  3016       sync();
  3017       sd(R0, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
  3018       move(AT, 0x1);
  3019       b(DONE_LABEL);
  3020       delayed()->nop();
  3022       bind  (Stacked);
  3023       ld(tmpReg, Address(boxReg, 0)) ;
  3024       //if (os::is_MP()) { sync(); }
  3025       cmpxchg(tmpReg, Address(objReg, 0), boxReg);
  3027       if (EmitSync & 65536) {
  3028         bind (CheckSucc);
  3031       bind(DONE_LABEL);
  3033       // Avoid branch to branch on AMD processors
  3034       if (EmitSync & 32768) { nop() ; }
  3038 void MacroAssembler::align(int modulus) {
  3039   while (offset() % modulus != 0) nop();
  3043 void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
  3044   //Unimplemented();
  3047 #ifdef _LP64
  3048 Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
  3049 Register caller_saved_registers_except_v0[] = {AT, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
  3051 // In MIPS64, F0~F23 are all caller-saved registers
  3052 FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13};
  3053 #else
  3054 Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, T4, T5, T6, T7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
  3055 Register caller_saved_registers_except_v0[] = {AT, V1, A0, A1, A2, A3, T4, T5, T6, T7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
  3057 FloatRegister caller_saved_fpu_registers[] = {};
  3058 #endif
  3060 // We preserve all caller-saved registers
  3061 void  MacroAssembler::pushad(){
  3062   int i;
  3064   // Fixed-point registers
  3065   int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
  3066   daddi(SP, SP, -1 * len * wordSize);
  3067   for (i = 0; i < len; i++)
  3069 #ifdef _LP64
  3070     sd(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
  3071 #else
  3072     sw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
  3073 #endif
  3076   // Floating-point registers
  3077   len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
  3078   daddi(SP, SP, -1 * len * wordSize);
  3079   for (i = 0; i < len; i++)
  3081 #ifdef _LP64
  3082     sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
  3083 #else
  3084     swc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
  3085 #endif
  3087 };
  3089 void  MacroAssembler::popad(){
  3090   int i;
  3092   // Floating-point registers
  3093   int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
  3094   for (i = 0; i < len; i++)
  3096 #ifdef _LP64
  3097     ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
  3098 #else
  3099     lwc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
  3100 #endif
  3102   daddi(SP, SP, len * wordSize);
  3104   // Fixed-point registers
  3105   len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
  3106   for (i = 0; i < len; i++)
  3108 #ifdef _LP64
  3109     ld(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
  3110 #else
  3111     lw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
  3112 #endif
  3114   daddi(SP, SP, len * wordSize);
  3115 };
  3117 // We preserve all caller-saved registers except V0
  3118 void MacroAssembler::pushad_except_v0() {
  3119   int i;
  3121   // Fixed-point registers
  3122   int len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]);
  3123   daddi(SP, SP, -1 * len * wordSize);
  3124   for (i = 0; i < len; i++) {
  3125 #ifdef _LP64
  3126     sd(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize);
  3127 #else
  3128     sw(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize);
  3129 #endif
  3132   // Floating-point registers
  3133   len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
  3134   daddi(SP, SP, -1 * len * wordSize);
  3135   for (i = 0; i < len; i++) {
  3136 #ifdef _LP64
  3137     sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
  3138 #else
  3139     swc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
  3140 #endif
  3144 void MacroAssembler::popad_except_v0() {
  3145   int i;
  3147   // Floating-point registers
  3148   int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
  3149   for (i = 0; i < len; i++) {
  3150 #ifdef _LP64
  3151     ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
  3152 #else
  3153     lwc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
  3154 #endif
  3156   daddi(SP, SP, len * wordSize);
  3158   // Fixed-point registers
  3159   len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]);
  3160   for (i = 0; i < len; i++) {
  3161 #ifdef _LP64
  3162     ld(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize);
  3163 #else
  3164     lw(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize);
  3165 #endif
  3167   daddi(SP, SP, len * wordSize);
  3170 void MacroAssembler::push2(Register reg1, Register reg2) {
  3171 #ifdef _LP64
  3172   daddi(SP, SP, -16);
  3173   sd(reg2, SP, 0);
  3174   sd(reg1, SP, 8);
  3175 #else
  3176   addi(SP, SP, -8);
  3177   sw(reg2, SP, 0);
  3178   sw(reg1, SP, 4);
  3179 #endif
  3182 void MacroAssembler::pop2(Register reg1, Register reg2) {
  3183 #ifdef _LP64
  3184   ld(reg1, SP, 0);
  3185   ld(reg2, SP, 8);
  3186   daddi(SP, SP, 16);
  3187 #else
  3188   lw(reg1, SP, 0);
  3189   lw(reg2, SP, 4);
  3190   addi(SP, SP, 8);
  3191 #endif
  3194 // for UseCompressedOops Option
  3195 void MacroAssembler::load_klass(Register dst, Register src) {
  3196 #ifdef _LP64
  3197   if(UseCompressedClassPointers){
  3198     lwu(dst, Address(src, oopDesc::klass_offset_in_bytes()));
  3199     decode_klass_not_null(dst);
  3200   } else
  3201 #endif
  3202   ld(dst, src, oopDesc::klass_offset_in_bytes());
  3203 }
  3205 void MacroAssembler::store_klass(Register dst, Register src) {
  3206 #ifdef _LP64
  3207   if(UseCompressedClassPointers){
  3208     encode_klass_not_null(src);
  3209     sw(src, dst, oopDesc::klass_offset_in_bytes());
  3210   } else {
  3211 #endif
  3212     sd(src, dst, oopDesc::klass_offset_in_bytes());
  3213   }
  3214 }
  3216 void MacroAssembler::load_prototype_header(Register dst, Register src) {
  3217   load_klass(dst, src);
  3218   ld(dst, Address(dst, Klass::prototype_header_offset()));
  3219 }
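//
// Sketch of the compressed-klass fast path used by load_klass/store_klass
// above: with UseCompressedClassPointers the klass word in the object
// header is a 32-bit narrowKlass, so the load side is effectively
//
//   lwu  dst, [src + klass_offset]   // zero-extend the narrowKlass
//   decode_klass_not_null(dst)       // dst = (dst << shift) + klass_base
//
// and the store side encodes first and then uses a 32-bit sw; the
// uncompressed path is a single 64-bit ld/sd.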
  3221 #ifdef _LP64
  3222 void MacroAssembler::store_klass_gap(Register dst, Register src) {
  3223   if (UseCompressedClassPointers) {
  3224     sw(src, dst, oopDesc::klass_gap_offset_in_bytes());
  3225   }
  3226 }
  3228 void MacroAssembler::load_heap_oop(Register dst, Address src) {
  3229   if(UseCompressedOops){
  3230     lwu(dst, src);
  3231     decode_heap_oop(dst);
  3232   } else {
  3233     ld(dst, src);
  3234   }
  3235 }
  3237 void MacroAssembler::store_heap_oop(Address dst, Register src){
  3238   if(UseCompressedOops){
  3239     assert(!dst.uses(src), "not enough registers");
  3240     encode_heap_oop(src);
  3241     sw(src, dst);
  3242   } else {
  3243     sd(src, dst);
  3244   }
  3245 }
  3247 void MacroAssembler::store_heap_oop_null(Address dst){
  3248   if(UseCompressedOops){
  3249     sw(R0, dst);
  3250   } else {
  3251     sd(R0, dst);
  3252   }
  3253 }
  3255 #ifdef ASSERT
  3256 void MacroAssembler::verify_heapbase(const char* msg) {
  3257   assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed");
  3258   assert (Universe::heap() != NULL, "java heap should be initialized");
  3259 }
  3260 #endif
  3263 // Algorithm must match oop.inline.hpp encode_heap_oop.
  3264 void MacroAssembler::encode_heap_oop(Register r) {
  3265 #ifdef ASSERT
  3266   verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
  3267 #endif
  3268   verify_oop(r, "broken oop in encode_heap_oop");
  3269   if (Universe::narrow_oop_base() == NULL) {
  3270     if (Universe::narrow_oop_shift() != 0) {
  3271       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3272     shr(r, LogMinObjAlignmentInBytes);
  3273     }
  3274     return;
  3275   }
  3277   movz(r, S5_heapbase, r);
  3278   dsub(r, r, S5_heapbase);
  3279   if (Universe::narrow_oop_shift() != 0) {
  3280     assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3281     shr(r, LogMinObjAlignmentInBytes);
  3282   }
  3283 }
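//
// In pseudo-C the encoding above is (the base/shift values below are
// illustrative, not required by the VM):
//
//   narrow = (r == NULL) ? 0 : (r - narrow_oop_base) >> narrow_oop_shift;
//
// movz(r, S5_heapbase, r) substitutes the heap base when r is NULL, so the
// following dsub yields 0 without a branch. For example, with an assumed
// base of 0x120000000 and shift 3, the oop 0x120000040 encodes to
// 0x40 >> 3 = 8.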
  3285 void MacroAssembler::encode_heap_oop(Register dst, Register src) {
  3286 #ifdef ASSERT
  3287   verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
  3288 #endif
  3289   verify_oop(src, "broken oop in encode_heap_oop");
  3290   if (Universe::narrow_oop_base() == NULL) {
  3291     if (Universe::narrow_oop_shift() != 0) {
  3292       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3293       dsrl(dst, src, LogMinObjAlignmentInBytes);
  3294     } else {
  3295       if (dst != src) move(dst, src);
  3296     }
  3297   } else {
  3298     if (dst == src) {
  3299       movz(dst, S5_heapbase, dst);
  3300       dsub(dst, dst, S5_heapbase);
  3301       if (Universe::narrow_oop_shift() != 0) {
  3302         assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3303         shr(dst, LogMinObjAlignmentInBytes);
  3304       }
  3305     } else {
  3306       dsub(dst, src, S5_heapbase);
  3307       if (Universe::narrow_oop_shift() != 0) {
  3308         assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3309         shr(dst, LogMinObjAlignmentInBytes);
  3310       }
  3311       movz(dst, R0, src);
  3312     }
  3313   }
  3314 }
  3316 void MacroAssembler::encode_heap_oop_not_null(Register r) {
  3317   assert (UseCompressedOops, "should be compressed");
  3318 #ifdef ASSERT
  3319   if (CheckCompressedOops) {
  3320     Label ok;
  3321     bne(r, R0, ok);
  3322     delayed()->nop();
  3323     stop("null oop passed to encode_heap_oop_not_null");
  3324     bind(ok);
  3325   }
  3326 #endif
  3327   verify_oop(r, "broken oop in encode_heap_oop_not_null");
  3328   if (Universe::narrow_oop_base() != NULL) {
  3329     dsub(r, r, S5_heapbase);
  3330   }
  3331   if (Universe::narrow_oop_shift() != 0) {
  3332     assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3333     shr(r, LogMinObjAlignmentInBytes);
  3334   }
  3335 }
  3338 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
  3339   assert (UseCompressedOops, "should be compressed");
  3340 #ifdef ASSERT
  3341   if (CheckCompressedOops) {
  3342     Label ok;
  3343     bne(src, R0, ok);
  3344     delayed()->nop();
  3345     stop("null oop passed to encode_heap_oop_not_null2");
  3346     bind(ok);
  3347   }
  3348 #endif
  3349   verify_oop(src, "broken oop in encode_heap_oop_not_null2");
  3351   if (Universe::narrow_oop_base() != NULL) {
  3352     dsub(dst, src, S5_heapbase);
  3353     if (Universe::narrow_oop_shift() != 0) {
  3354       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3355       shr(dst, LogMinObjAlignmentInBytes);
  3356     }
  3357   } else {
  3358     if (Universe::narrow_oop_shift() != 0) {
  3359       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3360       dsrl(dst, src, LogMinObjAlignmentInBytes);
  3361     } else {
  3362       if (dst != src) move(dst, src);
  3363     }
  3364   }
  3365 }
  3367 void  MacroAssembler::decode_heap_oop(Register r) {
  3368 #ifdef ASSERT
  3369   verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
  3370 #endif
  3371   if (Universe::narrow_oop_base() == NULL) {
  3372     if (Universe::narrow_oop_shift() != 0) {
  3373       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3374       shl(r, LogMinObjAlignmentInBytes);
  3375     }
  3376   } else {
  3377     move(AT, r);
  3378     if (Universe::narrow_oop_shift() != 0) {
  3379       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3380       shl(r, LogMinObjAlignmentInBytes);
  3381     }
  3382     dadd(r, r, S5_heapbase);
  3383     movz(r, R0, AT);
  3384   }
  3385   verify_oop(r, "broken oop in decode_heap_oop");
  3386 }
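//
// Decoding is the inverse, again branchless for NULL:
//
//   r = (narrow == 0) ? NULL : (narrow << narrow_oop_shift) + narrow_oop_base;
//
// The incoming narrow value is parked in AT first, and the trailing
// movz(r, R0, AT) forces the result back to NULL when AT was 0.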
  3388 void  MacroAssembler::decode_heap_oop(Register dst, Register src) {
  3389 #ifdef ASSERT
  3390   verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
  3391 #endif
  3392   if (Universe::narrow_oop_base() == NULL) {
  3393     if (Universe::narrow_oop_shift() != 0) {
  3394       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3395       if (dst != src) nop(); // DON'T DELETE THIS GUY.
  3396       dsll(dst, src, LogMinObjAlignmentInBytes);
  3397     } else {
  3398       if (dst != src) move(dst, src);
  3399     }
  3400   } else {
  3401     if (dst == src) {
  3402       move(AT, dst);
  3403       if (Universe::narrow_oop_shift() != 0) {
  3404         assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3405         shl(dst, LogMinObjAlignmentInBytes);
  3406       }
  3407       dadd(dst, dst, S5_heapbase);
  3408       movz(dst, R0, AT);
  3409     } else {
  3410       if (Universe::narrow_oop_shift() != 0) {
  3411         assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3412         dsll(dst, src, LogMinObjAlignmentInBytes);
  3413         daddu(dst, dst, S5_heapbase);
  3414       } else {
  3415         daddu(dst, src, S5_heapbase);
  3416       }
  3417       movz(dst, R0, src);
  3418     }
  3419   }
  3420   verify_oop(dst, "broken oop in decode_heap_oop");
  3421 }
  3423 void  MacroAssembler::decode_heap_oop_not_null(Register r) {
  3424   // Note: it will change flags
  3425   assert (UseCompressedOops, "should only be used for compressed headers");
  3426   assert (Universe::heap() != NULL, "java heap should be initialized");
  3427   // Cannot assert, unverified entry point counts instructions (see .ad file)
  3428   // vtableStubs also counts instructions in pd_code_size_limit.
  3429   // Also do not verify_oop as this is called by verify_oop.
  3430   if (Universe::narrow_oop_shift() != 0) {
  3431     assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3432     shl(r, LogMinObjAlignmentInBytes);
  3433     if (Universe::narrow_oop_base() != NULL) {
  3434       daddu(r, r, S5_heapbase);
  3435     }
  3436   } else {
  3437     assert (Universe::narrow_oop_base() == NULL, "sanity");
  3438   }
  3439 }
  3441 void  MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
  3442   assert (UseCompressedOops, "should only be used for compressed headers");
  3443   assert (Universe::heap() != NULL, "java heap should be initialized");
  3445   // Cannot assert, unverified entry point counts instructions (see .ad file)
  3446   // vtableStubs also counts instructions in pd_code_size_limit.
  3447   // Also do not verify_oop as this is called by verify_oop.
  3448   //lea(dst, Address(S5_heapbase, src, Address::times_8, 0));
  3449   if (Universe::narrow_oop_shift() != 0) {
  3450     assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  3451     if (LogMinObjAlignmentInBytes == Address::times_8) {
  3452       dsll(dst, src, LogMinObjAlignmentInBytes);
  3453       daddu(dst, dst, S5_heapbase);
  3454     } else {
  3455       dsll(dst, src, LogMinObjAlignmentInBytes);
  3456       if (Universe::narrow_oop_base() != NULL) {
  3457         daddu(dst, dst, S5_heapbase);
  3458       }
  3459     }
  3460   } else {
  3461     assert (Universe::narrow_oop_base() == NULL, "sanity");
  3462     if (dst != src) {
  3463       move(dst, src);
  3464     }
  3465   }
  3466 }
  3468 void MacroAssembler::encode_klass_not_null(Register r) {
  3469   if (Universe::narrow_klass_base() != NULL) {
  3470     assert(r != AT, "Encoding a klass in AT");
  3471     set64(AT, (int64_t)Universe::narrow_klass_base());
  3472     dsub(r, r, AT);
  3473   }
  3474   if (Universe::narrow_klass_shift() != 0) {
  3475     assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
  3476     shr(r, LogKlassAlignmentInBytes);
  3477   }
  3478 }
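//
// Klass pointers use the same (value - base) >> shift scheme as oops, but
// against narrow_klass_base, which is materialized into AT with set64 on
// each use because, unlike the oop base kept in S5_heapbase, it has no
// dedicated register.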
  3480 void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
  3481   if (dst == src) {
  3482     encode_klass_not_null(src);
  3483   } else {
  3484     if (Universe::narrow_klass_base() != NULL) {
  3485       set64(dst, (int64_t)Universe::narrow_klass_base());
  3486       dsub(dst, src, dst);
  3487       if (Universe::narrow_klass_shift() != 0) {
  3488         assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
  3489         shr(dst, LogKlassAlignmentInBytes);
  3490       }
  3491     } else {
  3492       if (Universe::narrow_klass_shift() != 0) {
  3493         assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
  3494         dsrl(dst, src, LogKlassAlignmentInBytes);
  3495       } else {
  3496         move(dst, src);
  3497       }
  3498     }
  3499   }
  3500 }
  3502 // Function instr_size_for_decode_klass_not_null() counts the instructions
  3503 // generated by decode_klass_not_null(register r) and reinit_heapbase(),
  3504 // when (Universe::heap() != NULL).  Hence, if the instructions they
  3505 // generate change, then this method needs to be updated.
  3506 int MacroAssembler::instr_size_for_decode_klass_not_null() {
  3507   assert (UseCompressedClassPointers, "only for compressed klass ptrs");
  3508   if (Universe::narrow_klass_base() != NULL) {
  3509     // mov64 + addq + shlq? + mov64  (for reinit_heapbase()).
  3510     return (Universe::narrow_klass_shift() == 0 ? 4 * 9 : 4 * 10);
  3511   } else {
  3512     // longest load decode klass function, mov64, leaq
  3513     return (Universe::narrow_klass_shift() == 0 ? 4 * 0 : 4 * 1);
  3514   }
  3515 }
  3517 void  MacroAssembler::decode_klass_not_null(Register r) {
  3518   assert (UseCompressedClassPointers, "should only be used for compressed headers");
  3519   assert(r != AT, "Decoding a klass in AT");
  3520   // Cannot assert, unverified entry point counts instructions (see .ad file)
  3521   // vtableStubs also counts instructions in pd_code_size_limit.
  3522   // Also do not verify_oop as this is called by verify_oop.
  3523   if (Universe::narrow_klass_shift() != 0) {
  3524     assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
  3525     shl(r, LogKlassAlignmentInBytes);
  3526   }
  3527   if (Universe::narrow_klass_base() != NULL) {
  3528     set64(AT, (int64_t)Universe::narrow_klass_base());
  3529     daddu(r, r, AT);
  3530     // Not necessary on MIPS at all.
  3531     //reinit_heapbase();
  3532   }
  3533 }
  3535 void  MacroAssembler::decode_klass_not_null(Register dst, Register src) {
  3536   assert (UseCompressedClassPointers, "should only be used for compressed headers");
  3538   if (dst == src) {
  3539     decode_klass_not_null(dst);
  3540   } else {
  3541     // Cannot assert, unverified entry point counts instructions (see .ad file)
  3542     // vtableStubs also counts instructions in pd_code_size_limit.
  3543     // Also do not verify_oop as this is called by verify_oop.
  3544     set64(dst, (int64_t)Universe::narrow_klass_base());
  3545     if (Universe::narrow_klass_shift() != 0) {
  3546       assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
  3547       assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
  3548       dsll(AT, src, Address::times_8);
  3549       daddu(dst, dst, AT);
  3550     } else {
  3551       daddu(dst, src, dst);
  3552     }
  3553   }
  3554 }
  3556 void MacroAssembler::incrementl(Register reg, int value) {
  3557   if (value == min_jint) {
  3558      move(AT, value);
  3559      LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
  3560      return;
  3561   }
  3562   if (value <  0) { decrementl(reg, -value); return; }
  3563   if (value == 0) {                        ; return; }
  3565   if(Assembler::is_simm16(value)) {
  3566      NOT_LP64(addiu(reg, reg, value));
  3567      LP64_ONLY(move(AT, value); addu32(reg, reg, AT));
  3568   } else {
  3569      move(AT, value);
  3570      LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
  3571   }
  3572 }
  3574 void MacroAssembler::decrementl(Register reg, int value) {
  3575   if (value == min_jint) {
  3576      move(AT, value);
  3577      LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
  3578      return;
  3579   }
  3580   if (value <  0) { incrementl(reg, -value); return; }
  3581   if (value == 0) {                        ; return; }
  3583   if (Assembler::is_simm16(value)) {
  3584      NOT_LP64(addiu(reg, reg, -value));
  3585      LP64_ONLY(move(AT, value); subu32(reg, reg, AT));
  3586   } else {
  3587      move(AT, value);
  3588      LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
  3589   }
  3590 }
  3592 void MacroAssembler::reinit_heapbase() {
  3593   if (UseCompressedOops || UseCompressedClassPointers) {
  3594     if (Universe::heap() != NULL) {
  3595       if (Universe::narrow_oop_base() == NULL) {
  3596         move(S5_heapbase, R0);
  3597       } else {
  3598         set64(S5_heapbase, (int64_t)Universe::narrow_ptrs_base());
  3599       }
  3600     } else {
  3601       set64(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr());
  3602       ld(S5_heapbase, S5_heapbase, 0);
  3603     }
  3604   }
  3605 }
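//
// Invariant sketch: S5_heapbase caches Universe::narrow_ptrs_base() so the
// encode/decode helpers above need no per-use reload. Before the heap is
// initialized the final base is unknown, so it is loaded indirectly through
// narrow_ptrs_base_addr() instead.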
  3606 #endif // _LP64
  3608 void MacroAssembler::check_klass_subtype(Register sub_klass,
  3609                            Register super_klass,
  3610                            Register temp_reg,
  3611                            Label& L_success) {
  3612 // cf. the interpreter's gen_subtype_check
  3613   Label L_failure;
  3614   check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg,        &L_success, &L_failure, NULL);
  3615   check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
  3616   bind(L_failure);
  3617 }
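//
// Usage sketch (registers illustrative): a type check that only needs the
// success edge can call the wrapper directly; failure falls through:
//
//   Label ok;
//   check_klass_subtype(T0 /* sub */, T1 /* super */, T2 /* temp */, ok);
//   ... // reached only when T0 is not a subtype of T1
//   bind(ok);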
  3619 SkipIfEqual::SkipIfEqual(
  3620     MacroAssembler* masm, const bool* flag_addr, bool value) {
  3621   _masm = masm;
  3622   _masm->li(AT, (address)flag_addr);
  3623   _masm->lb(AT, AT, 0);
  3624   _masm->addi(AT, AT, -value);
  3625   _masm->beq(AT, R0, _label);
  3626   _masm->delayed()->nop();
  3627 }
  3628 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
  3629                                                    Register super_klass,
  3630                                                    Register temp_reg,
  3631                                                    Label* L_success,
  3632                                                    Label* L_failure,
  3633                                                    Label* L_slow_path,
  3634                                         RegisterOrConstant super_check_offset) {
  3635   assert_different_registers(sub_klass, super_klass, temp_reg);
  3636   bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
  3637   if (super_check_offset.is_register()) {
  3638     assert_different_registers(sub_klass, super_klass,
  3639                                super_check_offset.as_register());
  3640   } else if (must_load_sco) {
  3641     assert(temp_reg != noreg, "supply either a temp or a register offset");
  3642   }
  3644   Label L_fallthrough;
  3645   int label_nulls = 0;
  3646   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  3647   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  3648   if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
  3649   assert(label_nulls <= 1, "at most one NULL in the batch");
  3651   int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  3652   int sco_offset = in_bytes(Klass::super_check_offset_offset());
  3653   // If the pointers are equal, we are done (e.g., String[] elements).
  3654   // This self-check enables sharing of secondary supertype arrays among
  3655   // non-primary types such as array-of-interface.  Otherwise, each such
  3656   // type would need its own customized SSA.
  3657   // We move this check to the front of the fast path because many
  3658   // type checks are in fact trivially successful in this manner,
  3659   // so we get a nicely predicted branch right at the start of the check.
  3660   beq(sub_klass, super_klass, *L_success);
  3661   delayed()->nop();
  3662   // Check the supertype display:
  3663   if (must_load_sco) {
  3664     // The offset is non-negative, so a zero-extending lwu is right on LP64.
  3665     lwu(temp_reg, super_klass, sco_offset);
  3666     super_check_offset = RegisterOrConstant(temp_reg);
  3667   }
  3668   dsll(AT, super_check_offset.register_or_noreg(), Address::times_1);
  3669   daddu(AT, sub_klass, AT);
  3670   ld(AT, AT, super_check_offset.constant_or_zero()*Address::times_1);
  3672   // This check has worked decisively for primary supers.
  3673   // Secondary supers are sought in the super_cache ('super_cache_addr').
  3674   // (Secondary supers are interfaces and very deeply nested subtypes.)
  3675   // This works in the same check above because of a tricky aliasing
  3676   // between the super_cache and the primary super display elements.
  3677   // (The 'super_check_addr' can address either, as the case requires.)
  3678   // Note that the cache is updated below if it does not help us find
  3679   // what we need immediately.
  3680   // So if it was a primary super, we can just fail immediately.
  3681   // Otherwise, it's the slow path for us (no success at this point).
  3683   if (super_check_offset.is_register()) {
  3684     beq(super_klass, AT, *L_success);
  3685     delayed()->nop();
  3686     addi(AT, super_check_offset.as_register(), -sc_offset);
  3687     if (L_failure == &L_fallthrough) {
  3688       beq(AT, R0, *L_slow_path);
  3689       delayed()->nop();
  3690     } else {
  3691       bne_far(AT, R0, *L_failure);
  3692       delayed()->nop();
  3693       b(*L_slow_path);
  3694       delayed()->nop();
  3695     }
  3696   } else if (super_check_offset.as_constant() == sc_offset) {
  3697     // Need a slow path; fast failure is impossible.
  3698     if (L_slow_path == &L_fallthrough) {
  3699       beq(super_klass, AT, *L_success);
  3700       delayed()->nop();
  3701     } else {
  3702       bne(super_klass, AT, *L_slow_path);
  3703       delayed()->nop();
  3704       b(*L_success);
  3705       delayed()->nop();
  3706     }
  3707   } else {
  3708     // No slow path; it's a fast decision.
  3709     if (L_failure == &L_fallthrough) {
  3710       beq(super_klass, AT, *L_success);
  3711       delayed()->nop();
  3712     } else {
  3713       bne_far(super_klass, AT, *L_failure);
  3714       delayed()->nop();
  3715       b(*L_success);
  3716       delayed()->nop();
  3717     }
  3718   }
  3720   bind(L_fallthrough);
  3721 }
  3725 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
  3726                                                    Register super_klass,
  3727                                                    Register temp_reg,
  3728                                                    Register temp2_reg,
  3729                                                    Label* L_success,
  3730                                                    Label* L_failure,
  3731                                                    bool set_cond_codes) {
  3732   if (temp2_reg == noreg)
  3733     temp2_reg = TSR;
  3734   assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
  3735 #define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)
  3737   Label L_fallthrough;
  3738   int label_nulls = 0;
  3739   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  3740   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  3741   assert(label_nulls <= 1, "at most one NULL in the batch");
  3743   // a couple of useful fields in sub_klass:
  3744   int ss_offset = in_bytes(Klass::secondary_supers_offset());
  3745   int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  3746   Address secondary_supers_addr(sub_klass, ss_offset);
  3747   Address super_cache_addr(     sub_klass, sc_offset);
  3749   // Do a linear scan of the secondary super-klass chain.
  3750   // This code is rarely used, so simplicity is a virtue here.
  3751   // The repne_scan instruction uses fixed registers, which we must spill.
  3752   // Don't worry too much about pre-existing connections with the input regs.
  3754 #ifndef PRODUCT
  3755   int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
  3756   ExternalAddress pst_counter_addr((address) pst_counter);
  3757   NOT_LP64(  incrementl(pst_counter_addr) );
  3758 #endif //PRODUCT
  3760   // We will consult the secondary-super array.
  3761   ld(temp_reg, secondary_supers_addr);
  3762   // Load the array length.  (It is non-negative, so lw is fine on LP64.)
  3763   lw(temp2_reg, Address(temp_reg, Array<Klass*>::length_offset_in_bytes()));
  3764   // Skip to start of data.
  3765   daddiu(temp_reg, temp_reg, Array<Klass*>::base_offset_in_bytes());
  3767   // OpenJDK8 never compresses klass pointers in secondary-super array.
  3768   Label Loop, subtype;
  3769   bind(Loop);
  3770   beq(temp2_reg, R0, *L_failure);
  3771   delayed()->nop();
  3772   ld(AT, temp_reg, 0);
  3773   beq(AT, super_klass, subtype);
  3774   delayed()->daddi(temp_reg, temp_reg, 1 * wordSize);
  3775   b(Loop);
  3776   delayed()->daddi(temp2_reg, temp2_reg, -1);
  3778   bind(subtype);
  3779   sd(super_klass, super_cache_addr);
  3780   if (L_success != &L_fallthrough) {
  3781     b(*L_success);
  3782     delayed()->nop();
  3783   }
  3785   // Success.  Cache the super we found and proceed in triumph.
  3786 #undef IS_A_TEMP
  3788   bind(L_fallthrough);
  3789 }
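//
// In pseudo-C the scan above is roughly (loop peeling and delay-slot
// scheduling omitted; accessor names are descriptive, not exact):
//
//   Array<Klass*>* ss = sub_klass->secondary_supers();
//   Klass** p = ss->data();
//   for (int n = ss->length(); n > 0; n--, p++)
//     if (*p == super_klass) {
//       sub_klass->set_secondary_super_cache(super_klass);  // the sd above
//       goto success;
//     }
//   goto failure;
//
// The daddi instructions sit in the branch delay slots, advancing the
// cursor and decrementing the count at no extra cost.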
  3791 void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
  3792   ld(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
  3793   sd(R0, Address(java_thread, JavaThread::vm_result_offset()));
  3794   verify_oop(oop_result, "broken oop in call_VM_base");
  3795 }
  3797 void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
  3798   ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
  3799   sd(R0, Address(java_thread, JavaThread::vm_result_2_offset()));
  3800 }
  3802 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
  3803                                          int extra_slot_offset) {
  3804   // cf. TemplateTable::prepare_invoke(), if (load_receiver).
  3805   int stackElementSize = Interpreter::stackElementSize;
  3806   int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
  3807 #ifdef ASSERT
  3808   int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
  3809   assert(offset1 - offset == stackElementSize, "correct arithmetic");
  3810 #endif
  3811   Register             scale_reg    = NOREG;
  3812   Address::ScaleFactor scale_factor = Address::no_scale;
  3813   if (arg_slot.is_constant()) {
  3814     offset += arg_slot.as_constant() * stackElementSize;
  3815   } else {
  3816     scale_reg    = arg_slot.as_register();
  3817     scale_factor = Address::times_8;
  3818   }
  3819   // We don't push RA on stack in prepare_invoke.
  3820   //  offset += wordSize;           // return PC is on stack
  3821   if (scale_reg == NOREG) return Address(SP, offset);
  3822   else {
  3823     dsll(scale_reg, scale_reg, scale_factor);
  3824     daddu(scale_reg, SP, scale_reg);
  3825     return Address(scale_reg, offset);
  3826   }
  3827 }
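//
// Worked example (assuming Interpreter::stackElementSize == 8): a constant
// arg_slot of 2 yields Address(SP, offset + 16), while a register arg_slot
// is scaled by 8 in place and added to SP -- i.e. the slot register is
// clobbered by this call.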
  3829 SkipIfEqual::~SkipIfEqual() {
  3830   _masm->bind(_label);
  3831 }
  3833 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
  3834   switch (size_in_bytes) {
  3835 #ifndef _LP64
  3836   case  8:
  3837     assert(dst2 != noreg, "second dest register required");
  3838     lw(dst,  src);
  3839     lw(dst2, src.plus_disp(BytesPerInt));
  3840     break;
  3841 #else
  3842   case  8:  ld(dst, src); break;
  3843 #endif
  3844   case  4:  lw(dst, src); break;
  3845   case  2:  is_signed ? lh(dst, src) : lhu(dst, src); break;
  3846   case  1:  is_signed ? lb( dst, src) : lbu( dst, src); break;
  3847   default:  ShouldNotReachHere();
  3848   }
  3849 }
  3851 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
  3852   switch (size_in_bytes) {
  3853 #ifndef _LP64
  3854   case  8:
  3855     assert(src2 != noreg, "second source register required");
  3856     sw(src, dst);
  3857     sw(src2, dst.plus_disp(BytesPerInt));
  3858     break;
  3859 #else
  3860   case  8:  sd(src, dst); break;
  3861 #endif
  3862   case  4:  sw(src, dst); break;
  3863   case  2:  sh(src, dst); break;
  3864   case  1:  sb(src, dst); break;
  3865   default:  ShouldNotReachHere();
  3866   }
  3867 }
  3869 // Look up the method for a megamorphic invokeinterface call.
  3870 // The target method is determined by <intf_klass, itable_index>.
  3871 // The receiver klass is in recv_klass.
  3872 // On success, the result will be in method_result, and execution falls through.
  3873 // On failure, execution transfers to the given label.
  3874 void MacroAssembler::lookup_interface_method(Register recv_klass,
  3875                                              Register intf_klass,
  3876                                              RegisterOrConstant itable_index,
  3877                                              Register method_result,
  3878                                              Register scan_temp,
  3879                                              Label& L_no_such_interface,
  3880                                              bool return_method) {
  3881   assert_different_registers(recv_klass, intf_klass, scan_temp, AT);
  3882   assert_different_registers(method_result, intf_klass, scan_temp, AT);
  3883   assert(recv_klass != method_result || !return_method,
  3884          "recv_klass can be destroyed when method isn't needed");
  3886   assert(itable_index.is_constant() || itable_index.as_register() == method_result,
  3887          "caller must use same register for non-constant itable index as for method");
  3889   // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  3890   int vtable_base = InstanceKlass::vtable_start_offset() * wordSize;
  3891   int itentry_off = itableMethodEntry::method_offset_in_bytes();
  3892   int scan_step   = itableOffsetEntry::size() * wordSize;
  3893   int vte_size    = vtableEntry::size() * wordSize;
  3894   Address::ScaleFactor times_vte_scale = Address::times_ptr;
  3895   assert(vte_size == wordSize, "else adjust times_vte_scale");
  3897   lw(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize));
  3899   // %%% Could store the aligned, prescaled offset in the klassoop.
  3900   dsll(scan_temp, scan_temp, times_vte_scale);
  3901   daddu(scan_temp, recv_klass, scan_temp);
  3902   daddiu(scan_temp, scan_temp, vtable_base);
  3903   if (HeapWordsPerLong > 1) {
  3904     // Round up to align_object_offset boundary
  3905     // see code for InstanceKlass::start_of_itable!
  3906     round_to(scan_temp, BytesPerLong);
  3907   }
  3909   if (return_method) {
  3910     // Adjust recv_klass by scaled itable_index, so we can free itable_index.
  3911     assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
  3912     if (itable_index.is_constant()) {
  3913       set64(AT, (int)itable_index.as_constant());
  3914       dsll(AT, AT, (int)Address::times_ptr);
  3915     } else {
  3916       dsll(AT, itable_index.as_register(), (int)Address::times_ptr);
  3917     }
  3918     daddu(AT, AT, recv_klass);
  3919     daddiu(recv_klass, AT, itentry_off);
  3920   }
  3922   Label search, found_method;
  3924   for (int peel = 1; peel >= 0; peel--) {
  3925     ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
  3927     if (peel) {
  3928       beq(intf_klass, method_result, found_method);
  3929       delayed()->nop();
  3930     } else {
  3931       bne(intf_klass, method_result, search);
  3932       delayed()->nop();
  3933       // (invert the test to fall through to found_method...)
  3934     }
  3936     if (!peel)  break;
  3938     bind(search);
  3940     // Check that the previous entry is non-null.  A null entry means that
  3941     // the receiver class doesn't implement the interface, and wasn't the
  3942     // same as when the caller was compiled.
  3943     beq(method_result, R0, L_no_such_interface);
  3944     delayed()->nop();
  3945     daddiu(scan_temp, scan_temp, scan_step);
  3946   }
  3948   bind(found_method);
  3950   if (return_method) {
  3951     // Got a hit.
  3952     lw(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
  3953     if (UseLEXT1) {
  3954       gsldx(method_result, recv_klass, scan_temp, 0);
  3955     } else {
  3956       daddu(AT, recv_klass, scan_temp);
  3957       ld(method_result, AT, 0);
  3958     }
  3959   }
  3960 }
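//
// Pseudo-C sketch of the itable scan above (the peeled first iteration is
// folded back into the loop; entry layout follows the offsets used in the
// code):
//
//   itableOffsetEntry* e = (itableOffsetEntry*)((address)recv_klass + vtable_base);
//   while (e->interface() != intf_klass) {
//     if (e->interface() == NULL) goto L_no_such_interface;
//     e = (itableOffsetEntry*)((address)e + scan_step);
//   }
//   if (return_method)
//     method_result = *(Method**)((address)recv_klass + e->offset()
//                                 + itable_index * wordSize + itentry_off);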
  3962 // virtual method calling
  3963 void MacroAssembler::lookup_virtual_method(Register recv_klass,
  3964                                            RegisterOrConstant vtable_index,
  3965                                            Register method_result) {
  3966   Register tmp = GP;
  3967   push(tmp);
  3969   if (vtable_index.is_constant()) {
  3970     assert_different_registers(recv_klass, method_result, tmp);
  3971   } else {
  3972     assert_different_registers(recv_klass, method_result, vtable_index.as_register(), tmp);
  3973   }
  3974   const int base = InstanceKlass::vtable_start_offset() * wordSize;
  3975   assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below");
  3976   if (vtable_index.is_constant()) {
  3977     set64(AT, vtable_index.as_constant());
  3978     dsll(AT, AT, (int)Address::times_ptr);
  3979   } else {
  3980     dsll(AT, vtable_index.as_register(), (int)Address::times_ptr);
  3981   }
  3982   set64(tmp, base + vtableEntry::method_offset_in_bytes());
  3983   daddu(tmp, tmp, AT);
  3984   daddu(tmp, tmp, recv_klass);
  3985   ld(method_result, tmp, 0);
  3987   pop(tmp);
  3988 }
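//
// The load above computes, in effect:
//
//   method_result = *(Method**)((address)recv_klass
//                               + vtable_start_offset() * wordSize
//                               + vtable_index * wordSize
//                               + vtableEntry::method_offset_in_bytes());
//
// GP is borrowed as the scratch register and saved/restored around the
// computation, so only AT is clobbered for the caller.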
  3990 void MacroAssembler::store_for_type_by_register(Register src_reg, Register tmp_reg, int disp, BasicType type, bool wide) {
  3991   switch (type) {
  3992     case T_LONG:
  3993       st_ptr(src_reg, tmp_reg, disp);
  3994       break;
  3995     case T_ARRAY:
  3996     case T_OBJECT:
  3997       if (UseCompressedOops && !wide) {
  3998         sw(src_reg, tmp_reg, disp);
  3999       } else {
  4000         st_ptr(src_reg, tmp_reg, disp);
  4001       }
  4002       break;
  4003     case T_ADDRESS:
  4004       st_ptr(src_reg, tmp_reg, disp);
  4005       break;
  4006     case T_INT:
  4007       sw(src_reg, tmp_reg, disp);
  4008       break;
  4009     case T_CHAR:
  4010     case T_SHORT:
  4011       sh(src_reg, tmp_reg, disp);
  4012       break;
  4013     case T_BYTE:
  4014     case T_BOOLEAN:
  4015       sb(src_reg, tmp_reg, disp);
  4016       break;
  4017     default:
  4018       ShouldNotReachHere();
  4019   }
  4020 }
  4022 void MacroAssembler::store_for_type(Register src_reg, Address addr, BasicType type, bool wide) {
  4023   Register tmp_reg = T9;
  4024   Register index_reg = addr.index();
  4025   if (index_reg == NOREG) {
  4026     tmp_reg = NOREG;
  4027   }
  4029   int scale = addr.scale();
  4030   if (tmp_reg != NOREG && scale >= 0) {
  4031     dsll(tmp_reg, index_reg, scale);
  4032   }
  4034   int disp = addr.disp();
  4035   bool disp_is_simm16 = true;
  4036   if (!Assembler::is_simm16(disp)) {
  4037     disp_is_simm16 = false;
  4038   }
  4040   Register base_reg = addr.base();
  4041   if (tmp_reg != NOREG) {
  4042     assert_different_registers(tmp_reg, base_reg, index_reg);
  4043   }
  4045   if (tmp_reg != NOREG) {
  4046     daddu(tmp_reg, base_reg, tmp_reg);
  4047     if (!disp_is_simm16) {
  4048       move(tmp_reg, disp);
  4049       daddu(tmp_reg, base_reg, tmp_reg);
  4050     }
  4051     store_for_type_by_register(src_reg, tmp_reg, disp_is_simm16 ? disp : 0, type, wide);
  4052   } else {
  4053     if (!disp_is_simm16) {
  4054       tmp_reg = T9;
  4055       assert_different_registers(tmp_reg, base_reg);
  4056       move(tmp_reg, disp);
  4057       daddu(tmp_reg, base_reg, tmp_reg);
  4058     }
  4059     store_for_type_by_register(src_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type, wide);
  4060   }
  4061 }
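//
// Shared scheme for all load_for_type/store_for_type variants: a MIPS
// memory operand is base + simm16 only, so a scaled index and/or an
// out-of-range displacement is first folded into T9 (dsll/daddu, or
// move/daddu), and the access is then issued as one base + immediate
// instruction against either base_reg or T9.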
  4063 void MacroAssembler::store_for_type_by_register(FloatRegister src_reg, Register tmp_reg, int disp, BasicType type) {
  4064   switch (type) {
  4065     case T_DOUBLE:
  4066       sdc1(src_reg, tmp_reg, disp);
  4067       break;
  4068     case T_FLOAT:
  4069       swc1(src_reg, tmp_reg, disp);
  4070       break;
  4071     default:
  4072       ShouldNotReachHere();
  4073   }
  4074 }
  4076 void MacroAssembler::store_for_type(FloatRegister src_reg, Address addr, BasicType type) {
  4077   Register tmp_reg = T9;
  4078   Register index_reg = addr.index();
  4079   if (index_reg == NOREG) {
  4080     tmp_reg = NOREG;
  4081   }
  4083   int scale = addr.scale();
  4084   if (tmp_reg != NOREG && scale >= 0) {
  4085     dsll(tmp_reg, index_reg, scale);
  4086   }
  4088   int disp = addr.disp();
  4089   bool disp_is_simm16 = true;
  4090   if (!Assembler::is_simm16(disp)) {
  4091     disp_is_simm16 = false;
  4092   }
  4094   Register base_reg = addr.base();
  4095   if (tmp_reg != NOREG) {
  4096     assert_different_registers(tmp_reg, base_reg, index_reg);
  4097   }
  4099   if (tmp_reg != NOREG) {
  4100     daddu(tmp_reg, base_reg, tmp_reg);
  4101     if (!disp_is_simm16) {
  4102       move(tmp_reg, disp);
  4103       daddu(tmp_reg, base_reg, tmp_reg);
  4104     }
  4105     store_for_type_by_register(src_reg, tmp_reg, disp_is_simm16 ? disp : 0, type);
  4106   } else {
  4107     if (!disp_is_simm16) {
  4108       tmp_reg = T9;
  4109       assert_different_registers(tmp_reg, base_reg);
  4110       move(tmp_reg, disp);
  4111       daddu(tmp_reg, base_reg, tmp_reg);
  4112     }
  4113     store_for_type_by_register(src_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type);
  4114   }
  4115 }
  4117 void MacroAssembler::load_for_type_by_register(Register dst_reg, Register tmp_reg, int disp, BasicType type, bool wide) {
  4118   switch (type) {
  4119     case T_LONG:
  4120       ld_ptr(dst_reg, tmp_reg, disp);
  4121       break;
  4122     case T_ARRAY:
  4123     case T_OBJECT:
  4124       if (UseCompressedOops && !wide) {
  4125         lwu(dst_reg, tmp_reg, disp);
  4126       } else {
  4127         ld_ptr(dst_reg, tmp_reg, disp);
  4128       }
  4129       break;
  4130     case T_ADDRESS:
  4131       if (UseCompressedClassPointers && disp == oopDesc::klass_offset_in_bytes()) {
  4132         lwu(dst_reg, tmp_reg, disp);
  4133       } else {
  4134         ld_ptr(dst_reg, tmp_reg, disp);
  4135       }
  4136       break;
  4137     case T_INT:
  4138       lw(dst_reg, tmp_reg, disp);
  4139       break;
  4140     case T_CHAR:
  4141       lhu(dst_reg, tmp_reg, disp);
  4142       break;
  4143     case T_SHORT:
  4144       lh(dst_reg, tmp_reg, disp);
  4145       break;
  4146     case T_BYTE:
  4147     case T_BOOLEAN:
  4148       lb(dst_reg, tmp_reg, disp);
  4149       break;
  4150     default:
  4151       ShouldNotReachHere();
  4152   }
  4153 }
  4155 int MacroAssembler::load_for_type(Register dst_reg, Address addr, BasicType type, bool wide) {
  4156   int code_offset = 0;
  4157   Register tmp_reg = T9;
  4158   Register index_reg = addr.index();
  4159   if (index_reg == NOREG) {
  4160     tmp_reg = NOREG;
  4161   }
  4163   int scale = addr.scale();
  4164   if (tmp_reg != NOREG && scale >= 0) {
  4165     dsll(tmp_reg, index_reg, scale);
  4166   }
  4168   int disp = addr.disp();
  4169   bool disp_is_simm16 = true;
  4170   if (!Assembler::is_simm16(disp)) {
  4171     disp_is_simm16 = false;
  4172   }
  4174   Register base_reg = addr.base();
  4175   if (tmp_reg != NOREG) {
  4176     assert_different_registers(tmp_reg, base_reg, index_reg);
  4177   }
  4179   if (tmp_reg != NOREG) {
  4180     daddu(tmp_reg, base_reg, tmp_reg);
  4181     if (!disp_is_simm16) {
  4182       move(tmp_reg, disp);
  4183       daddu(tmp_reg, base_reg, tmp_reg);
  4184     }
  4185     code_offset = offset();
  4186     load_for_type_by_register(dst_reg, tmp_reg, disp_is_simm16 ? disp : 0, type, wide);
  4187   } else {
  4188     if (!disp_is_simm16) {
  4189       tmp_reg = T9;
  4190       assert_different_registers(tmp_reg, base_reg);
  4191       move(tmp_reg, disp);
  4192       daddu(tmp_reg, base_reg, tmp_reg);
  4193     }
  4194     code_offset = offset();
  4195     load_for_type_by_register(dst_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type, wide);
  4196   }
  4198   return code_offset;
  4199 }
  4201 void MacroAssembler::load_for_type_by_register(FloatRegister dst_reg, Register tmp_reg, int disp, BasicType type) {
  4202   switch (type) {
  4203     case T_DOUBLE:
  4204       ldc1(dst_reg, tmp_reg, disp);
  4205       break;
  4206     case T_FLOAT:
  4207       lwc1(dst_reg, tmp_reg, disp);
  4208       break;
  4209     default:
  4210       ShouldNotReachHere();
  4211   }
  4212 }
  4214 int MacroAssembler::load_for_type(FloatRegister dst_reg, Address addr, BasicType type) {
  4215   int code_offset = 0;
  4216   Register tmp_reg = T9;
  4217   Register index_reg = addr.index();
  4218   if (index_reg == NOREG) {
  4219     tmp_reg = NOREG;
  4220   }
  4222   int scale = addr.scale();
  4223   if (tmp_reg != NOREG && scale >= 0) {
  4224     dsll(tmp_reg, index_reg, scale);
  4225   }
  4227   int disp = addr.disp();
  4228   bool disp_is_simm16 = true;
  4229   if (!Assembler::is_simm16(disp)) {
  4230     disp_is_simm16 = false;
  4231   }
  4233   Register base_reg = addr.base();
  4234   if (tmp_reg != NOREG) {
  4235     assert_different_registers(tmp_reg, base_reg, index_reg);
  4236   }
  4238   if (tmp_reg != NOREG) {
  4239     daddu(tmp_reg, base_reg, tmp_reg);
  4240     if (!disp_is_simm16) {
  4241       move(tmp_reg, disp);
  4242       daddu(tmp_reg, base_reg, tmp_reg);
  4243     }
  4244     code_offset = offset();
  4245     load_for_type_by_register(dst_reg, tmp_reg, disp_is_simm16 ? disp : 0, type);
  4246   } else {
  4247     if (!disp_is_simm16) {
  4248       tmp_reg = T9;
  4249       assert_different_registers(tmp_reg, base_reg);
  4250       move(tmp_reg, disp);
  4251       daddu(tmp_reg, base_reg, tmp_reg);
  4252     }
  4253     code_offset = offset();
  4254     load_for_type_by_register(dst_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type);
  4255   }
  4257   return code_offset;
  4258 }
  4260 void MacroAssembler::clear_jweak_tag(Register possibly_jweak) {
  4261   const int32_t inverted_jweak_mask = ~static_cast<int32_t>(JNIHandles::weak_tag_mask);
  4262   STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code
  4263   // The inverted mask is sign-extended
  4264   move(AT, inverted_jweak_mask);
  4265   andr(possibly_jweak, AT, possibly_jweak);
  4266 }
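//
// Sketch: JNIHandles::weak_tag_mask is 1 (hence the -2 STATIC_ASSERT), so
// the AND simply clears the low tag bit; e.g. an assumed tagged jweak
// handle 0x...1235 becomes the untagged handle address 0x...1234.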
  4268 void MacroAssembler::resolve_jobject(Register value,
  4269                                      Register thread,
  4270                                      Register tmp) {
  4271   assert_different_registers(value, thread, tmp);
  4272   Label done, not_weak;
  4273   beq(value, R0, done);                // Use NULL as-is.
  4274   delayed()->nop();
  4275   move(AT, JNIHandles::weak_tag_mask); // Test for jweak tag.
  4276   andr(AT, value, AT);
  4277   beq(AT, R0, not_weak);
  4278   delayed()->nop();
  4279   // Resolve jweak.
  4280   ld(value, value, -JNIHandles::weak_tag_value);
  4281   verify_oop(value);
  4282   #if INCLUDE_ALL_GCS
  4283     if (UseG1GC) {
  4284       g1_write_barrier_pre(noreg /* obj */,
  4285                            value /* pre_val */,
  4286                            thread /* thread */,
  4287                            tmp /* tmp */,
  4288                            true /* tosca_live */,
  4289                            true /* expand_call */);
  4290     }
  4291   #endif // INCLUDE_ALL_GCS
  4292   b(done);
  4293   delayed()->nop();
  4294   bind(not_weak);
  4295   // Resolve (untagged) jobject.
  4296   ld(value, value, 0);
  4297   verify_oop(value);
  4298   bind(done);
  4299 }
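//
// Usage sketch (register choices illustrative): a JNI stub would resolve an
// incoming handle in V0 with
//
//   resolve_jobject(V0 /* value */, thread_reg, T9 /* tmp */);
//
// NULL passes through unchanged; a tagged jweak is additionally reported to
// the G1 pre-barrier so that a concurrently cleared referent stays live for
// this caller.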
