src/cpu/mips/vm/macroAssembler_mips.cpp

author:      aoqi
date:        Tue, 12 Dec 2017 10:30:27 +0800
changeset:   8019:3fb3ceb7398f
parent:      8009:0477693968a6
child:       8862:fd13a567f179
permissions: -rw-r--r--

#6345 sync is controlled by UseSyncLevel instead of Use3A2000
Reviewed-by: fujie

/*
 * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2017, Loongson Technology. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */
#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "compiler/disassembler.hpp"
#include "gc_interface/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/cardTableModRefBS.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/macros.hpp"
#if INCLUDE_ALL_GCS
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
#include "gc_implementation/g1/heapRegion.hpp"
#endif // INCLUDE_ALL_GCS
// Implementation of MacroAssembler

intptr_t MacroAssembler::i[32] = {0};
float MacroAssembler::f[32] = {0.0};

void MacroAssembler::print(outputStream *s) {
  unsigned int k;
  for(k=0; k<sizeof(i)/sizeof(i[0]); k++) {
    s->print_cr("i%d = 0x%.16lx", k, i[k]);
  }
  s->cr();

  for(k=0; k<sizeof(f)/sizeof(f[0]); k++) {
    s->print_cr("f%d = %f", k, f[k]);
  }
  s->cr();
}

int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; }
int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; }
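
// save_registers()/restore_registers() spill and reload all 32 integer and
// all 32 floating-point registers to/from the static i[]/f[] buffers above,
// using A0 as the buffer base and i_offset()/f_offset() to locate each slot.
// They are debugging helpers and assume the caller has set up A0.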
void MacroAssembler::save_registers(MacroAssembler *masm) {
#define __ masm->
  for(int k=0; k<32; k++) {
    __ sw (as_Register(k), A0, i_offset(k));
  }

  for(int k=0; k<32; k++) {
    __ swc1 (as_FloatRegister(k), A0, f_offset(k));
  }
#undef __
}

void MacroAssembler::restore_registers(MacroAssembler *masm) {
#define __ masm->
  for(int k=0; k<32; k++) {
    __ lw (as_Register(k), A0, i_offset(k));
  }

  for(int k=0; k<32; k++) {
    __ lwc1 (as_FloatRegister(k), A0, f_offset(k));
  }
#undef __
}
void MacroAssembler::pd_patch_instruction(address branch, address target) {
  jint& stub_inst = *(jint*) branch;

/*
  move(AT, RA); // dadd
  emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
  nop();
  lui(T9, 0); // to be patched
  ori(T9, T9, 0);
  daddu(T9, T9, RA);
  move(RA, AT);
  jr(T9);
 */
  if(special(stub_inst) == dadd_op) {
    jint *pc = (jint *)branch;

    assert(opcode(pc[3]) == lui_op
          && opcode(pc[4]) == ori_op
          && special(pc[5]) == daddu_op, "Not a branch label patch");
    if(!(opcode(pc[3]) == lui_op
          && opcode(pc[4]) == ori_op
          && special(pc[5]) == daddu_op)) { tty->print_cr("Not a branch label patch"); }

    int offset = target - branch;
    if (!is_simm16(offset)) {
      pc[3] = (pc[3] & 0xffff0000) | high16(offset - 12);
      pc[4] = (pc[4] & 0xffff0000) | low16(offset - 12);
    } else {
      /* revert to a short "b + nop" sequence */
      CodeBuffer cb(branch, 4 * 10);
      MacroAssembler masm(&cb);
#define __ masm.
      __ b(target);
      __ nop();
      __ nop();
      __ nop();
      __ nop();
      __ nop();
      __ nop();
      __ nop();
    }
    return;
  }

#ifndef PRODUCT
  if (!is_simm16((target - branch - 4) >> 2)) {
    tty->print_cr("Illegal patching: target=0x%lx", (long)target);
    int *p = (int *)branch;
    for (int i = -10; i < 10; i++) {
       tty->print("0x%x, ", p[i]);
    }
    tty->print_cr("");
  }
#endif

  stub_inst = patched_branch(target - branch, stub_inst, 0);
}
static inline address first_cache_address() {
  return CodeCache::low_bound() + sizeof(HeapBlock::Header);
}

static inline address last_cache_address() {
  return CodeCache::high_bound() - Assembler::InstructionSize;
}
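
// call_size() returns, in bytes, the amount of code a subsequent call to
// 'target' will occupy: a patchable call is always 6 instructions, a near
// call is jal + delay-slot nop, and a far call first materializes the
// 64-bit target with set64 before jalr + nop.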
int MacroAssembler::call_size(address target, bool far, bool patchable) {
  if (patchable) return 6 << Assembler::LogInstructionSize;
  if (!far) return 2 << Assembler::LogInstructionSize; // jal + nop
  return (insts_for_set64((jlong)target) + 2) << Assembler::LogInstructionSize;
}
// Can we reach target using jal/j from anywhere
// in the code cache (because code can be relocated)?
bool MacroAssembler::reachable_from_cache(address target) {
  address cl = first_cache_address();
  address ch = last_cache_address();

  return fit_in_jal(target, cl) && fit_in_jal(target, ch);
}
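
// Note: MIPS j/jal encode only a 26-bit instruction index, so they can reach
// targets within the same 256 MB-aligned region as the instruction in the
// delay slot; checking both ends of the code cache covers any position the
// code may later be relocated to.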
void MacroAssembler::general_jump(address target) {
  if (reachable_from_cache(target)) {
    j(target);
    nop();
  } else {
    set64(T9, (long)target);
    jr(T9);
    nop();
  }
}

int MacroAssembler::insts_for_general_jump(address target) {
  if (reachable_from_cache(target)) {
    //j(target);
    //nop();
    return 2;
  } else {
    //set64(T9, (long)target);
    //jr(T9);
    //nop();
    return insts_for_set64((jlong)target) + 2;
  }
}
void MacroAssembler::patchable_jump(address target) {
  if (reachable_from_cache(target)) {
    nop();
    nop();
    nop();
    nop();
    j(target);
    nop();
  } else {
    patchable_set48(T9, (long)target);
    jr(T9);
    nop();
  }
}
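
// A patchable jump (and, below, a patchable call) always occupies exactly 6
// instruction slots: the near form is padded with leading nops so it can
// later be repatched in place to the far form, and vice versa.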
int MacroAssembler::insts_for_patchable_jump(address target) {
  return 6;
}

void MacroAssembler::general_call(address target) {
  if (reachable_from_cache(target)) {
    jal(target);
    nop();
  } else {
    set64(T9, (long)target);
    jalr(T9);
    nop();
  }
}

int MacroAssembler::insts_for_general_call(address target) {
  if (reachable_from_cache(target)) {
    //jal(target);
    //nop();
    return 2;
  } else {
    //set64(T9, (long)target);
    //jalr(T9);
    //nop();
    return insts_for_set64((jlong)target) + 2;
  }
}

void MacroAssembler::patchable_call(address target) {
  if (reachable_from_cache(target)) {
    nop();
    nop();
    nop();
    nop();
    jal(target);
    nop();
  } else {
    patchable_set48(T9, (long)target);
    jalr(T9);
    nop();
  }
}

int MacroAssembler::insts_for_patchable_call(address target) {
  return 6;
}
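
// The *_far conditional branches below synthesize a long-range conditional
// branch when the 16-bit branch displacement is not enough: the condition is
// inverted so that a short branch skips over an unconditional b_far to the
// real target.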
void MacroAssembler::beq_far(Register rs, Register rt, address entry) {
  u_char * cur_pc = pc();

  /* Jin: Near/Far jump */
  if(is_simm16((entry - pc() - 4) / 4)) {
    Assembler::beq(rs, rt, offset(entry));
  } else {
    Label not_jump;
    bne(rs, rt, not_jump);
    delayed()->nop();

    b_far(entry);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}

void MacroAssembler::beq_far(Register rs, Register rt, Label& L) {
  if (L.is_bound()) {
    beq_far(rs, rt, target(L));
  } else {
    u_char * cur_pc = pc();
    Label not_jump;
    bne(rs, rt, not_jump);
    delayed()->nop();

    b_far(L);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}

void MacroAssembler::bne_far(Register rs, Register rt, address entry) {
  u_char * cur_pc = pc();

  /* Jin: Near/Far jump */
  if(is_simm16((entry - pc() - 4) / 4)) {
    Assembler::bne(rs, rt, offset(entry));
  } else {
    Label not_jump;
    beq(rs, rt, not_jump);
    delayed()->nop();

    b_far(entry);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}

void MacroAssembler::bne_far(Register rs, Register rt, Label& L) {
  if (L.is_bound()) {
    bne_far(rs, rt, target(L));
  } else {
    u_char * cur_pc = pc();
    Label not_jump;
    beq(rs, rt, not_jump);
    delayed()->nop();

    b_far(L);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}
void MacroAssembler::b_far(Label& L) {
  if (L.is_bound()) {
    b_far(target(L));
  } else {
    volatile address dest = target(L);
/*
MacroAssembler::pd_patch_instruction branch=55651ed514, target=55651ef6d8
   0x00000055651ed514: dadd at, ra, zero
   0x00000055651ed518: [4110001]bgezal zero, 0x00000055651ed520

   0x00000055651ed51c: sll zero, zero, 0
   0x00000055651ed520: lui t9, 0x0
   0x00000055651ed524: ori t9, t9, 0x21b8
   0x00000055651ed528: daddu t9, t9, ra
   0x00000055651ed52c: dadd ra, at, zero
   0x00000055651ed530: jr t9
   0x00000055651ed534: sll zero, zero, 0
*/
    move(AT, RA);
    emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
    nop();
    lui(T9, 0); // to be patched
    ori(T9, T9, 0);
    daddu(T9, T9, RA);
    move(RA, AT);
    jr(T9);
  }
}
void MacroAssembler::b_far(address entry) {
  u_char * cur_pc = pc();

  /* Jin: Near/Far jump */
  if(is_simm16((entry - pc() - 4) / 4)) {
    b(offset(entry));
  } else {
    /* the far target is materialized PC-relative with li32,
       so it must lie within a 32-bit displacement of pc() */
    move(AT, RA);
    emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
    nop();
    li32(T9, entry - pc());
    daddu(T9, T9, RA);
    move(RA, AT);
    jr(T9);
  }
}
void MacroAssembler::ld_ptr(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  ld_ptr(rt, 0, AT);
}

void MacroAssembler::st_ptr(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  st_ptr(rt, 0, AT);
}

void MacroAssembler::ld_long(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  ld_long(rt, 0, AT);
}

void MacroAssembler::st_long(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  st_long(rt, 0, AT);
}

Address MacroAssembler::as_Address(AddressLiteral adr) {
  return Address(adr.target(), adr.rspec());
}

Address MacroAssembler::as_Address(ArrayAddress adr) {
  return Address::make_array(adr);
}
// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved).
void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) {
  Label again;

  li(tmp_reg1, counter_addr);
  bind(again);
  if(UseSyncLevel >= 3000 || UseSyncLevel < 2000) sync();
  ll(tmp_reg2, tmp_reg1, 0);
  addi(tmp_reg2, tmp_reg2, inc);
  sc(tmp_reg2, tmp_reg1, 0);
  beq(tmp_reg2, R0, again);
  delayed()->nop();
}
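
// atomic_inc32 implements the increment as an ll/sc retry loop: sc stores
// back only if no other CPU wrote the location since the ll, and leaves the
// store-conditional result in tmp_reg2 (0 means retry). The leading sync()
// is emitted or skipped according to UseSyncLevel.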
int MacroAssembler::biased_locking_enter(Register lock_reg,
                                         Register obj_reg,
                                         Register swap_reg,
                                         Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Label& done,
                                         Label* slow_case,
                                         BiasedLockingCounters* counters) {
  assert(UseBiasedLocking, "why call this otherwise?");
  bool need_tmp_reg = false;
  if (tmp_reg == noreg) {
    need_tmp_reg = true;
    tmp_reg = T9;
  }
  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT);
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  Address saved_mark_addr(lock_reg, 0);

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;
  int null_check_offset = -1;
  if (!swap_reg_contains_mark) {
    null_check_offset = offset();
    ld_ptr(swap_reg, mark_addr);
  }

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  move(tmp_reg, swap_reg);
  andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place);
#ifdef _LP64
  daddi(AT, R0, markOopDesc::biased_lock_pattern);
  dsub(AT, AT, tmp_reg);
#else
  addi(AT, R0, markOopDesc::biased_lock_pattern);
  sub(AT, AT, tmp_reg);
#endif
  if (need_tmp_reg) {
    pop(tmp_reg);
  }

  bne(AT, R0, cas_label);
  delayed()->nop();

  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  // Note that because there is no current thread register on MIPS we
  // need to store off the mark word we read out of the object to
  // avoid reloading it and needing to recheck invariants below. This
  // store is unfortunate but it makes the overall code shorter and
  // simpler.
  st_ptr(swap_reg, saved_mark_addr);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  if (swap_reg_contains_mark) {
    null_check_offset = offset();
  }
  load_prototype_header(tmp_reg, obj_reg);
  xorr(tmp_reg, tmp_reg, swap_reg);
  get_thread(swap_reg);
  xorr(swap_reg, swap_reg, tmp_reg);

  move(AT, ~((int) markOopDesc::age_mask_in_place));
  andr(swap_reg, swap_reg, AT);

  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(swap_reg, R0, L);
    delayed()->nop();
    push(tmp_reg);
    push(A0);
    atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg);
    pop(A0);
    pop(tmp_reg);
    bind(L);
  }
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  beq(swap_reg, R0, done);
  delayed()->nop();
  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.

  move(AT, markOopDesc::biased_lock_mask_in_place);
  andr(AT, swap_reg, AT);
  bne(AT, R0, try_revoke_bias);
  delayed()->nop();
  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.

  move(AT, markOopDesc::epoch_mask_in_place);
  andr(AT, swap_reg, AT);
  bne(AT, R0, try_rebias);
  delayed()->nop();
  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go in to the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.

  ld_ptr(swap_reg, saved_mark_addr);

  move(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  andr(swap_reg, swap_reg, AT);

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  orr(tmp_reg, tmp_reg, swap_reg);
  //if (os::is_MP()) {
  //  sync();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(tmp_reg);
    push(A0);
    atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg);
    pop(A0);
    pop(tmp_reg);
    bind(L);
  }
  if (slow_case != NULL) {
    beq_far(AT, R0, *slow_case);
    delayed()->nop();
  }
  b(done);
  delayed()->nop();

  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  load_prototype_header(tmp_reg, obj_reg);
  get_thread(swap_reg);
  orr(tmp_reg, tmp_reg, swap_reg);
  ld_ptr(swap_reg, saved_mark_addr);

  //if (os::is_MP()) {
  //  sync();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(AT);
    push(tmp_reg);
    atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg);
    pop(tmp_reg);
    pop(AT);
    bind(L);
  }
  if (slow_case != NULL) {
    beq_far(AT, R0, *slow_case);
    delayed()->nop();
  }

  b(done);
  delayed()->nop();
  bind(try_revoke_bias);
  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  ld_ptr(swap_reg, saved_mark_addr);

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  load_prototype_header(tmp_reg, obj_reg);
  //if (os::is_MP()) {
  // lock();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(AT);
    push(tmp_reg);
    atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg);
    pop(tmp_reg);
    pop(AT);
    bind(L);
  }

  bind(cas_label);
  return null_check_offset;
}
void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
  assert(UseBiasedLocking, "why call this otherwise?");

  // Check for biased locking unlock case, which is a no-op
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
#ifdef _LP64
  ld(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
  daddi(AT, R0, markOopDesc::biased_lock_pattern);
#else
  lw(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
  addi(AT, R0, markOopDesc::biased_lock_pattern);
#endif

  beq(AT, temp_reg, done);
  delayed()->nop();
}
// The stack pointer adjustment is needed; see InterpreterMacroAssembler::super_call_VM_leaf.
// This method handles the stack alignment itself, so the caller need not reserve stack space for the arguments.
void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments) {
  Label L, E;

  assert(number_of_arguments <= 4, "just check");

  andi(AT, SP, 0xf);
  beq(AT, R0, L);
  delayed()->nop();
  daddi(SP, SP, -8);
  call(entry_point, relocInfo::runtime_call_type);
  delayed()->nop();
  daddi(SP, SP, 8);
  b(E);
  delayed()->nop();

  bind(L);
  call(entry_point, relocInfo::runtime_call_type);
  delayed()->nop();
  bind(E);
}
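
// Example of the alignment logic above: if SP is only 8-byte aligned
// (SP & 0xf == 8), the extra daddi(SP, SP, -8) realigns it to the 16-byte
// boundary the ABI expects before the runtime call, and the adjustment is
// undone immediately after the call returns.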
void MacroAssembler::jmp(address entry) {
  patchable_set48(T9, (long)entry);
  jr(T9);
}

void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) {
  switch (rtype) {
    case relocInfo::runtime_call_type:
    case relocInfo::none:
      jmp(entry);
      break;
    default:
      {
        InstructionMark im(this);
        relocate(rtype);
        patchable_set48(T9, (long)entry);
        jr(T9);
      }
      break;
  }
}

void MacroAssembler::call(address entry) {
// C/C++ code assumes T9 holds the entry point, so we always move the entry into T9.
// Maybe there is a more graceful way to handle this. FIXME
// For more info, see class NativeCall.
#ifndef _LP64
  move(T9, (int)entry);
#else
  patchable_set48(T9, (long)entry);
#endif
  jalr(T9);
}

void MacroAssembler::call(address entry, relocInfo::relocType rtype) {
  switch (rtype) {
    case relocInfo::runtime_call_type:
    case relocInfo::none:
      call(entry);
      break;
    default:
      {
        InstructionMark im(this);
        relocate(rtype);
        call(entry);
      }
      break;
  }
}

void MacroAssembler::call(address entry, RelocationHolder& rh)
{
  switch (rh.type()) {
    case relocInfo::runtime_call_type:
    case relocInfo::none:
      call(entry);
      break;
    default:
      {
        InstructionMark im(this);
        relocate(rh);
        call(entry);
      }
      break;
  }
}

void MacroAssembler::ic_call(address entry) {
  RelocationHolder rh = virtual_call_Relocation::spec(pc());
  patchable_set48(IC_Klass, (long)Universe::non_oop_word());
  assert(entry != NULL, "call most probably wrong");
  InstructionMark im(this);
  relocate(rh);
  patchable_call(entry);
}
void MacroAssembler::c2bool(Register r) {
  Label L;
  Assembler::beq(r, R0, L);
  delayed()->nop();
  move(r, 1);
  bind(L);
}
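
// c2bool() normalizes a C truth value: any nonzero value in r becomes 1 and
// zero is left unchanged, matching Java's boolean representation.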
#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif

void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
  // In order to get locks to work, we need to fake an in_VM state
  JavaThread* thread = JavaThread::current();
  JavaThreadState saved_state = thread->thread_state();
  thread->set_thread_state(_thread_in_vm);
  if (ShowMessageBoxOnError) {
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    // This is the value of eip which points to where verify_oop will return.
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      ttyLocker ttyl;
      tty->print_cr("eip = 0x%08x", eip);
#ifndef PRODUCT
      tty->cr();
      findpc(eip);
      tty->cr();
#endif
      tty->print_cr("rax = 0x%08x", rax);
      tty->print_cr("rbx = 0x%08x", rbx);
      tty->print_cr("rcx = 0x%08x", rcx);
      tty->print_cr("rdx = 0x%08x", rdx);
      tty->print_cr("rdi = 0x%08x", rdi);
      tty->print_cr("rsi = 0x%08x", rsi);
      tty->print_cr("rbp = 0x%08x", rbp);
      tty->print_cr("rsp = 0x%08x", rsp);
      BREAKPOINT;
    }
  } else {
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
    assert(false, "DEBUG MESSAGE");
  }
  ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
}
void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) {
  if ( ShowMessageBoxOnError ) {
    JavaThreadState saved_state = JavaThread::current()->thread_state();
    JavaThread::current()->set_thread_state(_thread_in_vm);
    {
      // In order to get locks to work, we need to fake an in_VM state
      ttyLocker ttyl;
      ::tty->print_cr("EXECUTION STOPPED: %s\n", msg);
      if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
        BytecodeCounter::print();
      }

      //      if (os::message_box(msg, "Execution stopped, print registers?"))
      //        regs->print(::tty);
    }
    ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state);
  }
  else
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
}
void MacroAssembler::stop(const char* msg) {
  li(A0, (long)msg);
#ifndef _LP64
  // reserve space for the argument. added by yjl 7/10/2005
  addiu(SP, SP, - 1 * wordSize);
#endif
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
#ifndef _LP64
  // restore space for the argument
  addiu(SP, SP, 1 * wordSize);
#endif
  brk(17);
}
void MacroAssembler::warn(const char* msg) {
#ifdef _LP64
  pushad();
  li(A0, (long)msg);
  push(S2);
  move(AT, -(StackAlignmentInBytes));
  move(S2, SP);     // use S2 as a sender SP holder
  andr(SP, SP, AT); // align stack as required by ABI
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, S2);     // use S2 as a sender SP holder
  pop(S2);
  popad();
#else
  pushad();
  addi(SP, SP, -4);
  sw(A0, SP, -1 * wordSize);
  li(A0, (long)msg);
  addi(SP, SP, -1 * wordSize);
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
  addi(SP, SP, 1 * wordSize);
  lw(A0, SP, -1 * wordSize);
  addi(SP, SP, 4);
  popad();
#endif
}
void MacroAssembler::print_reg(Register reg) {
/*
char *s = getenv("PRINT_REG");
if (s == NULL)
  return;
if (strcmp(s, "1") != 0)
  return;
*/
  void * cur_pc = pc();
  pushad();
  NOT_LP64(push(FP);)

  li(A0, (long)reg->name());
  if (reg == SP)
    addiu(A1, SP, wordSize * 23); //23 registers saved in pushad()
  else if (reg == A0)
    ld(A1, SP, wordSize * 19); //A0 has been modified by li(A0, (long)reg->name()). Ugly Code!
  else
    move(A1, reg);
  li(A2, (long)cur_pc);
  push(S2);
  move(AT, -(StackAlignmentInBytes));
  move(S2, SP);     // use S2 as a sender SP holder
  andr(SP, SP, AT); // align stack as required by ABI
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_reg_with_pc), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, S2);     // use S2 as a sender SP holder
  pop(S2);
  NOT_LP64(pop(FP);)
  popad();

/*
  pushad();
#ifdef _LP64
  if (reg == SP)
    addiu(A0, SP, wordSize * 23); //23 registers saved in pushad()
  else
    move(A0, reg);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_long), relocInfo::runtime_call_type);
  delayed()->nop();
#else
  push(FP);
  move(A0, reg);
  dsrl32(A1, reg, 0);
  //call(CAST_FROM_FN_PTR(address, SharedRuntime::print_int), relocInfo::runtime_call_type);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_long), relocInfo::runtime_call_type);
  delayed()->nop();
  pop(FP);
#endif
  popad();
  pushad();
  NOT_LP64(push(FP);)
  char b[50];
  sprintf((char *)b, " pc: %p\n", cur_pc);
  li(A0, (long)(char *)b);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str), relocInfo::runtime_call_type);
  delayed()->nop();
  NOT_LP64(pop(FP);)
  popad();
*/
}
void MacroAssembler::print_reg(FloatRegister reg) {
  void * cur_pc = pc();
  pushad();
  NOT_LP64(push(FP);)
  li(A0, (long)reg->name());
  push(S2);
  move(AT, -(StackAlignmentInBytes));
  move(S2, SP);     // use S2 as a sender SP holder
  andr(SP, SP, AT); // align stack as required by ABI
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, S2);     // use S2 as a sender SP holder
  pop(S2);
  NOT_LP64(pop(FP);)
  popad();

  pushad();
  NOT_LP64(push(FP);)
#if 1
  move(FP, SP);
  move(AT, -(StackAlignmentInBytes));
  andr(SP, SP, AT);
  mov_d(F12, reg);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_double), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, FP);
#else
  mov_s(F12, reg);
  //call(CAST_FROM_FN_PTR(address, SharedRuntime::print_float), relocInfo::runtime_call_type);
  //delayed()->nop();
#endif
  NOT_LP64(pop(FP);)
  popad();

#if 0
  pushad();
  NOT_LP64(push(FP);)
  char* b = new char[50];
  sprintf(b, " pc: %p\n", cur_pc);
  li(A0, (long)b);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str), relocInfo::runtime_call_type);
  delayed()->nop();
  NOT_LP64(pop(FP);)
  popad();
#endif
}
void MacroAssembler::increment(Register reg, int imm) {
  if (!imm) return;
  if (is_simm16(imm)) {
#ifdef _LP64
    daddiu(reg, reg, imm);
#else
    addiu(reg, reg, imm);
#endif
  } else {
    move(AT, imm);
#ifdef _LP64
    daddu(reg, reg, AT);
#else
    addu(reg, reg, AT);
#endif
  }
}

void MacroAssembler::decrement(Register reg, int imm) {
  increment(reg, -imm);
}
void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             bool check_exceptions) {
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  if (arg_1!=A1) move(A1, arg_1);
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  if (arg_1!=A1) move(A1, arg_1);
  if (arg_2!=A2) move(A2, arg_2);
  assert(arg_2 != A1, "smashed argument");
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  if (arg_1!=A1) move(A1, arg_1);
  if (arg_2!=A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  if (arg_3!=A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             int number_of_arguments,
                             bool check_exceptions) {
  call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}
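
// The call_VM overloads above simply marshal up to three register arguments
// into the A1..A3 argument registers (A0 is reserved for the thread pointer,
// filled in by call_VM_base) and then delegate; the asserts guard against an
// earlier move having clobbered a later source register.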
void MacroAssembler::call_VM_base(Register oop_result,
                                  Register java_thread,
                                  Register last_java_sp,
                                  address  entry_point,
                                  int      number_of_arguments,
                                  bool     check_exceptions) {

  address before_call_pc;
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifndef OPT_THREAD
    java_thread = T2;
    get_thread(java_thread);
#else
    java_thread = TREG;
#endif
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = SP;
  }
  // debugging support
  assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
  assert(number_of_arguments <= 4   , "cannot have more than 4 arguments");
  assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");

  assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save ebp");

  // set last Java frame before call
  before_call_pc = (address)pc();
  set_last_Java_frame(java_thread, last_java_sp, FP, before_call_pc);

  // do the call
  move(A0, java_thread);
  call(entry_point, relocInfo::runtime_call_type);
  delayed()->nop();
  //MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);

  // restore the thread (cannot use the pushed argument since arguments
  // may be overwritten by C code generated by an optimizing compiler);
  // however can use the register value directly if it is callee saved.
#ifndef OPT_THREAD
  get_thread(java_thread);
#else
#ifdef ASSERT
  {
    Label L;
    get_thread(AT);
    beq(java_thread, AT, L);
    delayed()->nop();
    stop("MacroAssembler::call_VM_base: TREG not callee saved?");
    bind(L);
  }
#endif
#endif

  // discard thread and arguments
  ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
  // reset last Java frame
  reset_last_Java_frame(java_thread, false, true);

  check_and_handle_popframe(java_thread);
  check_and_handle_earlyret(java_thread);
  if (check_exceptions) {
    // check for pending exceptions (java_thread is set upon return)
    Label L;
#ifdef _LP64
    ld(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
#else
    lw(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
#endif
    beq(AT, R0, L);
    delayed()->nop();
    li(AT, before_call_pc);
    push(AT);
    jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
    delayed()->nop();
    bind(L);
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
#ifdef _LP64
    ld(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset()));
    sd(R0, java_thread, in_bytes(JavaThread::vm_result_offset()));
#else
    lw(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset()));
    sw(R0, java_thread, in_bytes(JavaThread::vm_result_offset()));
#endif
    verify_oop(oop_result);
  }
}
void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {

  move(V0, SP);
  //we also reserve space for java_thread here
#ifndef _LP64
  addiu(SP, SP, (1 + number_of_arguments) * (- wordSize)); // 32-bit path: addiu (daddiu is a 64-bit-only instruction)
#endif
  move(AT, -(StackAlignmentInBytes));
  andr(SP, SP, AT);
  call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions);
}
void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  call_VM_leaf_base(entry_point, number_of_arguments);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  if (arg_0 != A0) move(A0, arg_0);
  call_VM_leaf(entry_point, 1);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
  if (arg_0 != A0) move(A0, arg_0);
  if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument");
  call_VM_leaf(entry_point, 2);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  if (arg_0 != A0) move(A0, arg_0);
  if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument");
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument");
  call_VM_leaf(entry_point, 3);
}

void MacroAssembler::super_call_VM_leaf(address entry_point) {
  MacroAssembler::call_VM_leaf_base(entry_point, 0);
}

void MacroAssembler::super_call_VM_leaf(address entry_point,
                                                   Register arg_1) {
  if (arg_1 != A0) move(A0, arg_1);
  MacroAssembler::call_VM_leaf_base(entry_point, 1);
}

void MacroAssembler::super_call_VM_leaf(address entry_point,
                                                   Register arg_1,
                                                   Register arg_2) {
  if (arg_1 != A0) move(A0, arg_1);
  if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument");
  MacroAssembler::call_VM_leaf_base(entry_point, 2);
}

void MacroAssembler::super_call_VM_leaf(address entry_point,
                                                   Register arg_1,
                                                   Register arg_2,
                                                   Register arg_3) {
  if (arg_1 != A0) move(A0, arg_1);
  if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument");
  if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument");
  MacroAssembler::call_VM_leaf_base(entry_point, 3);
}
void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
}

void MacroAssembler::check_and_handle_popframe(Register java_thread) {
}
void MacroAssembler::null_check(Register reg, int offset) {
  if (needs_explicit_null_check(offset)) {
    // provoke OS NULL exception if reg = NULL by
    // accessing M[reg] w/o changing any (non-CC) registers
    // NOTE: cmpl is plenty here to provoke a segv
    lw(AT, reg, 0);
    // Note: should probably use testl(rax, Address(reg, 0));
    //       may be shorter code (however, this version of
    //       testl needs to be implemented first)
  } else {
    // nothing to do, (later) access of M[reg + offset]
    // will provoke OS NULL exception if reg = NULL
  }
}
void MacroAssembler::enter() {
  push2(RA, FP);
  move(FP, SP);
}

void MacroAssembler::leave() {
#ifndef _LP64
  //move(SP, FP);
  //pop2(FP, RA);
  addi(SP, FP, 2 * wordSize);
  lw(RA, SP, - 1 * wordSize);
  lw(FP, SP, - 2 * wordSize);
#else
  daddi(SP, FP, 2 * wordSize);
  ld(RA, SP, - 1 * wordSize);
  ld(FP, SP, - 2 * wordSize);
#endif
}
/*
void MacroAssembler::os_breakpoint() {
  // instead of directly emitting a breakpoint, call os::breakpoint for better debuggability
  // (e.g., MSVC can't call ps() otherwise)
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
}
*/
void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifndef OPT_THREAD
    java_thread = T1;
    get_thread(java_thread);
#else
    java_thread = TREG;
#endif
  }
  // we must set sp to zero to clear frame
  st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
  // must clear fp, so that compiled frames are not confused; it is possible
  // that we need it only for debugging
  if (clear_fp)
    st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));

  if (clear_pc)
    st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
}
void MacroAssembler::reset_last_Java_frame(bool clear_fp,
                                           bool clear_pc) {
  Register thread = TREG;
#ifndef OPT_THREAD
  get_thread(thread);
#endif
  // we must set sp to zero to clear frame
  sd(R0, Address(thread, JavaThread::last_Java_sp_offset()));
  // must clear fp, so that compiled frames are not confused; it is
  // possible that we need it only for debugging
  if (clear_fp) {
    sd(R0, Address(thread, JavaThread::last_Java_fp_offset()));
  }

  if (clear_pc) {
    sd(R0, Address(thread, JavaThread::last_Java_pc_offset()));
  }
}
// Write serialization page so VM thread can do a pseudo remote membar.
// We use the current thread pointer to calculate a thread specific
// offset to write to within the page. This minimizes bus traffic
// due to cache line collision.
void MacroAssembler::serialize_memory(Register thread, Register tmp) {
  move(tmp, thread);
  srl(tmp, tmp, os::get_serialize_page_shift_count());
  move(AT, (os::vm_page_size() - sizeof(int)));
  andr(tmp, tmp, AT);
  sw(tmp, Address(tmp, (intptr_t)os::get_memory_serialize_page()));
}
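
// Worked example of the offset computation above: the thread pointer is
// shifted right by the serialize-page shift count and masked with
// (vm_page_size() - sizeof(int)), i.e. a word-aligned offset within the
// page, so distinct threads tend to write distinct words and avoid false
// sharing on the same cache line.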
// Calls to C land
//
// When entering C land, the fp & sp of the last Java frame have to be recorded
// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
// has to be reset to 0. This is required to allow proper stack traversal.
void MacroAssembler::set_last_Java_frame(Register java_thread,
                                         Register last_java_sp,
                                         Register last_java_fp,
                                         address  last_java_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifndef OPT_THREAD
    java_thread = T2;
    get_thread(java_thread);
#else
    java_thread = TREG;
#endif
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = SP;
  }

  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));
  }

  // last_java_pc is optional
  if (last_java_pc != NULL) {
    relocate(relocInfo::internal_pc_type);
    patchable_set48(AT, (long)last_java_pc);
    st_ptr(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
  }
  st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
}
void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         address  last_java_pc) {
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = SP;
  }

  Register thread = TREG;
#ifndef OPT_THREAD
  get_thread(thread);
#endif
  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    sd(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset()));
  }

  // last_java_pc is optional
  if (last_java_pc != NULL) {
    Address java_pc(thread,
                    JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
    li(AT, (intptr_t)(last_java_pc));
    sd(AT, java_pc);
  }

  sd(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset()));
}
//////////////////////////////////////////////////////////////////////////////////
#if INCLUDE_ALL_GCS

void MacroAssembler::g1_write_barrier_pre(Register obj,
                                          Register pre_val,
                                          Register thread,
                                          Register tmp,
                                          bool tosca_live,
                                          bool expand_call) {

  // If expand_call is true then we expand the call_VM_leaf macro
  // directly to skip generating the check by
  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.

#ifdef _LP64
  assert(thread == TREG, "must be");
#endif // _LP64

  Label done;
  Label runtime;

  assert(pre_val != noreg, "check this code");

  if (obj != noreg) {
    assert_different_registers(obj, pre_val, tmp);
    assert(pre_val != V0, "check this code");
  }

  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_active()));
  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_buf()));

  // Is marking active?
  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
    lw(AT, in_progress);
  } else {
    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
    lb(AT, in_progress);
  }
  beq(AT, R0, done);
  nop();

  // Do we need to load the previous value?
  if (obj != noreg) {
    load_heap_oop(pre_val, Address(obj, 0));
  }

  // Is the previous value null?
  beq(pre_val, R0, done);
  nop();

  // Can we store original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)

  ld(tmp, index);
  beq(tmp, R0, runtime);
  nop();

  daddiu(tmp, tmp, -1 * wordSize);
  sd(tmp, index);
  ld(AT, buffer);
  daddu(tmp, tmp, AT);

  // Record the previous value
  sd(pre_val, tmp, 0);
  beq(R0, R0, done);
  nop();

  bind(runtime);
  // save the live input values
  if (tosca_live) push(V0);

  if (obj != noreg && obj != V0) push(obj);

  if (pre_val != V0) push(pre_val);

  // Calling the runtime using the regular call_VM_leaf mechanism generates
  // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
  // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.
  //
  // If we are generating the pre-barrier without a frame (e.g. in the
  // intrinsified Reference.get() routine) then ebp might be pointing to
  // the caller frame and so this check will most likely fail at runtime.
  //
  // Expanding the call directly bypasses the generation of the check.
  // So when we do not have a full interpreter frame on the stack
  // expand_call should be passed true.

  NOT_LP64( push(thread); )

  if (expand_call) {
    LP64_ONLY( assert(pre_val != A1, "smashed arg"); )
    if (thread != A1) move(A1, thread);
    if (pre_val != A0) move(A0, pre_val);
    MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
  } else {
    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
  }

  NOT_LP64( pop(thread); )

  // restore the live input values
  if (pre_val != V0)
    pop(pre_val);

  if (obj != noreg && obj != V0)
    pop(obj);

  if(tosca_live) pop(V0);

  bind(done);
}
  1572 void MacroAssembler::g1_write_barrier_post(Register store_addr,
  1573                                            Register new_val,
  1574                                            Register thread,
  1575                                            Register tmp,
  1576                                            Register tmp2) {
  1577   assert(tmp  != AT, "must be");
  1578   assert(tmp2 != AT, "must be");
  1579 #ifdef _LP64
  1580   assert(thread == TREG, "must be");
  1581 #endif // _LP64
  1583   Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
  1584                                        PtrQueue::byte_offset_of_index()));
  1585   Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
  1586                                        PtrQueue::byte_offset_of_buf()));
  1588   BarrierSet* bs = Universe::heap()->barrier_set();
  1589   CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  1590   assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
  1592   Label done;
  1593   Label runtime;
  1595   // Does store cross heap regions?
  1596   xorr(AT, store_addr, new_val);
  1597   dsrl(AT, AT, HeapRegion::LogOfHRGrainBytes);
  1598   beq(AT, R0, done);
  1599   nop();
  1602   // crosses regions, storing NULL?
  1603   beq(new_val, R0, done);
  1604   nop();
  1606   // storing region crossing non-NULL, is card already dirty?
  1607   const Register card_addr = tmp;
  1608   const Register cardtable = tmp2;
  1610   move(card_addr, store_addr);
  1611   dsrl(card_addr, card_addr, CardTableModRefBS::card_shift);
  1612   // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT
  1613   // a valid address and therefore is not properly handled by the relocation code.
  1614   set64(cardtable, (intptr_t)ct->byte_map_base);
  1615   daddu(card_addr, card_addr, cardtable);
  1617   lb(AT, card_addr, 0);
  1618   daddiu(AT, AT, -1 * (int)G1SATBCardTableModRefBS::g1_young_card_val());
  1619   beq(AT, R0, done);
  1620   nop();
  1622   sync();
  1623   lb(AT, card_addr, 0);
  1624   daddiu(AT, AT, -1 * (int)CardTableModRefBS::dirty_card_val());
  1625   beq(AT, R0, done);
  1626   nop();
  1629   // storing a region crossing, non-NULL oop, card is clean.
  1630   // dirty card and log.
  1631   move(AT, (int)CardTableModRefBS::dirty_card_val());
  1632   sb(AT, card_addr, 0);
  1634   lw(AT, queue_index);
  1635   beq(AT, R0, runtime);
  1636   nop();
  1637   daddiu(AT, AT, -1 * wordSize);
  1638   sw(AT, queue_index);
  1639   ld(tmp2, buffer);
  1640 #ifdef _LP64
  1641   ld(AT, queue_index);
  1642   daddu(tmp2, tmp2, AT);
  1643   sd(card_addr, tmp2, 0);
  1644 #else
  1645   lw(AT, queue_index);
  1646   addu32(tmp2, tmp2, AT);
  1647   sw(card_addr, tmp2, 0);
  1648 #endif
  1649   beq(R0, R0, done);
  1650   nop();
  1652   bind(runtime);
  1653   // save the live input values
  1654   push(store_addr);
  1655   push(new_val);
  1656 #ifdef _LP64
  1657   call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, TREG);
  1658 #else
  1659   push(thread);
  1660   call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
  1661   pop(thread);
  1662 #endif
  1663   pop(new_val);
  1664   pop(store_addr);
  1666   bind(done);
  1667 }
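// Likewise, a C-level sketch (not compiled) of the emitted post-barrier; the
// names are stand-ins for the real card-table constants, and enqueue_card_slow
// stands in for the dirty-card queue push / SharedRuntime::g1_wb_post call:
/*
  void g1_post_barrier_sketch(uintptr_t store_addr, uintptr_t new_val) {
    if (((store_addr ^ new_val) >> LogOfHRGrainBytes) == 0) return;  // same region
    if (new_val == 0) return;                                       // storing null
    volatile jbyte* card = byte_map_base + (store_addr >> card_shift);
    if (*card == g1_young_card_val) return;   // young cards need no refinement
    // storeload fence (the sync() above), then re-examine the card
    if (*card == dirty_card_val) return;      // already dirty
    *card = dirty_card_val;                   // dirty the card ...
    enqueue_card_slow(card);                  // ... and log it
  }
*/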
  1669 #endif // INCLUDE_ALL_GCS
  1670 //////////////////////////////////////////////////////////////////////////////////
  1673 void MacroAssembler::store_check(Register obj) {
  1674   // Does a store check for the oop in register obj. The content of
  1675   // register obj is destroyed afterwards.
  1676   store_check_part_1(obj);
  1677   store_check_part_2(obj);
  1678 }
  1680 void MacroAssembler::store_check(Register obj, Address dst) {
  1681   store_check(obj);
  1682 }
  1685 // split the store check operation so that other instructions can be scheduled in between
  1686 void MacroAssembler::store_check_part_1(Register obj) {
  1687   BarrierSet* bs = Universe::heap()->barrier_set();
  1688   assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  1689 #ifdef _LP64
  1690   dsrl(obj, obj, CardTableModRefBS::card_shift);
  1691 #else
  1692   shr(obj, CardTableModRefBS::card_shift);
  1693 #endif
  1694 }
  1696 void MacroAssembler::store_check_part_2(Register obj) {
  1697   BarrierSet* bs = Universe::heap()->barrier_set();
  1698   assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  1699   CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  1700   assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
  1702   set64(AT, (long)ct->byte_map_base);
  1703 #ifdef _LP64
  1704   dadd(AT, AT, obj);
  1705 #else
  1706   add(AT, AT, obj);
  1707 #endif
  1708   if (UseConcMarkSweepGC) sync();
  1709   sb(R0, AT, 0);
  1710 }
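// The two parts combine into the classic card-table store check; a one-line
// sketch (not compiled), with byte_map_base and card_shift as stand-ins:
/*
  byte_map_base[obj >> card_shift] = 0;   // 0 == dirty_card_val()
*/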
  1712 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
  1713 void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
  1714                                    Register t1, Register t2, Label& slow_case) {
  1715   assert_different_registers(obj, var_size_in_bytes, t1, t2, AT);
  1717   Register end = t2;
  1718 #ifndef OPT_THREAD
  1719   Register thread = t1;
  1720   get_thread(thread);
  1721 #else
  1722   Register thread = TREG;
  1723 #endif
  1724   verify_tlab(t1, t2);//blows t1&t2
  1726   ld_ptr(obj, thread, in_bytes(JavaThread::tlab_top_offset()));
  1728   if (var_size_in_bytes == NOREG) {
  1729     // We don't think we need to move con_size_in_bytes to a register first.
  1730     // by yjl 8/17/2005
  1731     assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first");
  1732     addi(end, obj, con_size_in_bytes);
  1733   } else {
  1734     add(end, obj, var_size_in_bytes);
  1735   }
  1737   ld_ptr(AT, thread, in_bytes(JavaThread::tlab_end_offset()));
  1738   sltu(AT, AT, end);
  1739   bne_far(AT, R0, slow_case);
  1740   delayed()->nop();
  1743   // update the tlab top pointer
  1744   st_ptr(end, thread, in_bytes(JavaThread::tlab_top_offset()));
  1746   // recover var_size_in_bytes if necessary
  1747   /*if (var_size_in_bytes == end) {
  1748     sub(var_size_in_bytes, end, obj);
  1749     }*/
  1751   verify_tlab(t1, t2);
  1752 }
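// A C-level sketch (not compiled) of the bump-pointer fast path emitted above;
// returning NULL models the branch to slow_case:
/*
  struct Tlab { char* top; char* end; };   // mirrors tlab_top/end_offset

  void* tlab_allocate_sketch(Tlab* t, size_t size_in_bytes) {
    char* obj     = t->top;
    char* new_top = obj + size_in_bytes;
    if (new_top > t->end) return NULL;     // bne_far(AT, R0, slow_case)
    t->top = new_top;                      // update the tlab top pointer
    return obj;
  }
*/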
  1754 // Defines obj, preserves var_size_in_bytes
  1755 void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
  1756                                    Register t1, Register t2, Label& slow_case) {
  1757   assert_different_registers(obj, var_size_in_bytes, t1, AT);
  1758   if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { //by yyq
  1759     // No allocation in the shared eden.
  1760     b_far(slow_case);
  1761     delayed()->nop();
  1762   } else {
  1764 #ifndef _LP64
  1765     Address heap_top(t1, Assembler::split_low((intptr_t)Universe::heap()->top_addr()));
  1766     lui(t1, split_high((intptr_t)Universe::heap()->top_addr()));
  1767 #else
  1768     Address heap_top(t1);
  1769     li(t1, (long)Universe::heap()->top_addr());
  1770 #endif
  1771     ld_ptr(obj, heap_top);
  1773     Register end = t2;
  1774     Label retry;
  1776     bind(retry);
  1777     if (var_size_in_bytes == NOREG) {
  1778       // We don't think we need to move con_size_in_bytes to a register first.
  1779       assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first");
  1780       addi(end, obj, con_size_in_bytes);
  1781     } else {
  1782       add(end, obj, var_size_in_bytes);
  1783     }
  1784     // if end < obj then we wrapped around => object too long => slow case
  1785     sltu(AT, end, obj);
  1786     bne_far(AT, R0, slow_case);
  1787     delayed()->nop();
  1789     li(AT, (long)Universe::heap()->end_addr());
  1790     sltu(AT, AT, end);
  1791     bne_far(AT, R0, slow_case);
  1792     delayed()->nop();
  1793     // Compare obj with the top addr, and if still equal, store the new top addr in
  1794     // end at the address of the top addr pointer.  On MIPS the ll/sc pair inside
  1795     // cmpxchg provides the atomicity; AT is set if the values were equal, cleared otherwise.
  1796     //if (os::is_MP()) {
  1797     //  sync();
  1798     //}
  1800     // if someone beat us on the allocation, try again, otherwise continue
  1801     cmpxchg(end, heap_top, obj);
  1802     beq_far(AT, R0, retry);    //by yyq
  1803     delayed()->nop();
  1804   }
  1805 }
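// A C-level sketch (not compiled) of the CAS retry loop above; heap_top and
// heap_end are stand-ins for the heap's shared top pointer and end:
/*
  void* eden_allocate_sketch(std::atomic<char*>* heap_top, char* heap_end,
                             size_t size_in_bytes) {
    char* obj = heap_top->load();
    for (;;) {                                     // bind(retry)
      char* end = obj + size_in_bytes;
      if (end < obj)      return NULL;             // wrapped around: object too long
      if (end > heap_end) return NULL;             // past the heap end: slow case
      // cmpxchg(end, heap_top, obj): on failure obj is refreshed and we retry
      if (heap_top->compare_exchange_weak(obj, end)) return obj;
    }
  }
*/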
  1808 // C2 doesn't invoke this one.
  1809 void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case) {
  1810   Register top = T0;
  1811   Register t1  = T1;
  1812 /* Jin: tlab_refill() is called in
  1814      [c1_Runtime1_mips.cpp] Runtime1::generate_code_for(new_type_array_id);
  1816   In generate_code_for(), T2 has been assigned as a register (length), which is used
  1817  after calling tlab_refill().
  1818   Therefore, tlab_refill() should not use T2.
  1820  Source:
  1822 Exception in thread "main" java.lang.ArrayIndexOutOfBoundsException
  1823         at java.lang.System.arraycopy(Native Method)
  1824         at java.util.Arrays.copyOf(Arrays.java:2799)  <-- alloc_array
  1825         at sun.misc.Resource.getBytes(Resource.java:117)
  1826         at java.net.URLClassLoader.defineClass(URLClassLoader.java:273)
  1827         at java.net.URLClassLoader.findClass(URLClassLoader.java:205)
  1828         at java.lang.ClassLoader.loadClass(ClassLoader.java:321)
  1829  */
  1830   Register t2  = T9;
  1831   Register t3  = T3;
  1832   Register thread_reg = T8;
  1833   Label do_refill, discard_tlab;
  1834   if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { //by yyq
  1835     // No allocation in the shared eden.
  1836     b(slow_case);
  1837     delayed()->nop();
  1838   }
  1840   get_thread(thread_reg);
  1842   ld_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
  1843   ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_end_offset()));
  1845   // calculate amount of free space
  1846   sub(t1, t1, top);
  1847   shr(t1, LogHeapWordSize);
  1849   // Retain tlab and allocate object in shared space if
  1850   // the amount free in the tlab is too large to discard.
  1851   ld_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
  1852   slt(AT, t2, t1);
  1853   beq(AT, R0, discard_tlab);
  1854   delayed()->nop();
  1856   // Retain
  1858 #ifndef _LP64
  1859   move(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
  1860 #else
  1861   li(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
  1862 #endif
  1863   add(t2, t2, AT);
  1864   st_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
  1866   if (TLABStats) {
  1867     // increment number of slow_allocations
  1868     lw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
  1869     addiu(AT, AT, 1);
  1870     sw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
  1871   }
  1872   b(try_eden);
  1873   delayed()->nop();
  1875   bind(discard_tlab);
  1876   if (TLABStats) {
  1877     // increment number of refills
  1878     lw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
  1879     addi(AT, AT, 1);
  1880     sw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
  1881     // accumulate wastage -- t1 is amount free in tlab
  1882     lw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
  1883     add(AT, AT, t1);
  1884     sw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
  1885   }
  1887   // if tlab is currently allocated (top or end != null) then
  1888   // fill [top, end + alignment_reserve) with array object
  1889   beq(top, R0, do_refill);
  1890   delayed()->nop();
  1892   // set up the mark word
  1893   li(AT, (long)markOopDesc::prototype()->copy_set_hash(0x2));
  1894   st_ptr(AT, top, oopDesc::mark_offset_in_bytes());
  1896   // set the length to the remaining space
  1897   addi(t1, t1, - typeArrayOopDesc::header_size(T_INT));
  1898   addi(t1, t1, ThreadLocalAllocBuffer::alignment_reserve());
  1899   shl(t1, log2_intptr(HeapWordSize/sizeof(jint)));
  1900   sw(t1, top, arrayOopDesc::length_offset_in_bytes());
  1902   // set klass to intArrayKlass
  1903 #ifndef _LP64
  1904   lui(AT, split_high((intptr_t)Universe::intArrayKlassObj_addr()));
  1905   lw(t1, AT, split_low((intptr_t)Universe::intArrayKlassObj_addr()));
  1906 #else
  1907   li(AT, (intptr_t)Universe::intArrayKlassObj_addr());
  1908   ld_ptr(t1, AT, 0);
  1909 #endif
  1910   //st_ptr(t1, top, oopDesc::klass_offset_in_bytes());
  1911   store_klass(top, t1);
  1913   // refill the tlab with an eden allocation
  1914   bind(do_refill);
  1915   ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
  1916   shl(t1, LogHeapWordSize);
  1917   // add object_size ??
  1918   eden_allocate(top, t1, 0, t2, t3, slow_case);
  1920   // Check that t1 was preserved in eden_allocate.
  1921 #ifdef ASSERT
  1922   if (UseTLAB) {
  1923     Label ok;
  1924     assert_different_registers(thread_reg, t1);
  1925     ld_ptr(AT, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
  1926     shl(AT, LogHeapWordSize);
  1927     beq(AT, t1, ok);
  1928     delayed()->nop();
  1929     stop("assert(t1 != tlab size)");
  1930     should_not_reach_here();
  1932     bind(ok);
  1933   }
  1934 #endif
  1935   st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_start_offset()));
  1936   st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
  1937   add(top, top, t1);
  1938   addi(top, top, - ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
  1939   st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_end_offset()));
  1940   verify_tlab(t1, t2);
  1941   b(retry);
  1942   delayed()->nop();
  1943 }
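// The refill policy above, sketched (not compiled): keep the current tlab and
// allocate this object in the shared eden when the free space is too large to
// discard; otherwise plug the old tlab with a dummy int[] and refill it:
/*
  if (free_words > refill_waste_limit) {
    refill_waste_limit += refill_waste_limit_increment;   // retain the tlab
    goto try_eden;
  } else {
    // fill [top, end + alignment_reserve) with an int[] so the heap stays parseable
    goto do_refill;
  }
*/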
  1945 static const double     pi_4 =  0.7853981633974483;
  1947 // The x86 version is too clumsy; we don't think we need that fuss here. Maybe we're wrong, FIXME
  1948 // must get argument(a double) in F12/F13
  1949 //void MacroAssembler::trigfunc(char trig, bool preserve_cpu_regs, int num_fpu_regs_in_use) {
  1950 // We need to preserve the registers which may be modified during the call. @Jerome
  1951 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
  1952 // save all modified registers here
  1953 //  if (preserve_cpu_regs) {
  1954 //  }
  1955 // FIXME: in the disassembly of trigfunc, only V0, V1, T9, SP and RA are used, so we only save V0, V1, T9
  1956   pushad();
  1957 //we should preserve the stack space before we call
  1958   addi(SP, SP, -wordSize * 2);
  1959   switch (trig) {
  1960     case 's' :
  1961       call( CAST_FROM_FN_PTR(address, SharedRuntime::dsin), relocInfo::runtime_call_type );
  1962       delayed()->nop();
  1963       break;
  1964     case 'c':
  1965       call( CAST_FROM_FN_PTR(address, SharedRuntime::dcos), relocInfo::runtime_call_type );
  1966       delayed()->nop();
  1967       break;
  1968     case 't':
  1969       call( CAST_FROM_FN_PTR(address, SharedRuntime::dtan), relocInfo::runtime_call_type );
  1970       delayed()->nop();
  1971       break;
  1972     default: assert(false, "bad intrinsic");
  1973       break;
  1974   }
  1977   addi(SP, SP, wordSize * 2);
  1978   popad();
  1979 //  if (preserve_cpu_regs) {
  1980 //  }
  1983 #ifdef _LP64
  1984 void MacroAssembler::li(Register rd, long imm) {
  1985   if (imm <= max_jint && imm >= min_jint) {
  1986     li32(rd, (int)imm);
  1987   } else if (julong(imm) <= 0xFFFFFFFF) {
  1988     assert_not_delayed();
  1989     // lui sign-extends, so we can't use that.
  1990     ori(rd, R0, julong(imm) >> 16);
  1991     dsll(rd, rd, 16);
  1992     ori(rd, rd, split_low(imm));
  1993   //aoqi_test
  1994   //} else if ((imm > 0) && ((imm >> 48) == 0)) {
  1995   } else if ((imm > 0) && is_simm16(imm >> 32)) {
  1996     /* A 48-bit address */
  1997     li48(rd, imm);
  1998   } else {
  1999     li64(rd, imm);
  2000   }
  2001 }
  2002 #else
  2003 void MacroAssembler::li(Register rd, long imm) {
  2004   li32(rd, (int)imm);
  2005 }
  2006 #endif
  2008 void MacroAssembler::li32(Register reg, int imm) {
  2009   if (is_simm16(imm)) {
  2010     /* Jin: for imm < 0, we should use addi instead of addiu.
  2012      *  java.lang.StringCoding$StringDecoder.decode(jobject, jint, jint)
  2014      *  78 move [int:-1|I] [a0|I]
  2015      *    : daddi a0, zero, 0xffffffff  (correct)
  2016      *    : daddiu a0, zero, 0xffffffff (incorrect)
  2017      */
  2018     if (imm >= 0)
  2019       addiu(reg, R0, imm);
  2020     else
  2021       addi(reg, R0, imm);
  2022   } else {
  2023     lui(reg, split_low(imm >> 16));
  2024     if (split_low(imm))
  2025       ori(reg, reg, split_low(imm));
  2026   }
  2027 }
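// How li32 splits a non-simm16 32-bit immediate, sketched (not compiled):
/*
  uint16_t hi = (uint32_t)imm >> 16;   // lui(reg, hi): reg = sext32(hi << 16)
  uint16_t lo = imm & 0xffff;          // ori(reg, reg, lo), emitted only if lo != 0
  // resulting register value: (hi << 16) | lo, sign-extended from bit 31
*/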
  2029 #ifdef _LP64
  2030 void MacroAssembler::set64(Register d, jlong value) {
  2031   assert_not_delayed();
  2033   int hi = (int)(value >> 32);
  2034   int lo = (int)(value & ~0);
  2036   if (value == lo) {  // 32-bit integer
  2037     if (is_simm16(value)) {
  2038       daddiu(d, R0, value);
  2039     } else {
  2040       lui(d, split_low(value >> 16));
  2041       if (split_low(value)) {
  2042         ori(d, d, split_low(value));
  2043       }
  2044     }
  2045   } else if (hi == 0) {  // hardware zero-extends to upper 32
  2046       ori(d, R0, julong(value) >> 16);
  2047       dsll(d, d, 16);
  2048       if (split_low(value)) {
  2049         ori(d, d, split_low(value));
  2050       }
  2051   } else if ((value > 0) && is_simm16(value >> 32)) {  // li48
  2052     // 4 insts
  2053     li48(d, value);
  2054   } else {  // li64
  2055     // 6 insts
  2056     li64(d, value);
  2057   }
  2058 }
  2061 int MacroAssembler::insts_for_set64(jlong value) {
  2062   int hi = (int)(value >> 32);
  2063   int lo = (int)(value & ~0);
  2065   int count = 0;
  2067   if (value == lo) {  // 32-bit integer
  2068     if (is_simm16(value)) {
  2069       //daddiu(d, R0, value);
  2070       count++;
  2071     } else {
  2072       //lui(d, split_low(value >> 16));
  2073       count++;
  2074       if (split_low(value)) {
  2075         //ori(d, d, split_low(value));
  2076         count++;
  2077       }
  2078     }
  2079   } else if (hi == 0) {  // hardware zero-extends to upper 32
  2080       //ori(d, R0, julong(value) >> 16);
  2081       //dsll(d, d, 16);
  2082       count += 2;
  2083       if (split_low(value)) {
  2084         //ori(d, d, split_low(value));
  2085         count++;
  2086       }
  2087   } else if ((value > 0) && is_simm16(value >> 32)) {  // li48
  2088     // 4 insts
  2089     //li48(d, value);
  2090     count += 4;
  2091   } else {  // li64
  2092     // 6 insts
  2093     //li64(d, value);
  2094     count += 6;
  2095   }
  2097   return count;
  2098 }
  2100 void MacroAssembler::patchable_set48(Register d, jlong value) {
  2101   assert_not_delayed();
  2103   int hi = (int)(value >> 32);
  2104   int lo = (int)(value & ~0);
  2106   int count = 0;
  2108   if (value == lo) {  // 32-bit integer
  2109     if (is_simm16(value)) {
  2110       daddiu(d, R0, value);
  2111       count += 1;
  2112     } else {
  2113       lui(d, split_low(value >> 16));
  2114       count += 1;
  2115       if (split_low(value)) {
  2116         ori(d, d, split_low(value));
  2117         count += 1;
  2118       }
  2119     }
  2120   } else if (hi == 0) {  // hardware zero-extends to upper 32
  2121       ori(d, R0, julong(value) >> 16);
  2122       dsll(d, d, 16);
  2123       count += 2;
  2124       if (split_low(value)) {
  2125         ori(d, d, split_low(value));
  2126         count += 1;
  2127       }
  2128   } else if ((value > 0) && is_simm16(value >> 32)) {  // li48
  2129     // 4 insts
  2130     li48(d, value);
  2131     count += 4;
  2132   } else {  // li64
  2133     tty->print_cr("value = 0x%lx", value);
  2134     guarantee(false, "Not supported yet !");
  2135   }
  2137   for (; count < 4; count++) {
  2138     nop();
  2139   }
  2140 }
  2142 void MacroAssembler::patchable_set32(Register d, jlong value) {
  2143   assert_not_delayed();
  2145   int hi = (int)(value >> 32);
  2146   int lo = (int)(value & ~0);
  2148   int count = 0;
  2150   if (value == lo) {  // 32-bit integer
  2151     if (is_simm16(value)) {
  2152       daddiu(d, R0, value);
  2153       count += 1;
  2154     } else {
  2155       lui(d, split_low(value >> 16));
  2156       count += 1;
  2157       if (split_low(value)) {
  2158         ori(d, d, split_low(value));
  2159         count += 1;
  2162   } else if (hi == 0) {  // hardware zero-extends to upper 32
  2163       ori(d, R0, julong(value) >> 16);
  2164       dsll(d, d, 16);
  2165       count += 2;
  2166       if (split_low(value)) {
  2167         ori(d, d, split_low(value));
  2168         count += 1;
  2170   } else {
  2171     tty->print_cr("value = 0x%lx", value);
  2172     guarantee(false, "Not supported yet !");
  2173   }
  2175   for (; count < 3; count++) {
  2176     nop();
  2177   }
  2178 }
  2180 void MacroAssembler::patchable_call32(Register d, jlong value) {
  2181   assert_not_delayed();
  2183   int hi = (int)(value >> 32);
  2184   int lo = (int)(value & ~0);
  2186   int count = 0;
  2188   if (value == lo) {  // 32-bit integer
  2189     if (is_simm16(value)) {
  2190       daddiu(d, R0, value);
  2191       count += 1;
  2192     } else {
  2193       lui(d, split_low(value >> 16));
  2194       count += 1;
  2195       if (split_low(value)) {
  2196         ori(d, d, split_low(value));
  2197         count += 1;
  2200   } else {
  2201     tty->print_cr("value = 0x%lx", value);
  2202     guarantee(false, "Not supported yet !");
  2203   }
  2205   for (; count < 2; count++) {
  2206     nop();
  2207   }
  2208 }
  2210 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
  2211   assert(UseCompressedClassPointers, "should only be used for compressed header");
  2212   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
  2214   int klass_index = oop_recorder()->find_index(k);
  2215   RelocationHolder rspec = metadata_Relocation::spec(klass_index);
  2216   long narrowKlass = (long)Klass::encode_klass(k);
  2218   relocate(rspec, Assembler::narrow_oop_operand);
  2219   patchable_set48(dst, narrowKlass);
  2220 }
  2223 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
  2224   assert(UseCompressedOops, "should only be used for compressed header");
  2225   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
  2227   int oop_index = oop_recorder()->find_index(obj);
  2228   RelocationHolder rspec = oop_Relocation::spec(oop_index);
  2230   relocate(rspec, Assembler::narrow_oop_operand);
  2231   patchable_set48(dst, oop_index);
  2232 }
  2234 void MacroAssembler::li64(Register rd, long imm) {
  2235   assert_not_delayed();
  2236   lui(rd, imm >> 48);
  2237   ori(rd, rd, split_low(imm >> 32));
  2238   dsll(rd, rd, 16);
  2239   ori(rd, rd, split_low(imm >> 16));
  2240   dsll(rd, rd, 16);
  2241   ori(rd, rd, split_low(imm));
  2242 }
  2244 void MacroAssembler::li48(Register rd, long imm) {
  2245   assert_not_delayed();
  2246   assert(is_simm16(imm >> 32), "Not a 48-bit address");
  2247   lui(rd, imm >> 32);
  2248   ori(rd, rd, split_low(imm >> 16));
  2249   dsll(rd, rd, 16);
  2250   ori(rd, rd, split_low(imm));
  2251 }
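// Reconstructing the value a li48 sequence materializes, sketched (not
// compiled); the result equals imm whenever is_simm16(imm >> 32) holds:
/*
  uint64_t r = (uint64_t)(int64_t)(int16_t)(imm >> 32) << 16;  // lui(rd, imm >> 32)
  r |= (uint16_t)(imm >> 16);                                  // ori(rd, rd, mid16)
  r <<= 16;                                                    // dsll(rd, rd, 16)
  r |= (uint16_t)imm;                                          // ori(rd, rd, low16)
*/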
  2252 #endif
  2253 // NOTE: we don't push eax as on i486.
  2254 // The x86 version saves eax because it uses eax as the jump register.
  2255 void MacroAssembler::verify_oop(Register reg, const char* s) {
  2256   /*
  2257      if (!VerifyOops) return;
  2259   // Pass register number to verify_oop_subroutine
  2260   char* b = new char[strlen(s) + 50];
  2261   sprintf(b, "verify_oop: %s: %s", reg->name(), s);
  2262   push(rax);                          // save rax,
  2263   push(reg);                          // pass register argument
  2264   ExternalAddress buffer((address) b);
  2265   // avoid using pushptr, as it modifies scratch registers
  2266   // and our contract is not to modify anything
  2267   movptr(rax, buffer.addr());
  2268   push(rax);
  2269   // call indirectly to solve generation ordering problem
  2270   movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  2271   call(rax);
  2272    */
  2273   if (!VerifyOops) return;
  2274   const char * b = NULL;
  2275   stringStream ss;
  2276   ss.print("verify_oop: %s: %s", reg->name(), s);
  2277   b = code_string(ss.as_string());
  2278 #ifdef _LP64
  2279   pushad();
  2280   move(A1, reg);
  2281   li(A0, (long)b);
  2282   li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
  2283   ld(T9, AT, 0);
  2284   jalr(T9);
  2285   delayed()->nop();
  2286   popad();
  2287 #else
  2288   // Pass register number to verify_oop_subroutine
  2289   sw(T0, SP, -1 * wordSize);
  2290   sw(T1, SP, -2 * wordSize);
  2291   sw(RA, SP, -3 * wordSize);
  2292   sw(A0, SP, -4 * wordSize);
  2293   sw(A1, SP, -5 * wordSize);
  2294   sw(AT, SP, -6 * wordSize);
  2295   sw(T9, SP, -7 * wordSize);
  2296   addiu(SP, SP, - 7 * wordSize);
  2297   move(A1, reg);
  2298   li(A0, (long)b);
  2299   // call indirectly to solve generation ordering problem
  2300   li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
  2301   lw(T9, AT, 0);
  2302   jalr(T9);
  2303   delayed()->nop();
  2304   lw(T0, SP, 6* wordSize);
  2305   lw(T1, SP, 5* wordSize);
  2306   lw(RA, SP, 4* wordSize);
  2307   lw(A0, SP, 3* wordSize);
  2308   lw(A1, SP, 2* wordSize);
  2309   lw(AT, SP, 1* wordSize);
  2310   lw(T9, SP, 0* wordSize);
  2311   addiu(SP, SP, 7 * wordSize);
  2312 #endif
  2313 }
  2316 void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
  2317   if (!VerifyOops) {
  2318     nop();
  2319     return;
  2320   }
  2321   // Pass register number to verify_oop_subroutine
  2322   const char * b = NULL;
  2323   stringStream ss;
  2324   ss.print("verify_oop_addr: %s",  s);
  2325   b = code_string(ss.as_string());
  2327   st_ptr(T0, SP, - wordSize);
  2328   st_ptr(T1, SP, - 2*wordSize);
  2329   st_ptr(RA, SP, - 3*wordSize);
  2330   st_ptr(A0, SP, - 4*wordSize);
  2331   st_ptr(A1, SP, - 5*wordSize);
  2332   st_ptr(AT, SP, - 6*wordSize);
  2333   st_ptr(T9, SP, - 7*wordSize);
  2334   ld_ptr(A1, addr);   // addr may use SP, so load from it before change SP
  2335   addiu(SP, SP, - 7 * wordSize);
  2337   li(A0, (long)b);
  2338   // call indirectly to solve generation ordering problem
  2339   li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
  2340   ld_ptr(T9, AT, 0);
  2341   jalr(T9);
  2342   delayed()->nop();
  2343   ld_ptr(T0, SP, 6* wordSize);
  2344   ld_ptr(T1, SP, 5* wordSize);
  2345   ld_ptr(RA, SP, 4* wordSize);
  2346   ld_ptr(A0, SP, 3* wordSize);
  2347   ld_ptr(A1, SP, 2* wordSize);
  2348   ld_ptr(AT, SP, 1* wordSize);
  2349   ld_ptr(T9, SP, 0* wordSize);
  2350   addiu(SP, SP, 7 * wordSize);
  2351 }
  2353 // used registers :  T0, T1
  2354 void MacroAssembler::verify_oop_subroutine() {
  2355   // RA: ra
  2356   // A0: char* error message
  2357   // A1: oop   object to verify
  2359   Label exit, error;
  2360   // increment counter
  2361   li(T0, (long)StubRoutines::verify_oop_count_addr());
  2362   lw(AT, T0, 0);
  2363 #ifdef _LP64
  2364   daddi(AT, AT, 1);
  2365 #else
  2366   addi(AT, AT, 1);
  2367 #endif
  2368   sw(AT, T0, 0);
  2370   // make sure object is 'reasonable'
  2371   beq(A1, R0, exit);         // if obj is NULL it is ok
  2372   delayed()->nop();
  2374   // Check if the oop is in the right area of memory
  2375   //const int oop_mask = Universe::verify_oop_mask();
  2376   //const int oop_bits = Universe::verify_oop_bits();
  2377   const uintptr_t oop_mask = Universe::verify_oop_mask();
  2378   const uintptr_t oop_bits = Universe::verify_oop_bits();
  2379   li(AT, oop_mask);
  2380   andr(T0, A1, AT);
  2381   li(AT, oop_bits);
  2382   bne(T0, AT, error);
  2383   delayed()->nop();
  2385   // make sure klass is 'reasonable'
  2386   //add for compressedoops
  2387   reinit_heapbase();
  2388   //add for compressedoops
  2389   load_klass(T0, A1);
  2390   beq(T0, R0, error);                        // if klass is NULL it is broken
  2391   delayed()->nop();
  2392   #if 0
  2393   //FIXME:wuhui.
  2394   // Check if the klass is in the right area of memory
  2395   //const int klass_mask = Universe::verify_klass_mask();
  2396   //const int klass_bits = Universe::verify_klass_bits();
  2397   const uintptr_t klass_mask = Universe::verify_klass_mask();
  2398   const uintptr_t klass_bits = Universe::verify_klass_bits();
  2400   li(AT, klass_mask);
  2401   andr(T1, T0, AT);
  2402   li(AT, klass_bits);
  2403   bne(T1, AT, error);
  2404   delayed()->nop();
  2405   // make sure klass' klass is 'reasonable'
  2406   //add for compressedoops
  2407   load_klass(T0, T0);
  2408   beq(T0, R0, error);  // if klass' klass is NULL it is broken
  2409   delayed()->nop();
  2411   li(AT, klass_mask);
  2412   andr(T1, T0, AT);
  2413   li(AT, klass_bits);
  2414   bne(T1, AT, error);
  2415   delayed()->nop();     // if klass not in right area of memory it is broken too.
  2416 #endif
  2417   // return if everything seems ok
  2418   bind(exit);
  2420   jr(RA);
  2421   delayed()->nop();
  2423   // handle errors
  2424   bind(error);
  2425   pushad();
  2426 #ifndef _LP64
  2427   addi(SP, SP, (-1) * wordSize);
  2428 #endif
  2429   call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  2430   delayed()->nop();
  2431 #ifndef _LP64
  2432   addiu(SP, SP, 1 * wordSize);
  2433 #endif
  2434   popad();
  2435   jr(RA);
  2436   delayed()->nop();
  2437 }
  2439 void MacroAssembler::verify_tlab(Register t1, Register t2) {
  2440 #ifdef ASSERT
  2441   assert_different_registers(t1, t2, AT);
  2442   if (UseTLAB && VerifyOops) {
  2443     Label next, ok;
  2445     get_thread(t1);
  2447     ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset()));
  2448     ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset()));
  2449     sltu(AT, t2, AT);
  2450     beq(AT, R0, next);
  2451     delayed()->nop();
  2453     stop("assert(top >= start)");
  2455     bind(next);
  2456     ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset()));
  2457     sltu(AT, AT, t2);
  2458     beq(AT, R0, ok);
  2459     delayed()->nop();
  2461     stop("assert(top <= end)");
  2463     bind(ok);
  2464   }
  2466 #endif
  2467 }
  2468  RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
  2469                                                        Register tmp,
  2470                                                        int offset) {
  2471    intptr_t value = *delayed_value_addr;
  2472    if (value != 0)
  2473    return RegisterOrConstant(value + offset);
  2474    AddressLiteral a(delayed_value_addr);
  2475    // load indirectly to solve generation ordering problem
  2476    //movptr(tmp, ExternalAddress((address) delayed_value_addr));
  2477    //ld(tmp, a);
  2478    if (offset != 0)
  2479      daddi(tmp,tmp, offset);
  2481    return RegisterOrConstant(tmp);
  2482  }
  2484 void MacroAssembler::hswap(Register reg) {
  2485   //short
  2486   //andi(reg, reg, 0xffff);
  2487   srl(AT, reg, 8);
  2488   sll(reg, reg, 24);
  2489   sra(reg, reg, 16);
  2490   orr(reg, reg, AT);
  2491 }
  2493 void MacroAssembler::huswap(Register reg) {
  2494 #ifdef _LP64
  2495   dsrl(AT, reg, 8);
  2496   dsll(reg, reg, 24);
  2497   dsrl(reg, reg, 16);
  2498   orr(reg, reg, AT);
  2499   andi(reg, reg, 0xffff);
  2500 #else
  2501   //andi(reg, reg, 0xffff);
  2502   srl(AT, reg, 8);
  2503   sll(reg, reg, 24);
  2504   srl(reg, reg, 16);
  2505   orr(reg, reg, AT);
  2506 #endif
  2507 }
  2509 // A trick: byte-swap a 32-bit value using only one extra register (AT)
  2510 // 32 bits
  2511 void MacroAssembler::swap(Register reg) {
  2512   srl(AT, reg, 8);
  2513   sll(reg, reg, 24);
  2514   orr(reg, reg, AT);
  2515   //reg : 4 1 2 3
  2516   srl(AT, AT, 16);
  2517   xorr(AT, AT, reg);
  2518   andi(AT, AT, 0xff);
  2519   //AT : 0 0 0 1^3);
  2520   xorr(reg, reg, AT);
  2521   //reg : 4 1 2 1
  2522   sll(AT, AT, 16);
  2523   xorr(reg, reg, AT);
  2524   //reg : 4 3 2 1
  2525 }
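// The same trick in C (not compiled): one rotate plus a masked XOR that swaps
// bytes 1 and 3, using a single scratch value; b1 is the most significant byte:
/*
  uint32_t at = reg >> 8;        // srl(AT, reg, 8)
  reg = (reg << 24) | at;        // sll + orr: b1 b2 b3 b4  ->  b4 b1 b2 b3
  at >>= 16;                     // at = b1
  at = (at ^ reg) & 0xff;        // at = b1 ^ b3
  reg ^= at;                     // low byte: b3 -> b1, reg = b4 b1 b2 b1
  reg ^= at << 16;               // byte 2:   b1 -> b3, reg = b4 b3 b2 b1
*/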
  2527 #ifdef _LP64
  2529 /* do 32-bit CAS using MIPS64 lld/scd
  2531   Jin: cas_int should only compare 32 bits of the memory value.
  2532        However, lld/scd will do a 64-bit operation, which violates the intention of cas_int.
  2533        To simulate a 32-bit atomic operation, the value loaded with LLD should be split into
  2534        two halves, and only the low 32 bits are compared.  If they are equal, the low 32 bits
  2535        of newval, plus the high 32 bits of the memory value, are stored together with SCD.
  2537 Example:
  2539       double d = 3.1415926;
  2540       System.err.println("hello" + d);
  2542   sun.misc.FloatingDecimal$1.<init>()
  2544    `- java.util.concurrent.atomic.AtomicInteger::compareAndSet()
  2546   38 cas_int [a7a7|J] [a0|I] [a6|I]
  2547 // a0: 0xffffffffe8ea9f63 pc: 0x55647f3354
  2548 // a6: 0x4ab325aa
  2550 again:
  2551    0x00000055647f3c5c: lld at, 0x0(a7)                          ; 64-bit load, "0xe8ea9f63"
  2553    0x00000055647f3c60: sll t9, at, 0                            ; t9: low-32 bits (sign extended)
  2554    0x00000055647f3c64: dsrl32 t8, at, 0                         ; t8: high-32 bits
  2555    0x00000055647f3c68: dsll32 t8, t8, 0
  2556    0x00000055647f3c6c: bne t9, a0, 0x00000055647f3c9c           ; goto nequal
  2557    0x00000055647f3c70: sll zero, zero, 0
  2559    0x00000055647f3c74: ori v1, zero, 0xffffffff                 ; v1: low-32 bits of newval (sign unextended)
  2560    0x00000055647f3c78: dsll v1, v1, 16                          ; v1 = a6 & 0xFFFFFFFF;
  2561    0x00000055647f3c7c: ori v1, v1, 0xffffffff
  2562    0x00000055647f3c80: and v1, a6, v1
  2563    0x00000055647f3c84: or at, t8, v1
  2564    0x00000055647f3c88: scd at, 0x0(a7)
  2565    0x00000055647f3c8c: beq at, zero, 0x00000055647f3c5c         ; goto again
  2566    0x00000055647f3c90: sll zero, zero, 0
  2567    0x00000055647f3c94: beq zero, zero, 0x00000055647f45ac       ; goto done
  2568    0x00000055647f3c98: sll zero, zero, 0
  2569 nequal:
  2570    0x00000055647f45a4: dadd a0, t9, zero
  2571    0x00000055647f45a8: dadd at, zero, zero
  2572 done:
  2573 */
  2575 void MacroAssembler::cmpxchg32(Register x_reg, Address dest, Register c_reg) {
  2576   /* 2012/11/11 Jin: MIPS64 can use ll/sc for 32-bit atomic memory access */
  2577   Label done, again, nequal;
  2579   bind(again);
  2581   if(UseSyncLevel >= 3000 || UseSyncLevel < 2000) sync();
  2582   ll(AT, dest);
  2583   bne(AT, c_reg, nequal);
  2584   delayed()->nop();
  2586   move(AT, x_reg);
  2587   sc(AT, dest);
  2588   beq(AT, R0, again);
  2589   delayed()->nop();
  2590   b(done);
  2591   delayed()->nop();
  2593   // not xchged
  2594   bind(nequal);
  2595   sync();
  2596   move(c_reg, AT);
  2597   move(AT, R0);
  2599   bind(done);
  2600 }
  2601 #endif  // cmpxchg32
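// Semantics of the ll/sc loop above (lld/scd in the 64-bit cmpxchg below),
// sketched (not compiled) with a std::atomic stand-in: AT ends up 1 on
// success, 0 on failure, and c_reg is updated with the observed value when
// the exchange fails:
/*
  bool cmpxchg_sketch(std::atomic<intptr_t>* dest, intptr_t* c_reg, intptr_t x_reg) {
    for (;;) {                                   // bind(again)
      intptr_t at = dest->load();                // ll(AT, dest)
      if (at != *c_reg) {                        // bne(AT, c_reg, nequal)
        *c_reg = at;                             // move(c_reg, AT)
        return false;                            // AT = 0
      }
      intptr_t expected = at;
      if (dest->compare_exchange_weak(expected, x_reg))   // sc(AT, dest)
        return true;                             // AT != 0: store succeeded
      // sc failed (reservation lost): retry
    }
  }
*/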
  2603 void MacroAssembler::cmpxchg(Register x_reg, Address dest, Register c_reg) {
  2604   Label done, again, nequal;
  2606   bind(again);
  2607   if(UseSyncLevel >= 3000 || UseSyncLevel < 2000) sync();
  2608 #ifdef _LP64
  2609   lld(AT, dest);
  2610 #else
  2611   ll(AT, dest);
  2612 #endif
  2613   bne(AT, c_reg, nequal);
  2614   delayed()->nop();
  2616   move(AT, x_reg);
  2617 #ifdef _LP64
  2618   scd(AT, dest);
  2619 #else
  2620   sc(AT, dest);
  2621 #endif
  2622   beq(AT, R0, again);
  2623   delayed()->nop();
  2624   b(done);
  2625   delayed()->nop();
  2627   // not xchged
  2628   bind(nequal);
  2629   sync();
  2630   move(c_reg, AT);
  2631   move(AT, R0);
  2633   bind(done);
  2634 }
  2636 void MacroAssembler::cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi) {
  2637   Label done, again, nequal;
  2639   Register x_reg = x_regLo;
  2640   dsll32(x_regHi, x_regHi, 0);
  2641   dsll32(x_regLo, x_regLo, 0);
  2642   dsrl32(x_regLo, x_regLo, 0);
  2643   orr(x_reg, x_regLo, x_regHi);
  2645   Register c_reg = c_regLo;
  2646   dsll32(c_regHi, c_regHi, 0);
  2647   dsll32(c_regLo, c_regLo, 0);
  2648   dsrl32(c_regLo, c_regLo, 0);
  2649   orr(c_reg, c_regLo, c_regHi);
  2651   bind(again);
  2653   if(UseSyncLevel >= 3000 || UseSyncLevel < 2000) sync();
  2654   lld(AT, dest);
  2655   bne(AT, c_reg, nequal);
  2656   delayed()->nop();
  2658   //move(AT, x_reg);
  2659   dadd(AT, x_reg, R0);
  2660   scd(AT, dest);
  2661   beq(AT, R0, again);
  2662   delayed()->nop();
  2663   b(done);
  2664   delayed()->nop();
  2666   // not xchged
  2667   bind(nequal);
  2668   sync();
  2669   //move(c_reg, AT);
  2670   //move(AT, R0);
  2671   dadd(c_reg, AT, R0);
  2672   dadd(AT, R0, R0);
  2673   bind(done);
  2674 }
  2676 // make sure the three registers are different
  2677 void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) {
  2678   assert_different_registers(tmp, fs, ft);
  2679   div_s(tmp, fs, ft);
  2680   trunc_l_s(tmp, tmp);
  2681   cvt_s_l(tmp, tmp);
  2682   mul_s(tmp, tmp, ft);
  2683   sub_s(fd, fs, tmp);
  2684 }
  2686 // make sure the three registers are different
  2687 void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) {
  2688   assert_different_registers(tmp, fs, ft);
  2689   div_d(tmp, fs, ft);
  2690   trunc_l_d(tmp, tmp);
  2691   cvt_d_l(tmp, tmp);
  2692   mul_d(tmp, tmp, ft);
  2693   sub_d(fd, fs, tmp);
  2694 }
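// Both helpers compute a truncated-division remainder; in C (not compiled):
/*
  double rem_d_sketch(double fs, double ft) {
    double q = (double)(int64_t)(fs / ft);   // div_d, trunc_l_d, cvt_d_l
    return fs - q * ft;                      // mul_d, sub_d
  }
*/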
  2696 // Fast_Lock and Fast_Unlock used by C2
  2698 // Because the transitions from emitted code to the runtime
  2699 // monitorenter/exit helper stubs are so slow it's critical that
  2700 // we inline both the stack-locking fast-path and the inflated fast path.
  2701 //
  2702 // See also: cmpFastLock and cmpFastUnlock.
  2703 //
  2704 // What follows is a specialized inline transliteration of the code
  2705 // in slow_enter() and slow_exit().  If we're concerned about I$ bloat
  2706 // another option would be to emit TrySlowEnter and TrySlowExit methods
  2707 // at startup-time.  These methods would accept arguments as
  2708 // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
  2709 // indications in the icc.ZFlag.  Fast_Lock and Fast_Unlock would simply
  2710 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
  2711 // In practice, however, the # of lock sites is bounded and is usually small.
  2712 // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
  2713 // if the processor uses simple bimodal branch predictors keyed by EIP
  2714 // Since the helper routines would be called from multiple synchronization
  2715 // sites.
  2716 //
  2717 // An even better approach would be write "MonitorEnter()" and "MonitorExit()"
  2718 // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
  2719 // to those specialized methods.  That'd give us a mostly platform-independent
  2720 // implementation that the JITs could optimize and inline at their pleasure.
  2721 //    Done correctly, the only time we'd need to cross to native code would be
  2722 // to park() or unpark() threads.  We'd also need a few more unsafe operators
  2723 // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
  2724 // (b) explicit barriers or fence operations.
  2725 //
  2726 // TODO:
  2727 //
  2728 // *  Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
  2729 //    This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
  2730 //    Given TLAB allocation, Self is usually manifested in a register, so passing it into
  2731 //    the lock operators would typically be faster than reifying Self.
  2732 //
  2733 // *  Ideally I'd define the primitives as:
  2734 //       fast_lock   (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
  2735 //       fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
  2736 //    Unfortunately ADLC bugs prevent us from expressing the ideal form.
  2737 //    Instead, we're stuck with a rather awkward and brittle register assignments below.
  2738 //    Furthermore the register assignments are overconstrained, possibly resulting in
  2739 //    sub-optimal code near the synchronization site.
  2740 //
  2741 // *  Eliminate the sp-proximity tests and just use "== Self" tests instead.
  2742 //    Alternately, use a better sp-proximity test.
  2743 //
  2744 // *  Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
  2745 //    Either one is sufficient to uniquely identify a thread.
  2746 //    TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
  2747 //
  2748 // *  Intrinsify notify() and notifyAll() for the common cases where the
  2749 //    object is locked by the calling thread but the waitlist is empty.
  2750 //    avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
  2751 //
  2752 // *  use jccb and jmpb instead of jcc and jmp to improve code density.
  2753 //    But beware of excessive branch density on AMD Opterons.
  2754 //
  2755 // *  Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
  2756 //    or failure of the fast-path.  If the fast-path fails then we pass
  2757 //    control to the slow-path, typically in C.  In Fast_Lock and
  2758 //    Fast_Unlock we often branch to DONE_LABEL, just to find that C2
  2759 //    will emit a conditional branch immediately after the node.
  2760 //    So we have branches to branches and lots of ICC.ZF games.
  2761 //    Instead, it might be better to have C2 pass a "FailureLabel"
  2762 //    into Fast_Lock and Fast_Unlock.  In the case of success, control
  2763 //    will drop through the node.  ICC.ZF is undefined at exit.
  2764 //    In the case of failure, the node will branch directly to the
  2765 //    FailureLabel
  2768 // obj: object to lock
  2769 // box: on-stack box address (displaced header location) - KILLED
  2770 // rax,: tmp -- KILLED
  2771 // scr: tmp -- KILLED
  2772 void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, Register scrReg) {
  2774   // Ensure the register assignments are disjoint
  2775   guarantee (objReg != boxReg, "") ;
  2776   guarantee (objReg != tmpReg, "") ;
  2777   guarantee (objReg != scrReg, "") ;
  2778   guarantee (boxReg != tmpReg, "") ;
  2779   guarantee (boxReg != scrReg, "") ;
  2782   block_comment("FastLock");
  2783   /*
  2784      move(AT, 0x0);
  2785      return;
  2786      */
  2787   if (PrintBiasedLockingStatistics) {
  2788     push(tmpReg);
  2789     atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, AT, tmpReg);
  2790     pop(tmpReg);
  2791   }
  2793   if (EmitSync & 1) {
  2794     move(AT, 0x0);
  2795     return;
  2796   } else
  2797     if (EmitSync & 2) {
  2798       Label DONE_LABEL ;
  2799       if (UseBiasedLocking) {
  2800         // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
  2801         biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
  2802       }
  2804       ld(tmpReg, Address(objReg, 0)) ;          // fetch markword
  2805       ori(tmpReg, tmpReg, 0x1);
  2806       sd(tmpReg, Address(boxReg, 0));           // Anticipate successful CAS
  2808       cmpxchg(boxReg, Address(objReg, 0), tmpReg);          // Updates tmpReg
  2809       bne(AT, R0, DONE_LABEL);
  2810       delayed()->nop();
  2812       // Recursive locking
  2813       dsubu(tmpReg, tmpReg, SP);
  2814       li(AT, (7 - os::vm_page_size() ));
  2815       andr(tmpReg, tmpReg, AT);
  2816       sd(tmpReg, Address(boxReg, 0));
  2817       bind(DONE_LABEL) ;
  2818     } else {
  2819       // Possible cases that we'll encounter in fast_lock
  2820       // ------------------------------------------------
  2821       // * Inflated
  2822       //    -- unlocked
  2823       //    -- Locked
  2824       //       = by self
  2825       //       = by other
  2826       // * biased
  2827       //    -- by Self
  2828       //    -- by other
  2829       // * neutral
  2830       // * stack-locked
  2831       //    -- by self
  2832       //       = sp-proximity test hits
  2833       //       = sp-proximity test generates false-negative
  2834       //    -- by other
  2835       //
  2837       Label IsInflated, DONE_LABEL, PopDone ;
  2839       // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
  2840       // order to reduce the number of conditional branches in the most common cases.
  2841       // Beware -- there's a subtle invariant that fetch of the markword
  2842       // at [FETCH], below, will never observe a biased encoding (*101b).
  2843       // If this invariant is not held we risk exclusion (safety) failure.
  2844       if (UseBiasedLocking && !UseOptoBiasInlining) {
  2845         biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
  2846       }
  2848       ld(tmpReg, Address(objReg, 0)) ;         //Fetch the markword of the object.
  2849       andi(AT, tmpReg, markOopDesc::monitor_value);
  2850       bne(AT, R0, IsInflated);                      // inflated vs stack-locked|neutral|bias
  2851       delayed()->nop();
  2853       // Attempt stack-locking ...
  2854       ori (tmpReg, tmpReg, markOopDesc::unlocked_value);
  2855       sd(tmpReg, Address(boxReg, 0));          // Anticipate successful CAS
  2856       //if (os::is_MP()) {
  2857       //  sync();
  2858       //}
  2860       cmpxchg(boxReg, Address(objReg, 0), tmpReg);           // Updates tmpReg
  2861       //AT == 1: unlocked
  2863       if (PrintBiasedLockingStatistics) {
  2864         Label L;
  2865         beq(AT, R0, L);
  2866         delayed()->nop();
  2867         push(T0);
  2868         push(T1);
  2869         atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
  2870         pop(T1);
  2871         pop(T0);
  2872         bind(L);
  2873       }
  2874       bne(AT, R0, DONE_LABEL);
  2875       delayed()->nop();
  2877       // Recursive locking
  2878       // The object is stack-locked: markword contains stack pointer to BasicLock.
  2879       // Locked by current thread if difference with current SP is less than one page.
  2880       dsubu(tmpReg, tmpReg, SP);
  2881       li(AT, 7 - os::vm_page_size() );
  2882       andr(tmpReg, tmpReg, AT);
  2883       sd(tmpReg, Address(boxReg, 0));
  2884       if (PrintBiasedLockingStatistics) {
  2885         Label L;
  2886         // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++
  2887         bne(tmpReg, R0, L);
  2888         delayed()->nop();
  2889         push(T0);
  2890         push(T1);
  2891         atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
  2892         pop(T1);
  2893         pop(T0);
  2894         bind(L);
  2895       }
  2896       sltiu(AT, tmpReg, 1); /* AT = (tmpReg == 0) ? 1 : 0 */
  2898       b(DONE_LABEL) ;
  2899       delayed()->nop();
  2901       bind(IsInflated) ;
  2902       // The object's monitor m is unlocked iff m->owner == NULL,
  2903       // otherwise m->owner may contain a thread or a stack address.
  2905       // TODO: someday avoid the ST-before-CAS penalty by
  2906       // relocating (deferring) the following ST.
  2907       // We should also think about trying a CAS without having
  2908       // fetched _owner.  If the CAS is successful we may
  2909       // avoid an RTO->RTS upgrade on the $line.
  2910       // Without cast to int32_t a movptr will destroy r10 which is typically obj
  2911       li(AT, (int32_t)intptr_t(markOopDesc::unused_mark()));
  2912       sd(AT, Address(boxReg, 0));
  2914       move(boxReg, tmpReg) ;
  2915       ld(tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
  2916       // if (m->owner != 0) => AT = 0, goto slow path.
  2917       move(AT, R0);
  2918       bne(tmpReg, R0, DONE_LABEL);
  2919       delayed()->nop();
  2921 #ifndef OPT_THREAD
  2922       get_thread (TREG) ;
  2923 #endif
  2924       // It's inflated and appears unlocked
  2925       //if (os::is_MP()) {
  2926       //  sync();
  2927       //}
  2928       cmpxchg(TREG, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), tmpReg) ;
  2929       // Intentional fall-through into DONE_LABEL ...
  2932       // DONE_LABEL is a hot target - we'd really like to place it at the
  2933       // start of cache line by padding with NOPs.
  2934       // See the AMD and Intel software optimization manuals for the
  2935       // most efficient "long" NOP encodings.
  2936       // Unfortunately none of our alignment mechanisms suffice.
  2937       bind(DONE_LABEL);
  2939       // At DONE_LABEL the AT is set as follows ...
  2940       // Fast_Unlock uses the same protocol.
  2941       // AT == 1 -> Success
  2942       // AT == 0 -> Failure - force control through the slow-path
  2944       // Avoid branch-to-branch on AMD processors
  2945       // This appears to be superstition.
  2946       if (EmitSync & 32) nop() ;
  2947     }
  2948 }
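// A distilled C-level sketch (not compiled) of the stack-locking portion of
// the fast path above; biased locking and the inflated path are omitted, the
// names are stand-ins for the markOop constants, and true corresponds to
// AT == 1 at DONE_LABEL:
/*
  bool fast_lock_sketch(std::atomic<uintptr_t>* mark, BasicLock* box, uintptr_t sp) {
    uintptr_t m = mark->load();
    if (m & monitor_value) return false;       // inflated: handled separately
    uintptr_t unlocked = m | unlocked_value;
    box->displaced_header = unlocked;          // anticipate a successful CAS
    if (mark->compare_exchange_strong(unlocked, (uintptr_t)box))
      return true;                             // we now stack-lock the object
    // CAS failed: recursive if the observed mark is a stack address near SP
    uintptr_t d = (unlocked - sp) & (7 - vm_page_size);   // li(AT, 7 - os::vm_page_size())
    box->displaced_header = d;                 // 0 flags a recursive lock
    return d == 0;
  }
*/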
  2951 // obj: object to unlock
  2952 // box: box address (displaced header location), killed.  Must be EAX.
  2953 // rbx,: killed tmp; cannot be obj nor box.
  2954 //
  2955 // Some commentary on balanced locking:
  2956 //
  2957 // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
  2958 // Methods that don't have provably balanced locking are forced to run in the
  2959 // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
  2960 // The interpreter provides two properties:
  2961 // I1:  At return-time the interpreter automatically and quietly unlocks any
  2962 //      objects acquired the current activation (frame).  Recall that the
  2963 //      interpreter maintains an on-stack list of locks currently held by
  2964 //      a frame.
  2965 // I2:  If a method attempts to unlock an object that is not held by
  2966 //      the frame the interpreter throws IMSX.
  2967 //
  2968 // Lets say A(), which has provably balanced locking, acquires O and then calls B().
  2969 // B() doesn't have provably balanced locking so it runs in the interpreter.
  2970 // Control returns to A() and A() unlocks O.  By I1 and I2, above, we know that O
  2971 // is still locked by A().
  2972 //
  2973 // The only other source of unbalanced locking would be JNI.  The "Java Native Interface:
  2974 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
  2975 // should not be unlocked by "normal" java-level locking and vice-versa.  The specification
  2976 // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
  2978 void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg) {
  2980   guarantee (objReg != boxReg, "") ;
  2981   guarantee (objReg != tmpReg, "") ;
  2982   guarantee (boxReg != tmpReg, "") ;
  2986   block_comment("FastUnlock");
  2989   if (EmitSync & 4) {
  2990     // Disable - inhibit all inlining.  Force control through the slow-path
  2991     move(AT, 0x0);
  2992     return;
  2993   } else
  2994     if (EmitSync & 8) {
  2995       Label DONE_LABEL ;
  2996       if (UseBiasedLocking) {
  2997         biased_locking_exit(objReg, tmpReg, DONE_LABEL);
  2998       }
  2999       // classic stack-locking code ...
  3000       ld(tmpReg, Address(boxReg, 0)) ;
  3001       beq(tmpReg, R0, DONE_LABEL) ;
  3002       move(AT, 0x1);  // delay slot
  3004       cmpxchg(tmpReg, Address(objReg, 0), boxReg);          // Uses EAX which is box
  3005       bind(DONE_LABEL);
  3006     } else {
  3007       Label DONE_LABEL, Stacked, CheckSucc, Inflated ;
  3009       // Critically, the biased locking test must have precedence over
  3010       // and appear before the (box->dhw == 0) recursive stack-lock test.
  3011       if (UseBiasedLocking && !UseOptoBiasInlining) {
  3012         biased_locking_exit(objReg, tmpReg, DONE_LABEL);
  3013       }
  3015       ld(AT, Address(boxReg, 0)) ;            // Examine the displaced header
  3016       beq(AT, R0, DONE_LABEL) ;      // 0 indicates recursive stack-lock
  3017       delayed()->daddiu(AT, R0, 0x1);
  3019       ld(tmpReg, Address(objReg, 0)) ;       // Examine the object's markword
  3020       andi(AT, tmpReg, markOopDesc::monitor_value) ;                     // Inflated?
  3021       beq(AT, R0, Stacked) ;                     // Inflated?
  3022       delayed()->nop();
  3024       bind(Inflated) ;
  3025       // It's inflated.
  3026       // Despite our balanced locking property we still check that m->_owner == Self
  3027       // as java routines or native JNI code called by this thread might
  3028       // have released the lock.
  3029       // Refer to the comments in synchronizer.cpp for how we might encode extra
  3030       // state in _succ so we can avoid fetching EntryList|cxq.
  3031       //
  3032       // I'd like to add more cases in fast_lock() and fast_unlock() --
  3033       // such as recursive enter and exit -- but we have to be wary of
  3034       // I$ bloat, T$ effects and BP$ effects.
  3035       //
  3036       // If there's no contention try a 1-0 exit.  That is, exit without
  3037       // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
  3038       // we detect and recover from the race that the 1-0 exit admits.
  3039       //
  3040       // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
  3041       // before it STs null into _owner, releasing the lock.  Updates
  3042       // to data protected by the critical section must be visible before
  3043       // we drop the lock (and thus before any other thread could acquire
  3044       // the lock and observe the fields protected by the lock).
  3045       // IA32's memory-model is SPO, so STs are ordered with respect to
  3046       // each other and there's no need for an explicit barrier (fence).
  3047       // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
  3048 #ifndef OPT_THREAD
  3049       get_thread (TREG) ;
  3050 #endif
  3052       // It's inflated
  3053       ld(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
  3054       xorr(boxReg, boxReg, TREG);
  3056       ld(AT, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
  3057       orr(boxReg, boxReg, AT);
  3059       move(AT, R0);
  3060       bne(boxReg, R0, DONE_LABEL);
  3061       delayed()->nop();
  3063       ld(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
  3064       ld(AT, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
  3065       orr(boxReg, boxReg, AT);
  3067       move(AT, R0);
  3068       bne(boxReg, R0, DONE_LABEL);
  3069       delayed()->nop();
  3071       sync();
  3072       sd(R0, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
  3073       move(AT, 0x1);
  3074       b(DONE_LABEL);
  3075       delayed()->nop();
  3077       bind  (Stacked);
  3078       ld(tmpReg, Address(boxReg, 0)) ;
  3079       //if (os::is_MP()) { sync(); }
  3080       cmpxchg(tmpReg, Address(objReg, 0), boxReg);
  3082       if (EmitSync & 65536) {
  3083         bind (CheckSucc);
  3084       }
  3086       bind(DONE_LABEL);
  3088       // Avoid branch to branch on AMD processors
  3089       if (EmitSync & 32768) { nop() ; }
  3090     }
  3091 }
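// A distilled C-level sketch (not compiled) of the inflated-path 1-0 exit
// above; the struct is hypothetical but its fields mirror the ObjectMonitor
// offsets used:
/*
  bool fast_unlock_inflated_sketch(ObjectMonitorSketch* m, void* self) {
    if (m->owner != self || m->recursions != 0) return false;  // slow path
    if (m->cxq != NULL || m->EntryList != NULL) return false;  // contended
    // release fence (the sync() above), then drop the lock
    m->owner = NULL;
    return true;
  }
*/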
  3093 void MacroAssembler::align(int modulus) {
  3094   while (offset() % modulus != 0) nop();
  3095 }
  3098 void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
  3099   //Unimplemented();
  3100 }
  3102 #ifdef _LP64
  3103 Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
  3105 /* FIXME: Jin: In MIPS64, F0~23 are all caller-saved registers */
  3106 FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13};
  3107 #else
  3108 Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, T4, T5, T6, T7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
  3110 FloatRegister caller_saved_fpu_registers[] = {};
  3111 #endif
  3113 // We preserve all caller-saved registers
  3114 void  MacroAssembler::pushad(){
  3115   int i;
  3117   /* Fixed-point registers */
  3118   int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
  3119   daddi(SP, SP, -1 * len * wordSize);
  3120   for (i = 0; i < len; i++)
  3121   {
  3122 #ifdef _LP64
  3123     sd(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
  3124 #else
  3125     sw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
  3126 #endif
  3127   }
  3129   /* Floating-point registers */
  3130   len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
  3131   daddi(SP, SP, -1 * len * wordSize);
  3132   for (i = 0; i < len; i++)
  3133   {
  3134 #ifdef _LP64
  3135     sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
  3136 #else
  3137     swc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
  3138 #endif
  3139   }
  3140 };
  3142 void  MacroAssembler::popad(){
  3143   int i;
  3145   /* Floating-point registers */
  3146   int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
  3147   for (i = 0; i < len; i++)
  3148   {
  3149 #ifdef _LP64
  3150     ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
  3151 #else
  3152     lwc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
  3153 #endif
  3155   daddi(SP, SP, len * wordSize);
  3157   /* Fixed-point registers */
  3158   len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
  3159   for (i = 0; i < len; i++)
  3161 #ifdef _LP64
  3162     ld(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
  3163 #else
  3164     lw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
  3165 #endif
  3167   daddi(SP, SP, len * wordSize);
  3168 };
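// Usage sketch (hypothetical call site, for illustration only): pushad() and
// popad() must bracket any code that can clobber caller-saved registers and
// must be strictly paired, e.g.
//
//   __ pushad();                    // spill caller-saved GPRs and FPRs
//   __ call(entry, relocInfo::runtime_call_type);
//   __ delayed()->nop();
//   __ popad();                     // reload them from the same layout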
void MacroAssembler::push2(Register reg1, Register reg2) {
#ifdef _LP64
  daddi(SP, SP, -16);
  sd(reg2, SP, 0);
  sd(reg1, SP, 8);
#else
  addi(SP, SP, -8);
  sw(reg2, SP, 0);
  sw(reg1, SP, 4);
#endif
}
void MacroAssembler::pop2(Register reg1, Register reg2) {
#ifdef _LP64
  ld(reg1, SP, 0);
  ld(reg2, SP, 8);
  daddi(SP, SP, 16);
#else
  lw(reg1, SP, 0);
  lw(reg2, SP, 4);
  addi(SP, SP, 8);
#endif
}
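// Note the operand order: push2(a, b) leaves b at SP+0 and a at SP+8 (LP64),
// so the matching restore is pop2(b, a), not pop2(a, b).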
// For the UseCompressedOops option
void MacroAssembler::load_klass(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedClassPointers) {
    lwu(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    decode_klass_not_null(dst);
  } else
#endif
    ld(dst, src, oopDesc::klass_offset_in_bytes());
}
void MacroAssembler::store_klass(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedClassPointers) {
    encode_klass_not_null(src);
    sw(src, dst, oopDesc::klass_offset_in_bytes());
  } else {
#endif
    sd(src, dst, oopDesc::klass_offset_in_bytes());
#ifdef _LP64
  }
#endif
}
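// With compressed class pointers the klass word holds a 32-bit narrow value,
//   narrow_klass = (klass - narrow_klass_base) >> narrow_klass_shift,
// so load_klass/store_klass widen and narrow it via decode_klass_not_null /
// encode_klass_not_null (defined below); otherwise the full 64-bit klass
// pointer is loaded and stored directly.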
void MacroAssembler::load_prototype_header(Register dst, Register src) {
  load_klass(dst, src);
  ld(dst, Address(dst, Klass::prototype_header_offset()));
}
#ifdef _LP64
void MacroAssembler::store_klass_gap(Register dst, Register src) {
  if (UseCompressedClassPointers) {
    sw(src, dst, oopDesc::klass_gap_offset_in_bytes());
  }
}
void MacroAssembler::load_heap_oop(Register dst, Address src) {
  if (UseCompressedOops) {
    lwu(dst, src);
    decode_heap_oop(dst);
  } else {
    ld(dst, src);
  }
}
void MacroAssembler::store_heap_oop(Address dst, Register src) {
  if (UseCompressedOops) {
    assert(!dst.uses(src), "not enough registers");
    encode_heap_oop(src);
    sw(src, dst);
  } else {
    sd(src, dst);
  }
}
void MacroAssembler::store_heap_oop_null(Address dst) {
  if (UseCompressedOops) {
    sw(R0, dst);
  } else {
    sd(R0, dst);
  }
}
#ifdef ASSERT
void MacroAssembler::verify_heapbase(const char* msg) {
  assert(UseCompressedOops || UseCompressedClassPointers, "should be compressed");
  assert(Universe::heap() != NULL, "java heap should be initialized");
}
#endif
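// The encode/decode routines below implement the standard HotSpot
// compressed-oop arithmetic:
//   encode: narrow = (oop == NULL) ? 0    : (oop - narrow_oop_base) >> shift
//   decode: oop    = (narrow == 0) ? NULL : (narrow << shift) + narrow_oop_base
// where shift == LogMinObjAlignmentInBytes and S5_heapbase caches
// narrow_oop_base; the movz instructions are what keep NULL mapped to 0 in
// both directions.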
// Algorithm must match oop.inline.hpp encode_heap_oop.
void MacroAssembler::encode_heap_oop(Register r) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
#endif
  verify_oop(r, "broken oop in encode_heap_oop");
  if (Universe::narrow_oop_base() == NULL) {
    if (Universe::narrow_oop_shift() != 0) {
      assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shr(r, LogMinObjAlignmentInBytes);
    }
    return;
  }

  movz(r, S5_heapbase, r);
  dsub(r, r, S5_heapbase);
  if (Universe::narrow_oop_shift() != 0) {
    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shr(r, LogMinObjAlignmentInBytes);
  }
}
void MacroAssembler::encode_heap_oop(Register dst, Register src) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
#endif
  verify_oop(src, "broken oop in encode_heap_oop");
  if (Universe::narrow_oop_base() == NULL) {
    if (Universe::narrow_oop_shift() != 0) {
      assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      dsrl(dst, src, LogMinObjAlignmentInBytes);
    } else {
      if (dst != src) move(dst, src);
    }
  } else {
    if (dst == src) {
      movz(dst, S5_heapbase, dst);
      dsub(dst, dst, S5_heapbase);
      if (Universe::narrow_oop_shift() != 0) {
        assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
        shr(dst, LogMinObjAlignmentInBytes);
      }
    } else {
      dsub(dst, src, S5_heapbase);
      if (Universe::narrow_oop_shift() != 0) {
        assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
        shr(dst, LogMinObjAlignmentInBytes);
      }
      movz(dst, R0, src);
    }
  }
}
void MacroAssembler::encode_heap_oop_not_null(Register r) {
  assert(UseCompressedOops, "should be compressed");
#ifdef ASSERT
  if (CheckCompressedOops) {
    Label ok;
    bne(r, R0, ok);
    delayed()->nop();
    stop("null oop passed to encode_heap_oop_not_null");
    bind(ok);
  }
#endif
  verify_oop(r, "broken oop in encode_heap_oop_not_null");
  if (Universe::narrow_oop_base() != NULL) {
    dsub(r, r, S5_heapbase);
  }
  if (Universe::narrow_oop_shift() != 0) {
    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shr(r, LogMinObjAlignmentInBytes);
  }
}
void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
  assert(UseCompressedOops, "should be compressed");
#ifdef ASSERT
  if (CheckCompressedOops) {
    Label ok;
    bne(src, R0, ok);
    delayed()->nop();
    stop("null oop passed to encode_heap_oop_not_null2");
    bind(ok);
  }
#endif
  verify_oop(src, "broken oop in encode_heap_oop_not_null2");

  if (Universe::narrow_oop_base() != NULL) {
    dsub(dst, src, S5_heapbase);
    if (Universe::narrow_oop_shift() != 0) {
      assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shr(dst, LogMinObjAlignmentInBytes);
    }
  } else {
    if (Universe::narrow_oop_shift() != 0) {
      assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      dsrl(dst, src, LogMinObjAlignmentInBytes);
    } else {
      if (dst != src) move(dst, src);
    }
  }
}
void MacroAssembler::decode_heap_oop(Register r) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
#endif
  if (Universe::narrow_oop_base() == NULL) {
    if (Universe::narrow_oop_shift() != 0) {
      assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shl(r, LogMinObjAlignmentInBytes);
    }
  } else {
    move(AT, r);
    if (Universe::narrow_oop_shift() != 0) {
      assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shl(r, LogMinObjAlignmentInBytes);
    }
    dadd(r, r, S5_heapbase);
    movz(r, R0, AT);
  }
  verify_oop(r, "broken oop in decode_heap_oop");
}
void MacroAssembler::decode_heap_oop(Register dst, Register src) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
#endif
  if (Universe::narrow_oop_base() == NULL) {
    if (Universe::narrow_oop_shift() != 0) {
      assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      if (dst != src) nop(); // DON'T DELETE THIS GUY.
      dsll(dst, src, LogMinObjAlignmentInBytes);
    } else {
      if (dst != src) move(dst, src);
    }
  } else {
    if (dst == src) {
      move(AT, dst);
      if (Universe::narrow_oop_shift() != 0) {
        assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
        shl(dst, LogMinObjAlignmentInBytes);
      }
      dadd(dst, dst, S5_heapbase);
      movz(dst, R0, AT);
    } else {
      if (Universe::narrow_oop_shift() != 0) {
        assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
        dsll(dst, src, LogMinObjAlignmentInBytes);
        daddu(dst, dst, S5_heapbase);
      } else {
        daddu(dst, src, S5_heapbase);
      }
      movz(dst, R0, src);
    }
  }
  verify_oop(dst, "broken oop in decode_heap_oop");
}
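// In the decode paths above, the untouched narrow value survives in a spare
// register (AT, or src when dst != src); after the shift-and-add, movz
// rewrites the result back to 0 when that original value was 0, so a narrow
// NULL never decodes to a pointer at the heap base itself.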
void MacroAssembler::decode_heap_oop_not_null(Register r) {
  // Note: it will change flags
  assert(UseCompressedOops, "should only be used for compressed headers");
  assert(Universe::heap() != NULL, "java heap should be initialized");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_oop_shift() != 0) {
    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shl(r, LogMinObjAlignmentInBytes);
    if (Universe::narrow_oop_base() != NULL) {
      daddu(r, r, S5_heapbase);
    }
  } else {
    assert(Universe::narrow_oop_base() == NULL, "sanity");
  }
}
void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
  assert(UseCompressedOops, "should only be used for compressed headers");
  assert(Universe::heap() != NULL, "java heap should be initialized");

  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  //lea(dst, Address(S5_heapbase, src, Address::times_8, 0));
  if (Universe::narrow_oop_shift() != 0) {
    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    if (LogMinObjAlignmentInBytes == Address::times_8) {
      dsll(dst, src, LogMinObjAlignmentInBytes);
      daddu(dst, dst, S5_heapbase);
    } else {
      dsll(dst, src, LogMinObjAlignmentInBytes);
      if (Universe::narrow_oop_base() != NULL) {
        daddu(dst, dst, S5_heapbase);
      }
    }
  } else {
    assert(Universe::narrow_oop_base() == NULL, "sanity");
    if (dst != src) {
      move(dst, src);
    }
  }
}
void MacroAssembler::encode_klass_not_null(Register r) {
  if (Universe::narrow_klass_base() != NULL) {
    assert(r != AT, "Encoding a klass in AT");
    set64(AT, (int64_t)Universe::narrow_klass_base());
    dsub(r, r, AT);
  }
  if (Universe::narrow_klass_shift() != 0) {
    assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
    shr(r, LogKlassAlignmentInBytes);
  }
}
void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
  if (dst == src) {
    encode_klass_not_null(src);
  } else {
    if (Universe::narrow_klass_base() != NULL) {
      set64(dst, (int64_t)Universe::narrow_klass_base());
      dsub(dst, src, dst);
      if (Universe::narrow_klass_shift() != 0) {
        assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
        shr(dst, LogKlassAlignmentInBytes);
      }
    } else {
      if (Universe::narrow_klass_shift() != 0) {
        assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
        dsrl(dst, src, LogKlassAlignmentInBytes);
      } else {
        move(dst, src);
      }
    }
  }
}
// Function instr_size_for_decode_klass_not_null() counts the instructions
// generated by decode_klass_not_null(register r) and reinit_heapbase(),
// when (Universe::heap() != NULL).  Hence, if the instructions they
// generate change, then this method needs to be updated.
int MacroAssembler::instr_size_for_decode_klass_not_null() {
  assert(UseCompressedClassPointers, "only for compressed klass ptrs");
  if (Universe::narrow_klass_base() != NULL) {
    // mov64 + addq + shlq? + mov64  (for reinit_heapbase()).
    return (Universe::narrow_klass_shift() == 0 ? 4 * 9 : 4 * 10);
  } else {
    // longest load decode klass function, mov64, leaq
    return (Universe::narrow_klass_shift() == 0 ? 4 * 0 : 4 * 1);
  }
}
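// The sizes returned above are byte counts: every MIPS instruction is 4 bytes
// wide, so 4 * n stands for a worst case of n instructions (set64 of an
// arbitrary 64-bit base alone can expand to several instructions).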
void MacroAssembler::decode_klass_not_null(Register r) {
  assert(UseCompressedClassPointers, "should only be used for compressed headers");
  assert(r != AT, "Decoding a klass in AT");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_klass_shift() != 0) {
    assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
    shl(r, LogKlassAlignmentInBytes);
  }
  if (Universe::narrow_klass_base() != NULL) {
    set64(AT, (int64_t)Universe::narrow_klass_base());
    daddu(r, r, AT);
    // Not necessary for MIPS at all.
    //reinit_heapbase();
  }
}
void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
  assert(UseCompressedClassPointers, "should only be used for compressed headers");

  if (dst == src) {
    decode_klass_not_null(dst);
  } else {
    // Cannot assert, unverified entry point counts instructions (see .ad file)
    // vtableStubs also counts instructions in pd_code_size_limit.
    // Also do not verify_oop as this is called by verify_oop.
    set64(dst, (int64_t)Universe::narrow_klass_base());
    if (Universe::narrow_klass_shift() != 0) {
      assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
      assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
      dsll(AT, src, Address::times_8);
      daddu(dst, dst, AT);
    } else {
      daddu(dst, src, dst);
    }
  }
}
void MacroAssembler::incrementl(Register reg, int value) {
  if (value == min_jint) {
    move(AT, value);
    LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
    return;
  }
  if (value <  0) { decrementl(reg, -value); return; }
  if (value == 0) {                        ; return; }

  if (Assembler::is_simm16(value)) {
    NOT_LP64(addiu(reg, reg, value));
    LP64_ONLY(move(AT, value); addu32(reg, reg, AT));
  } else {
    move(AT, value);
    LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
  }
}
void MacroAssembler::decrementl(Register reg, int value) {
  if (value == min_jint) {
    move(AT, value);
    LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
    return;
  }
  if (value <  0) { incrementl(reg, -value); return; }
  if (value == 0) {                        ; return; }

  if (Assembler::is_simm16(value)) {
    NOT_LP64(addiu(reg, reg, -value));
    LP64_ONLY(move(AT, value); subu32(reg, reg, AT));
  } else {
    move(AT, value);
    LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
  }
}
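// incrementl/decrementl deliberately use addu32/subu32 on LP64 so the result
// keeps 32-bit (jint) wrap-around semantics, and the min_jint special case
// avoids computing -value, which would overflow for INT_MIN.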
void MacroAssembler::reinit_heapbase() {
  if (UseCompressedOops || UseCompressedClassPointers) {
    if (Universe::heap() != NULL) {
      if (Universe::narrow_oop_base() == NULL) {
        move(S5_heapbase, R0);
      } else {
        set64(S5_heapbase, (int64_t)Universe::narrow_ptrs_base());
      }
    } else {
      set64(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr());
      ld(S5_heapbase, S5_heapbase, 0);
    }
  }
}
#endif // _LP64
void MacroAssembler::check_klass_subtype(Register sub_klass,
                           Register super_klass,
                           Register temp_reg,
                           Label& L_success) {
  // Implemented as a combination of the fast and slow subtype-check paths
  // (cf. gen_subtype_check).
  Label L_failure;
  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg,        &L_success, &L_failure, NULL);
  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
  bind(L_failure);
}
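// Usage sketch (hypothetical, for illustration only): a caller that only
// needs a success branch can write
//
//   Label ok;
//   __ check_klass_subtype(Rsub, Rsuper, Rtemp, ok);
//   // ... failure path falls through here ...
//   __ bind(ok);
//
// The fast path settles trivial and primary supertypes; everything else is
// resolved by the linear scan in the slow path below.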
SkipIfEqual::SkipIfEqual(
    MacroAssembler* masm, const bool* flag_addr, bool value) {
  _masm = masm;
  _masm->li(AT, (address)flag_addr);
  _masm->lb(AT, AT, 0);
  _masm->addi(AT, AT, -value);
  _masm->beq(AT, R0, _label);
  _masm->delayed()->nop();
}
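// SkipIfEqual is an RAII guard: the constructor above compares *flag_addr
// with 'value' and branches over the guarded code when they are equal; the
// destructor (further below) binds the skip target. A typical use:
//
//   { SkipIfEqual skip(masm, &DTraceMethodProbes, false);
//     // code emitted only when DTraceMethodProbes != false
//   }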
void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   Label* L_slow_path,
                                        RegisterOrConstant super_check_offset) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
  if (super_check_offset.is_register()) {
    assert_different_registers(sub_klass, super_klass,
                               super_check_offset.as_register());
  } else if (must_load_sco) {
    assert(temp_reg != noreg, "supply either a temp or a register offset");
  }

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  int sco_offset = in_bytes(Klass::super_check_offset_offset());
  // If the pointers are equal, we are done (e.g., String[] elements).
  // This self-check enables sharing of secondary supertype arrays among
  // non-primary types such as array-of-interface.  Otherwise, each such
  // type would need its own customized SSA.
  // We move this check to the front of the fast path because many
  // type checks are in fact trivially successful in this manner,
  // so we get a nicely predicted branch right at the start of the check.
  beq(sub_klass, super_klass, *L_success);
  delayed()->nop();
  // Check the supertype display:
  if (must_load_sco) {
    // Positive movl does right thing on LP64.
    lwu(temp_reg, super_klass, sco_offset);
    super_check_offset = RegisterOrConstant(temp_reg);
  }
  dsll(AT, super_check_offset.register_or_noreg(), Address::times_1);
  daddu(AT, sub_klass, AT);
  ld(AT, AT, super_check_offset.constant_or_zero() * Address::times_1);

  // This check has worked decisively for primary supers.
  // Secondary supers are sought in the super_cache ('super_cache_addr').
  // (Secondary supers are interfaces and very deeply nested subtypes.)
  // This works in the same check above because of a tricky aliasing
  // between the super_cache and the primary super display elements.
  // (The 'super_check_addr' can address either, as the case requires.)
  // Note that the cache is updated below if it does not help us find
  // what we need immediately.
  // So if it was a primary super, we can just fail immediately.
  // Otherwise, it's the slow path for us (no success at this point).

  if (super_check_offset.is_register()) {
    beq(super_klass, AT, *L_success);
    delayed()->nop();
    addi(AT, super_check_offset.as_register(), -sc_offset);
    if (L_failure == &L_fallthrough) {
      beq(AT, R0, *L_slow_path);
      delayed()->nop();
    } else {
      bne(AT, R0, *L_failure);
      delayed()->nop();
      b(*L_slow_path);
      delayed()->nop();
    }
  } else if (super_check_offset.as_constant() == sc_offset) {
    // Need a slow path; fast failure is impossible.
    if (L_slow_path == &L_fallthrough) {
      beq(super_klass, AT, *L_success);
      delayed()->nop();
    } else {
      bne(super_klass, AT, *L_slow_path);
      delayed()->nop();
      b(*L_success);
      delayed()->nop();
    }
  } else {
    // No slow path; it's a fast decision.
    if (L_failure == &L_fallthrough) {
      beq(super_klass, AT, *L_success);
      delayed()->nop();
    } else {
      bne(super_klass, AT, *L_failure);
      delayed()->nop();
      b(*L_success);
      delayed()->nop();
    }
  }

  bind(L_fallthrough);
}
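// In all three cases above, a match against the probed display slot means
// definite success; a mismatch is a definite failure only when the probed
// offset is known not to be the secondary_super_cache slot, otherwise the
// slow path has to decide.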
void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Register temp2_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   bool set_cond_codes) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  if (temp2_reg != noreg)
    assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
  else
    temp2_reg = T9;
#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  // a couple of useful fields in sub_klass:
  int ss_offset = in_bytes(Klass::secondary_supers_offset());
  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  Address secondary_supers_addr(sub_klass, ss_offset);
  Address super_cache_addr(     sub_klass, sc_offset);

  // Do a linear scan of the secondary super-klass chain.
  // This code is rarely used, so simplicity is a virtue here.
  // The repne_scan instruction uses fixed registers, which we must spill.
  // Don't worry too much about pre-existing connections with the input regs.

  // Get super_klass value into rax (even if it was in rdi or rcx).
#ifndef PRODUCT
  int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
  ExternalAddress pst_counter_addr((address) pst_counter);
  NOT_LP64(  incrementl(pst_counter_addr) );
#endif //PRODUCT

  // We will consult the secondary-super array.
  ld(temp_reg, secondary_supers_addr);
  // Load the array length.  (Positive movl does right thing on LP64.)
  lw(temp2_reg, Address(temp_reg, Array<Klass*>::length_offset_in_bytes()));
  // Skip to start of data.
  daddiu(temp_reg, temp_reg, Array<Klass*>::base_offset_in_bytes());

  // Scan RCX words at [RDI] for an occurrence of RAX.
  // Set NZ/Z based on last compare.
  // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does
  // not change flags (only scas instruction which is repeated sets flags).
  // Set Z = 0 (not equal) before 'repne' to indicate that class was not found.

  /* 2013/4/3 Jin: OpenJDK8 never compresses klass pointers in secondary-super array. */
  Label Loop, subtype;
  bind(Loop);
  beq(temp2_reg, R0, *L_failure);
  delayed()->nop();
  ld(AT, temp_reg, 0);
  beq(AT, super_klass, subtype);
  delayed()->daddi(temp_reg, temp_reg, 1 * wordSize);
  b(Loop);
  delayed()->daddi(temp2_reg, temp2_reg, -1);

  // Success.  Cache the super we found and proceed in triumph.
  bind(subtype);
  sd(super_klass, super_cache_addr);
  if (L_success != &L_fallthrough) {
    b(*L_success);
    delayed()->nop();
  }

#undef IS_A_TEMP

  bind(L_fallthrough);
}
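// The scan above is a straight linear search: temp_reg walks the
// Array<Klass*> data while temp2_reg counts down from the array length; a
// hit is recorded in the secondary_super_cache so the next fast-path probe
// on the same (sub, super) pair succeeds without rescanning.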
void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
  ld(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
  sd(R0, Address(java_thread, JavaThread::vm_result_offset()));
  verify_oop(oop_result, "broken oop in call_VM_base");
}
void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
  ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
  sd(R0, Address(java_thread, JavaThread::vm_result_2_offset()));
}
Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
                                         int extra_slot_offset) {
  // cf. TemplateTable::prepare_invoke(), if (load_receiver).
  int stackElementSize = Interpreter::stackElementSize;
  int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
#ifdef ASSERT
  int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
  assert(offset1 - offset == stackElementSize, "correct arithmetic");
#endif
  Register             scale_reg    = NOREG;
  Address::ScaleFactor scale_factor = Address::no_scale;
  if (arg_slot.is_constant()) {
    offset += arg_slot.as_constant() * stackElementSize;
  } else {
    scale_reg    = arg_slot.as_register();
    scale_factor = Address::times_8;
  }
  // 2014/07/31 Fu: We don't push RA on stack in prepare_invoke.
  //  offset += wordSize;           // return PC is on stack
  if (scale_reg == NOREG) {
    return Address(SP, offset);
  } else {
    dsll(scale_reg, scale_reg, scale_factor);
    daddu(scale_reg, SP, scale_reg);
    return Address(scale_reg, offset);
  }
}
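// For example, a constant arg_slot is folded entirely into the displacement,
// Address(SP, expr_offset + slot * stackElementSize), whereas a register
// arg_slot is scaled by times_8 (the LP64 stackElementSize) and added to SP
// before the Address is formed.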
SkipIfEqual::~SkipIfEqual() {
  _masm->bind(_label);
}
void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
  switch (size_in_bytes) {
#ifndef _LP64
  case  8:
    assert(dst2 != noreg, "second dest register required");
    lw(dst,  src);
    lw(dst2, src.plus_disp(BytesPerInt));
    break;
#else
  case  8:  ld(dst, src); break;
#endif
  case  4:  lw(dst, src); break;
  case  2:  is_signed ? lh(dst, src) : lhu(dst, src); break;
  case  1:  is_signed ? lb(dst, src) : lbu(dst, src); break;
  default:  ShouldNotReachHere();
  }
}
void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
  switch (size_in_bytes) {
#ifndef _LP64
  case  8:
    assert(src2 != noreg, "second source register required");
    sw(src, dst);
    sw(src2, dst.plus_disp(BytesPerInt));
    break;
#else
  case  8:  sd(src, dst); break;
#endif
  case  4:  sw(src, dst); break;
  case  2:  sh(src, dst); break;
  case  1:  sb(src, dst); break;
  default:  ShouldNotReachHere();
  }
}
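// E.g. store_sized_value(dst, src, 2) emits a single sh. Only 32-bit VMs need
// the second register of the pair: an 8-byte value does not fit in one 4-byte
// GPR, so it is moved as two adjacent 32-bit halves.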
// Look up the method for a megamorphic invokeinterface call.
// The target method is determined by <intf_klass, itable_index>.
// The receiver klass is in recv_klass.
// On success, the result will be in method_result, and execution falls through.
// On failure, execution transfers to the given label.
void MacroAssembler::lookup_interface_method(Register recv_klass,
                                             Register intf_klass,
                                             RegisterOrConstant itable_index,
                                             Register method_result,
                                             Register scan_temp,
                                             Label& L_no_such_interface) {
  assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
         "caller must use same register for non-constant itable index as for method");

  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  int vtable_base = InstanceKlass::vtable_start_offset() * wordSize;
  int itentry_off = itableMethodEntry::method_offset_in_bytes();
  int scan_step   = itableOffsetEntry::size() * wordSize;
  int vte_size    = vtableEntry::size() * wordSize;
  Address::ScaleFactor times_vte_scale = Address::times_ptr;
  assert(vte_size == wordSize, "else adjust times_vte_scale");

  lw(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize));

  // %%% Could store the aligned, prescaled offset in the klassoop.
  dsll(scan_temp, scan_temp, times_vte_scale);
  daddu(scan_temp, recv_klass, scan_temp);
  daddiu(scan_temp, scan_temp, vtable_base);
  if (HeapWordsPerLong > 1) {
    // Round up to align_object_offset boundary
    // see code for InstanceKlass::start_of_itable!
    round_to(scan_temp, BytesPerLong);
  }

  // Adjust recv_klass by scaled itable_index, so we can free itable_index.
  assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
  if (itable_index.is_constant()) {
    set64(AT, (int)itable_index.as_constant());
    dsll(AT, AT, (int)Address::times_ptr);
  } else {
    dsll(AT, itable_index.as_register(), (int)Address::times_ptr);
  }
  daddu(AT, AT, recv_klass);
  daddiu(recv_klass, AT, itentry_off);

  Label search, found_method;

  for (int peel = 1; peel >= 0; peel--) {
    ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));

    if (peel) {
      beq(intf_klass, method_result, found_method);
      nop();
    } else {
      bne(intf_klass, method_result, search);
      nop();
      // (invert the test to fall through to found_method...)
    }

    if (!peel)  break;

    bind(search);

    // Check that the previous entry is non-null.  A null entry means that
    // the receiver class doesn't implement the interface, and wasn't the
    // same as when the caller was compiled.
    beq(method_result, R0, L_no_such_interface);
    nop();
    daddiu(scan_temp, scan_temp, scan_step);
  }

  bind(found_method);

  // Got a hit.
  lw(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
  if (UseLoongsonISA) {
    gsldx(method_result, recv_klass, scan_temp, 0);
  } else {
    daddu(AT, recv_klass, scan_temp);
    ld(method_result, AT, 0);
  }
}
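// Itable layout assumed by the scan above: after the vtable comes an array of
// itableOffsetEntry (interface klass, offset) pairs terminated by a NULL
// interface. The offset of the matching pair locates that interface's
// itableMethodEntry block within the same klass, which is then indexed by
// itable_index (already folded into recv_klass above).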
// virtual method calling
void MacroAssembler::lookup_virtual_method(Register recv_klass,
                                           RegisterOrConstant vtable_index,
                                           Register method_result) {
  Register tmp = GP;
  push(tmp);

  if (vtable_index.is_constant()) {
    assert_different_registers(recv_klass, method_result, tmp);
  } else {
    assert_different_registers(recv_klass, method_result, vtable_index.as_register(), tmp);
  }
  const int base = InstanceKlass::vtable_start_offset() * wordSize;
  assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below");
/*
  Address vtable_entry_addr(recv_klass,
                            vtable_index, Address::times_ptr,
                            base + vtableEntry::method_offset_in_bytes());
*/
  if (vtable_index.is_constant()) {
    set64(AT, vtable_index.as_constant());
    dsll(AT, AT, (int)Address::times_ptr);
  } else {
    dsll(AT, vtable_index.as_register(), (int)Address::times_ptr);
  }
  set64(tmp, base + vtableEntry::method_offset_in_bytes());
  daddu(tmp, tmp, AT);
  daddu(tmp, tmp, recv_klass);
  ld(method_result, tmp, 0);

  pop(tmp);
}
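// The address computed above is equivalent to the commented-out indexed form:
//   recv_klass + base + vtable_index * wordSize + vtableEntry::method_offset_in_bytes()
// i.e. a plain load from the vtable embedded at the end of the klass.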
