src/cpu/mips/vm/macroAssembler_mips.cpp

author      zhaixiang
date        Tue, 12 Jun 2018 13:58:17 +0800
changeset   9144 cecfc245b19a
parent      9136 7ed87d5663da
child       9149 f977b3d18c61
permissions -rw-r--r--

#7157 Fix all "forgot saying delayed()" issues when filling delay slots
Summary: enable check_delay and guarantee delay_state is at_delay_slot when filling a delay slot
Reviewed-by: aoqi

/*
 * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2017, 2018, Loongson Technology. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "compiler/disassembler.hpp"
#include "gc_interface/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/cardTableModRefBS.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/macros.hpp"
#if INCLUDE_ALL_GCS
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
#include "gc_implementation/g1/heapRegion.hpp"
#endif // INCLUDE_ALL_GCS

// Implementation of MacroAssembler

intptr_t MacroAssembler::i[32] = {0};
float MacroAssembler::f[32] = {0.0};

void MacroAssembler::print(outputStream *s) {
  unsigned int k;
  for(k = 0; k < sizeof(i)/sizeof(i[0]); k++) {
    s->print_cr("i%d = 0x%.16lx", k, i[k]);
  }
  s->cr();

  for(k = 0; k < sizeof(f)/sizeof(f[0]); k++) {
    s->print_cr("f%d = %f", k, f[k]);
  }
  s->cr();
}

int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; }
int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; }

void MacroAssembler::save_registers(MacroAssembler *masm) {
#define __ masm->
  for(int k = 0; k < 32; k++) {
    __ sw (as_Register(k), A0, i_offset(k));
  }

  for(int k = 0; k < 32; k++) {
    __ swc1 (as_FloatRegister(k), A0, f_offset(k));
  }
#undef __
}

void MacroAssembler::restore_registers(MacroAssembler *masm) {
#define __ masm->
  for(int k = 0; k < 32; k++) {
    __ lw (as_Register(k), A0, i_offset(k));
  }

  for(int k = 0; k < 32; k++) {
    __ lwc1 (as_FloatRegister(k), A0, f_offset(k));
  }
#undef __
}

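// Note: the static i[]/f[] arrays above appear to serve as a scratch dump
// area for debugging. save_registers()/restore_registers() emit code that
// spills or reloads all 32 GPRs and 32 FPRs at i_offset(k)/f_offset(k)
// relative to a base address expected in A0, and print() then formats the
// dump; the base passed in A0 is presumably the address of such a save area
// prepared by the caller.
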
void MacroAssembler::pd_patch_instruction(address branch, address target) {
  jint& stub_inst = *(jint*) branch;
  jint *pc = (jint *)branch;

  if((opcode(stub_inst) == special_op) && (special(stub_inst) == dadd_op)) {
    //b_far:
    //  move(AT, RA); // dadd
    //  emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
    //  nop();
    //  lui(T9, 0); // to be patched
    //  ori(T9, T9, 0);
    //  daddu(T9, T9, RA);
    //  move(RA, AT);
    //  jr(T9);

    assert(opcode(pc[3]) == lui_op
        && opcode(pc[4]) == ori_op
        && special(pc[5]) == daddu_op, "Not a branch label patch");
    if(!(opcode(pc[3]) == lui_op
          && opcode(pc[4]) == ori_op
          && special(pc[5]) == daddu_op)) { tty->print_cr("Not a branch label patch"); }

    int offset = target - branch;
    if (!is_simm16(offset)) {
      pc[3] = (pc[3] & 0xffff0000) | high16(offset - 12);
      pc[4] = (pc[4] & 0xffff0000) | low16(offset - 12);
    } else {
      /* revert to "beq + nop" */
      CodeBuffer cb(branch, 4 * 10);
      MacroAssembler masm(&cb);
#define __ masm.
      __ b(target);
      __ delayed()->nop();
      __ nop();
      __ nop();
      __ nop();
      __ nop();
      __ nop();
      __ nop();
    }
    return;
  } else if (special(pc[4]) == jr_op
             && opcode(pc[4]) == special_op
             && (((opcode(pc[0]) == lui_op) || opcode(pc[0]) == daddiu_op) || (opcode(pc[0]) == ori_op))) {
    //jmp_far:
    //  patchable_set48(T9, target);
    //  jr(T9);
    //  nop();

    CodeBuffer cb(branch, 4 * 4);
    MacroAssembler masm(&cb);
    masm.patchable_set48(T9, (long)(target));
    return;
  }

#ifndef PRODUCT
  if (!is_simm16((target - branch - 4) >> 2)) {
    tty->print_cr("Illegal patching: target=0x%lx", target);
    int *p = (int *)branch;
    for (int i = -10; i < 10; i++) {
      tty->print("0x%lx, ", p[i]);
    }
    tty->print_cr("");
  }
#endif

  stub_inst = patched_branch(target - branch, stub_inst, 0);
}

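// Note: pd_patch_instruction() recognizes the two long-form stubs above by
// shape -- a b_far stub starts with "dadd AT, RA, zero" and is patched by
// rewriting the lui/ori immediate pair, while a jmp_far stub ends in "jr T9"
// and is re-emitted via patchable_set48(). Anything else is assumed to be an
// ordinary conditional branch whose 16-bit offset field is rewritten by
// patched_branch().
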
static inline address first_cache_address() {
  return CodeCache::low_bound() + sizeof(HeapBlock::Header);
}

static inline address last_cache_address() {
  return CodeCache::high_bound() - Assembler::InstructionSize;
}

int MacroAssembler::call_size(address target, bool far, bool patchable) {
  if (patchable) return 6 << Assembler::LogInstructionSize;
  if (!far) return 2 << Assembler::LogInstructionSize; // jal + nop
  return (insts_for_set64((jlong)target) + 2) << Assembler::LogInstructionSize;
}

// Can we reach target using jal/j from anywhere
// in the code cache (because code can be relocated)?
bool MacroAssembler::reachable_from_cache(address target) {
  address cl = first_cache_address();
  address ch = last_cache_address();

  return fit_in_jal(target, cl) && fit_in_jal(target, ch);
}

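// Note: MIPS j/jal encode only a 26-bit instruction index; the upper bits of
// the target come from PC + 4, so a direct jump can reach only addresses in
// the same 256MB-aligned region as the jump itself. fit_in_jal() presumably
// checks exactly that, and testing both ends of the code cache makes the
// answer hold no matter where within the cache the code is later relocated.
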
void MacroAssembler::general_jump(address target) {
  if (reachable_from_cache(target)) {
    j(target);
    delayed()->nop();
  } else {
    set64(T9, (long)target);
    jr(T9);
    delayed()->nop();
  }
}

int MacroAssembler::insts_for_general_jump(address target) {
  if (reachable_from_cache(target)) {
    //j(target);
    //nop();
    return 2;
  } else {
    //set64(T9, (long)target);
    //jr(T9);
    //nop();
    return insts_for_set64((jlong)target) + 2;
  }
}

void MacroAssembler::patchable_jump(address target) {
  if (reachable_from_cache(target)) {
    nop();
    nop();
    nop();
    nop();
    j(target);
    delayed()->nop();
  } else {
    patchable_set48(T9, (long)target);
    jr(T9);
    delayed()->nop();
  }
}

int MacroAssembler::insts_for_patchable_jump(address target) {
  return 6;
}

void MacroAssembler::general_call(address target) {
  if (reachable_from_cache(target)) {
    jal(target);
    delayed()->nop();
  } else {
    set64(T9, (long)target);
    jalr(T9);
    delayed()->nop();
  }
}

int MacroAssembler::insts_for_general_call(address target) {
  if (reachable_from_cache(target)) {
    //jal(target);
    //nop();
    return 2;
  } else {
    //set64(T9, (long)target);
    //jalr(T9);
    //nop();
    return insts_for_set64((jlong)target) + 2;
  }
}

void MacroAssembler::patchable_call(address target) {
  if (reachable_from_cache(target)) {
    nop();
    nop();
    nop();
    nop();
    jal(target);
    delayed()->nop();
  } else {
    patchable_set48(T9, (long)target);
    jalr(T9);
    delayed()->nop();
  }
}

int MacroAssembler::insts_for_patchable_call(address target) {
  return 6;
}

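// Note: the patchable variants are deliberately padded to a fixed length of
// 6 instructions (matching call_size() above): either 4 nops + jal + delay
// slot, or a 4-instruction patchable_set48 + jalr/jr + delay slot. Keeping
// both forms the same size lets later patching rewrite the call site in
// place with whichever form the new target requires, without moving any
// surrounding code.
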
void MacroAssembler::beq_far(Register rs, Register rt, address entry) {
  u_char * cur_pc = pc();

  /* Jin: Near/Far jump */
  if(is_simm16((entry - pc() - 4) / 4)) {
    Assembler::beq(rs, rt, offset(entry));
  } else {
    Label not_jump;
    bne(rs, rt, not_jump);
    delayed()->nop();

    b_far(entry);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}

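// Note: every MIPS branch has a delay slot -- the instruction immediately
// after the branch executes before the branch takes effect. delayed() marks
// the next emitted instruction as that slot, and this changeset (#7157)
// enables check_delay so the assembler can guarantee delay_state is
// at_delay_slot whenever a slot is being filled. The far form above inverts
// the condition (bne around b_far) because the multi-instruction far-branch
// sequence no longer fits in a 16-bit branch offset.
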
void MacroAssembler::beq_far(Register rs, Register rt, Label& L) {
  if (L.is_bound()) {
    beq_far(rs, rt, target(L));
  } else {
    u_char * cur_pc = pc();
    Label not_jump;
    bne(rs, rt, not_jump);
    delayed()->nop();

    b_far(L);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}

void MacroAssembler::bne_far(Register rs, Register rt, address entry) {
  u_char * cur_pc = pc();

  /* Jin: Near/Far jump */
  if(is_simm16((entry - pc() - 4) / 4)) {
    Assembler::bne(rs, rt, offset(entry));
  } else {
    Label not_jump;
    beq(rs, rt, not_jump);
    delayed()->nop();

    b_far(entry);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}

void MacroAssembler::bne_far(Register rs, Register rt, Label& L) {
  if (L.is_bound()) {
    bne_far(rs, rt, target(L));
  } else {
    u_char * cur_pc = pc();
    Label not_jump;
    beq(rs, rt, not_jump);
    delayed()->nop();

    b_far(L);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}

void MacroAssembler::beq_long(Register rs, Register rt, Label& L) {
  Label not_taken;

  bne(rs, rt, not_taken);
  delayed()->nop();

  jmp_far(L);

  bind(not_taken);
}

void MacroAssembler::bne_long(Register rs, Register rt, Label& L) {
  Label not_taken;

  beq(rs, rt, not_taken);
  delayed()->nop();

  jmp_far(L);

  bind(not_taken);
}

void MacroAssembler::bc1t_long(Label& L) {
  Label not_taken;

  bc1f(not_taken);
  delayed()->nop();

  jmp_far(L);

  bind(not_taken);
}

void MacroAssembler::bc1f_long(Label& L) {
  Label not_taken;

  bc1t(not_taken);
  delayed()->nop();

  jmp_far(L);

  bind(not_taken);
}

void MacroAssembler::b_far(Label& L) {
  if (L.is_bound()) {
    b_far(target(L));
  } else {
    volatile address dest = target(L);
/*
MacroAssembler::pd_patch_instruction branch=55651ed514, target=55651ef6d8
   0x00000055651ed514: dadd at, ra, zero
   0x00000055651ed518: [4110001]bgezal zero, 0x00000055651ed520
   0x00000055651ed51c: sll zero, zero, 0
   0x00000055651ed520: lui t9, 0x0
   0x00000055651ed524: ori t9, t9, 0x21b8
   0x00000055651ed528: daddu t9, t9, ra
   0x00000055651ed52c: dadd ra, at, zero
   0x00000055651ed530: jr t9
   0x00000055651ed534: sll zero, zero, 0
*/
    move(AT, RA);
    emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
    nop();
    lui(T9, 0); // to be patched
    ori(T9, T9, 0);
    daddu(T9, T9, RA);
    move(RA, AT);
    jr(T9);
  }
}

void MacroAssembler::b_far(address entry) {
  u_char * cur_pc = pc();

  /* Jin: Near/Far jump */
  if(is_simm16((entry - pc() - 4) / 4)) {
    b(offset(entry));
  } else {
    /* address must be bounded */
    move(AT, RA);
    emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
    nop();
    li32(T9, entry - pc());
    daddu(T9, T9, RA);
    move(RA, AT);
    jr(T9);
  }
}

void MacroAssembler::ld_ptr(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  ld_ptr(rt, 0, AT);
}

void MacroAssembler::st_ptr(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  st_ptr(rt, 0, AT);
}

void MacroAssembler::ld_long(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  ld_long(rt, 0, AT);
}

void MacroAssembler::st_long(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  st_long(rt, 0, AT);
}

Address MacroAssembler::as_Address(AddressLiteral adr) {
  return Address(adr.target(), adr.rspec());
}

Address MacroAssembler::as_Address(ArrayAddress adr) {
  return Address::make_array(adr);
}

// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved).
void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) {
  Label again;

  li(tmp_reg1, counter_addr);
  bind(again);
  if(UseSyncLevel >= 3000 || UseSyncLevel < 2000) sync();
  ll(tmp_reg2, tmp_reg1, 0);
  addi(tmp_reg2, tmp_reg2, inc);
  sc(tmp_reg2, tmp_reg1, 0);
  beq(tmp_reg2, R0, again);
  delayed()->nop();
}

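// Note: this is the classic MIPS LL/SC retry loop. ll (load-linked) reads
// the counter and opens a reservation on its location; sc (store-
// conditional) writes back only if nothing else touched the location in
// between, leaving 1 in tmp_reg2 on success and 0 on failure, in which case
// beq branches back and the increment is retried. The UseSyncLevel-gated
// sync() adds a memory barrier ahead of the loop on configurations that
// require one.
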
int MacroAssembler::biased_locking_enter(Register lock_reg,
                                         Register obj_reg,
                                         Register swap_reg,
                                         Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Label& done,
                                         Label* slow_case,
                                         BiasedLockingCounters* counters) {
  assert(UseBiasedLocking, "why call this otherwise?");
  bool need_tmp_reg = false;
  if (tmp_reg == noreg) {
    need_tmp_reg = true;
    tmp_reg = T9;
  }
  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT);
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  Address saved_mark_addr(lock_reg, 0);

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;
  int null_check_offset = -1;
  if (!swap_reg_contains_mark) {
    null_check_offset = offset();
    ld_ptr(swap_reg, mark_addr);
  }

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  move(tmp_reg, swap_reg);
  andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place);
#ifdef _LP64
  daddi(AT, R0, markOopDesc::biased_lock_pattern);
  dsub(AT, AT, tmp_reg);
#else
  addi(AT, R0, markOopDesc::biased_lock_pattern);
  sub(AT, AT, tmp_reg);
#endif
  if (need_tmp_reg) {
    pop(tmp_reg);
  }

  bne(AT, R0, cas_label);
  delayed()->nop();

  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  // Note that because there is no current thread register on MIPS we
  // need to store off the mark word we read out of the object to
  // avoid reloading it and needing to recheck invariants below. This
  // store is unfortunate but it makes the overall code shorter and
  // simpler.
  st_ptr(swap_reg, saved_mark_addr);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  if (swap_reg_contains_mark) {
    null_check_offset = offset();
  }
  load_prototype_header(tmp_reg, obj_reg);
  xorr(tmp_reg, tmp_reg, swap_reg);
  get_thread(swap_reg);
  xorr(swap_reg, swap_reg, tmp_reg);

  move(AT, ~((int) markOopDesc::age_mask_in_place));
  andr(swap_reg, swap_reg, AT);

  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(swap_reg, R0, L);
    delayed()->nop();
    push(tmp_reg);
    push(A0);
    atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg);
    pop(A0);
    pop(tmp_reg);
    bind(L);
  }
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  beq(swap_reg, R0, done);
  delayed()->nop();
  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.
  move(AT, markOopDesc::biased_lock_mask_in_place);
  andr(AT, swap_reg, AT);
  bne(AT, R0, try_revoke_bias);
  delayed()->nop();
  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.
  move(AT, markOopDesc::epoch_mask_in_place);
  andr(AT, swap_reg, AT);
  bne(AT, R0, try_rebias);
  delayed()->nop();
  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go in to the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.
  ld_ptr(swap_reg, saved_mark_addr);

  move(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  andr(swap_reg, swap_reg, AT);

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  orr(tmp_reg, tmp_reg, swap_reg);
  //if (os::is_MP()) {
  //  sync();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(tmp_reg);
    push(A0);
    atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg);
    pop(A0);
    pop(tmp_reg);
    bind(L);
  }
  if (slow_case != NULL) {
    beq_far(AT, R0, *slow_case);
    delayed()->nop();
  }
  b(done);
  delayed()->nop();

  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  load_prototype_header(tmp_reg, obj_reg);
  get_thread(swap_reg);
  orr(tmp_reg, tmp_reg, swap_reg);
  ld_ptr(swap_reg, saved_mark_addr);

  //if (os::is_MP()) {
  //  sync();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(AT);
    push(tmp_reg);
    atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg);
    pop(tmp_reg);
    pop(AT);
    bind(L);
  }
  if (slow_case != NULL) {
    beq_far(AT, R0, *slow_case);
    delayed()->nop();
  }

  b(done);
  delayed()->nop();
  bind(try_revoke_bias);
  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  ld_ptr(swap_reg, saved_mark_addr);

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  load_prototype_header(tmp_reg, obj_reg);
  //if (os::is_MP()) {
  //  lock();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(AT);
    push(tmp_reg);
    atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg);
    pop(tmp_reg);
    pop(AT);
    bind(L);
  }

  bind(cas_label);
  return null_check_offset;
}

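// Note: the fast path above relies on an XOR trick: prototype_header XOR
// mark XOR current_thread leaves only the (masked-off) age bits set exactly
// when the object is biased to this thread in the current epoch, so a single
// beq on swap_reg decides the common case. The returned null_check_offset
// identifies the mark-word load whose implicit null check covers obj_reg.
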
void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
  assert(UseBiasedLocking, "why call this otherwise?");

  // Check for biased locking unlock case, which is a no-op
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
#ifdef _LP64
  ld(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
  daddi(AT, R0, markOopDesc::biased_lock_pattern);
#else
  lw(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
  addi(AT, R0, markOopDesc::biased_lock_pattern);
#endif

  beq(AT, temp_reg, done);
  delayed()->nop();
}

// The stack pointer adjustment is needed; see InterpreterMacroAssembler::super_call_VM_leaf.
// This method handles the stack problem, so you need not preserve the stack space for the argument.
void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments) {
  Label L, E;

  assert(number_of_arguments <= 4, "just check");

  andi(AT, SP, 0xf);
  beq(AT, R0, L);
  delayed()->nop();
  daddi(SP, SP, -8);
  call(entry_point, relocInfo::runtime_call_type);
  delayed()->nop();
  daddi(SP, SP, 8);
  b(E);
  delayed()->nop();

  bind(L);
  call(entry_point, relocInfo::runtime_call_type);
  delayed()->nop();
  bind(E);
}

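// Note: the n64 MIPS ABI expects SP to be 16-byte aligned at calls. The
// andi/beq pair above tests the low four bits of SP: if the frame is
// misaligned, SP is temporarily dropped by 8 bytes around the call and
// restored afterwards, so the C callee always sees an aligned stack.
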
void MacroAssembler::jmp(address entry) {
  patchable_set48(T9, (long)entry);
  jr(T9);
}

void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) {
  switch (rtype) {
    case relocInfo::runtime_call_type:
    case relocInfo::none:
      jmp(entry);
      break;
    default:
      {
        InstructionMark im(this);
        relocate(rtype);
        patchable_set48(T9, (long)entry);
        jr(T9);
      }
      break;
  }
}

void MacroAssembler::jmp_far(Label& L) {
  if (L.is_bound()) {
    address entry = target(L);
    assert(entry != NULL, "jmp most probably wrong");
    InstructionMark im(this);

    relocate(relocInfo::internal_word_type);
    patchable_set48(T9, (long)entry);
  } else {
    InstructionMark im(this);
    L.add_patch_at(code(), locator());

    relocate(relocInfo::internal_word_type);
    patchable_set48(T9, (long)pc());
  }

  jr(T9);
  delayed()->nop();
}

void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
  int oop_index;
  if (obj) {
    oop_index = oop_recorder()->find_index(obj);
  } else {
    oop_index = oop_recorder()->allocate_metadata_index(obj);
  }
  relocate(metadata_Relocation::spec(oop_index));
  patchable_set48(AT, (long)obj);
  sd(AT, dst);
}

void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
  int oop_index;
  if (obj) {
    oop_index = oop_recorder()->find_index(obj);
  } else {
    oop_index = oop_recorder()->allocate_metadata_index(obj);
  }
  relocate(metadata_Relocation::spec(oop_index));
  patchable_set48(dst, (long)obj);
}

void MacroAssembler::call(address entry) {
// C/C++ code assumes T9 is the entry point, so we always move entry to T9.
// Maybe there is a more graceful way to handle this. FIXME
// For more info, see class NativeCall.
#ifndef _LP64
  move(T9, (int)entry);
#else
  patchable_set48(T9, (long)entry);
#endif
  jalr(T9);
}

void MacroAssembler::call(address entry, relocInfo::relocType rtype) {
  switch (rtype) {
    case relocInfo::runtime_call_type:
    case relocInfo::none:
      call(entry);
      break;
    default:
      {
        InstructionMark im(this);
        relocate(rtype);
        call(entry);
      }
      break;
  }
}

void MacroAssembler::call(address entry, RelocationHolder& rh) {
  switch (rh.type()) {
    case relocInfo::runtime_call_type:
    case relocInfo::none:
      call(entry);
      break;
    default:
      {
        InstructionMark im(this);
        relocate(rh);
        call(entry);
      }
      break;
  }
}

void MacroAssembler::ic_call(address entry) {
  RelocationHolder rh = virtual_call_Relocation::spec(pc());
  patchable_set48(IC_Klass, (long)Universe::non_oop_word());
  assert(entry != NULL, "call most probably wrong");
  InstructionMark im(this);
  relocate(rh);
  patchable_call(entry);
}

void MacroAssembler::c2bool(Register r) {
  Label L;
  Assembler::beq(r, R0, L);
  delayed()->nop();
  move(r, 1);
  bind(L);
}

#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif

void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
  // In order to get locks to work, we need to fake an in_VM state
  JavaThread* thread = JavaThread::current();
  JavaThreadState saved_state = thread->thread_state();
  thread->set_thread_state(_thread_in_vm);
  if (ShowMessageBoxOnError) {
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    // This is the value of eip which points to where verify_oop will return.
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      ttyLocker ttyl;
      tty->print_cr("eip = 0x%08x", eip);
#ifndef PRODUCT
      tty->cr();
      findpc(eip);
      tty->cr();
#endif
      tty->print_cr("rax, = 0x%08x", rax);
      tty->print_cr("rbx, = 0x%08x", rbx);
      tty->print_cr("rcx = 0x%08x", rcx);
      tty->print_cr("rdx = 0x%08x", rdx);
      tty->print_cr("rdi = 0x%08x", rdi);
      tty->print_cr("rsi = 0x%08x", rsi);
      tty->print_cr("rbp, = 0x%08x", rbp);
      tty->print_cr("rsp = 0x%08x", rsp);
      BREAKPOINT;
    }
  } else {
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
    assert(false, "DEBUG MESSAGE");
  }
  ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
}

void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) {
  if (ShowMessageBoxOnError) {
    JavaThreadState saved_state = JavaThread::current()->thread_state();
    JavaThread::current()->set_thread_state(_thread_in_vm);
    {
      // In order to get locks to work, we need to fake an in_VM state
      ttyLocker ttyl;
      ::tty->print_cr("EXECUTION STOPPED: %s\n", msg);
      if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
        BytecodeCounter::print();
      }
      //      if (os::message_box(msg, "Execution stopped, print registers?"))
      //        regs->print(::tty);
    }
    ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state);
  } else {
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
  }
}

void MacroAssembler::stop(const char* msg) {
  li(A0, (long)msg);
#ifndef _LP64
  // reserve space for the argument. added by yjl 7/10/2005
  addiu(SP, SP, -1 * wordSize);
#endif
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
#ifndef _LP64
  // restore space for the argument
  addiu(SP, SP, 1 * wordSize);
#endif
  brk(17);
}

void MacroAssembler::warn(const char* msg) {
#ifdef _LP64
  pushad();
  li(A0, (long)msg);
  push(S2);
  move(AT, -(StackAlignmentInBytes));
  move(S2, SP);     // use S2 as a sender SP holder
  andr(SP, SP, AT); // align stack as required by ABI
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, S2);     // use S2 as a sender SP holder
  pop(S2);
  popad();
#else
  pushad();
  addi(SP, SP, -4);
  sw(A0, SP, -1 * wordSize);
  li(A0, (long)msg);
  addi(SP, SP, -1 * wordSize);
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
  addi(SP, SP, 1 * wordSize);
  lw(A0, SP, -1 * wordSize);
  addi(SP, SP, 4);
  popad();
#endif
}

void MacroAssembler::print_reg(Register reg) {
/*
char *s = getenv("PRINT_REG");
if (s == NULL)
  return;
if (strcmp(s, "1") != 0)
  return;
*/
  void * cur_pc = pc();
  pushad();
  NOT_LP64(push(FP);)

  li(A0, (long)reg->name());
  if (reg == SP)
    addiu(A1, SP, wordSize * 23); //23 registers saved in pushad()
  else if (reg == A0)
    ld(A1, SP, wordSize * 19); //A0 has been modified by li(A0, (long)reg->name()). Ugly Code!
  else
    move(A1, reg);
  li(A2, (long)cur_pc);
  push(S2);
  move(AT, -(StackAlignmentInBytes));
  move(S2, SP);     // use S2 as a sender SP holder
  andr(SP, SP, AT); // align stack as required by ABI
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_reg_with_pc), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, S2);     // use S2 as a sender SP holder
  pop(S2);
  NOT_LP64(pop(FP);)
  popad();

/*
  pushad();
#ifdef _LP64
  if (reg == SP)
    addiu(A0, SP, wordSize * 23); //23 registers saved in pushad()
  else
    move(A0, reg);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_long), relocInfo::runtime_call_type);
  delayed()->nop();
#else
  push(FP);
  move(A0, reg);
  dsrl32(A1, reg, 0);
  //call(CAST_FROM_FN_PTR(address, SharedRuntime::print_int), relocInfo::runtime_call_type);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_long), relocInfo::runtime_call_type);
  delayed()->nop();
  pop(FP);
#endif
  popad();
  pushad();
  NOT_LP64(push(FP);)
  char b[50];
  sprintf((char *)b, " pc: %p\n", cur_pc);
  li(A0, (long)(char *)b);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str), relocInfo::runtime_call_type);
  delayed()->nop();
  NOT_LP64(pop(FP);)
  popad();
*/
}

void MacroAssembler::print_reg(FloatRegister reg) {
  void * cur_pc = pc();
  pushad();
  NOT_LP64(push(FP);)
  li(A0, (long)reg->name());
  push(S2);
  move(AT, -(StackAlignmentInBytes));
  move(S2, SP);     // use S2 as a sender SP holder
  andr(SP, SP, AT); // align stack as required by ABI
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, S2);     // use S2 as a sender SP holder
  pop(S2);
  NOT_LP64(pop(FP);)
  popad();

  pushad();
  NOT_LP64(push(FP);)
#if 1
  move(FP, SP);
  move(AT, -(StackAlignmentInBytes));
  andr(SP, SP, AT);
  mov_d(F12, reg);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_double), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, FP);
#else
  mov_s(F12, reg);
  //call(CAST_FROM_FN_PTR(address, SharedRuntime::print_float), relocInfo::runtime_call_type);
  //delayed()->nop();
#endif
  NOT_LP64(pop(FP);)
  popad();

#if 0
  pushad();
  NOT_LP64(push(FP);)
  char* b = new char[50];
  sprintf(b, " pc: %p\n", cur_pc);
  li(A0, (long)b);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str), relocInfo::runtime_call_type);
  delayed()->nop();
  NOT_LP64(pop(FP);)
  popad();
#endif
}

void MacroAssembler::increment(Register reg, int imm) {
  if (!imm) return;
  if (is_simm16(imm)) {
#ifdef _LP64
    daddiu(reg, reg, imm);
#else
    addiu(reg, reg, imm);
#endif
  } else {
    move(AT, imm);
#ifdef _LP64
    daddu(reg, reg, AT);
#else
    addu(reg, reg, AT);
#endif
  }
}

void MacroAssembler::decrement(Register reg, int imm) {
  increment(reg, -imm);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             bool check_exceptions) {
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2);
  assert(arg_2 != A1, "smashed argument");
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             int number_of_arguments,
                             bool check_exceptions) {
  call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}

void MacroAssembler::call_VM_base(Register oop_result,
                                  Register java_thread,
                                  Register last_java_sp,
                                  address  entry_point,
                                  int      number_of_arguments,
                                  bool     check_exceptions) {
  address before_call_pc;
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifndef OPT_THREAD
    java_thread = T2;
    get_thread(java_thread);
#else
    java_thread = TREG;
#endif
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = SP;
  }
  // debugging support
  assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
  assert(number_of_arguments <= 4   , "cannot have more than 4 arguments");
  assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");

  assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save ebp");

  // set last Java frame before call
  before_call_pc = (address)pc();
  set_last_Java_frame(java_thread, last_java_sp, FP, before_call_pc);

  // do the call
  move(A0, java_thread);
  call(entry_point, relocInfo::runtime_call_type);
  delayed()->nop();

  // restore the thread (cannot use the pushed argument since arguments
  // may be overwritten by C code generated by an optimizing compiler);
  // however can use the register value directly if it is callee saved.
#ifndef OPT_THREAD
  get_thread(java_thread);
#else
#ifdef ASSERT
  {
    Label L;
    get_thread(AT);
    beq(java_thread, AT, L);
    delayed()->nop();
    stop("MacroAssembler::call_VM_base: TREG not callee saved?");
    bind(L);
  }
#endif
#endif

  // discard thread and arguments
  ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
  // reset last Java frame
  reset_last_Java_frame(java_thread, false, true);

  check_and_handle_popframe(java_thread);
  check_and_handle_earlyret(java_thread);
  if (check_exceptions) {
    // check for pending exceptions (java_thread is set upon return)
    Label L;
#ifdef _LP64
    ld(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
#else
    lw(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
#endif
    beq(AT, R0, L);
    delayed()->nop();
    li(AT, before_call_pc);
    push(AT);
    jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
    delayed()->nop();
    bind(L);
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
#ifdef _LP64
    ld(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset()));
    sd(R0, java_thread, in_bytes(JavaThread::vm_result_offset()));
#else
    lw(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset()));
    sw(R0, java_thread, in_bytes(JavaThread::vm_result_offset()));
#endif
    verify_oop(oop_result);
  }
}

void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
  move(V0, SP);
  // we also reserve space for java_thread here
#ifndef _LP64
  daddi(SP, SP, (1 + number_of_arguments) * (- wordSize));
#endif
  move(AT, -(StackAlignmentInBytes));
  andr(SP, SP, AT);
  call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  call_VM_leaf_base(entry_point, number_of_arguments);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  if (arg_0 != A0) move(A0, arg_0);
  call_VM_leaf(entry_point, 1);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
  if (arg_0 != A0) move(A0, arg_0);
  if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument");
  call_VM_leaf(entry_point, 2);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  if (arg_0 != A0) move(A0, arg_0);
  if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument");
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument");
  call_VM_leaf(entry_point, 3);
}

void MacroAssembler::super_call_VM_leaf(address entry_point) {
  MacroAssembler::call_VM_leaf_base(entry_point, 0);
}

void MacroAssembler::super_call_VM_leaf(address entry_point,
                                        Register arg_1) {
  if (arg_1 != A0) move(A0, arg_1);
  MacroAssembler::call_VM_leaf_base(entry_point, 1);
}

void MacroAssembler::super_call_VM_leaf(address entry_point,
                                        Register arg_1,
                                        Register arg_2) {
  if (arg_1 != A0) move(A0, arg_1);
  if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument");
  MacroAssembler::call_VM_leaf_base(entry_point, 2);
}

void MacroAssembler::super_call_VM_leaf(address entry_point,
                                        Register arg_1,
                                        Register arg_2,
                                        Register arg_3) {
  if (arg_1 != A0) move(A0, arg_1);
  if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument");
  if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument");
  MacroAssembler::call_VM_leaf_base(entry_point, 3);
}

void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
}

void MacroAssembler::check_and_handle_popframe(Register java_thread) {
}

void MacroAssembler::null_check(Register reg, int offset) {
  if (needs_explicit_null_check(offset)) {
    // provoke OS NULL exception if reg = NULL by
    // accessing M[reg] w/o changing any (non-CC) registers
    // NOTE: cmpl is plenty here to provoke a segv
    lw(AT, reg, 0);
    // Note: should probably use testl(rax, Address(reg, 0));
    //       may be shorter code (however, this version of
    //       testl needs to be implemented first)
  } else {
    // nothing to do, (later) access of M[reg + offset]
    // will provoke OS NULL exception if reg = NULL
  }
}

void MacroAssembler::enter() {
  push2(RA, FP);
  move(FP, SP);
}

void MacroAssembler::leave() {
#ifndef _LP64
  //move(SP, FP);
  //pop2(FP, RA);
  addi(SP, FP, 2 * wordSize);
  lw(RA, SP, -1 * wordSize);
  lw(FP, SP, -2 * wordSize);
#else
  daddi(SP, FP, 2 * wordSize);
  ld(RA, SP, -1 * wordSize);
  ld(FP, SP, -2 * wordSize);
#endif
}

/*
void MacroAssembler::os_breakpoint() {
  // instead of directly emitting a breakpoint, call os::breakpoint for better debugability
  // (e.g., MSVC can't call ps() otherwise)
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
}
*/

void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifndef OPT_THREAD
    java_thread = T1;
    get_thread(java_thread);
#else
    java_thread = TREG;
#endif
  }
  // we must set sp to zero to clear frame
  st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
  // must clear fp, so that compiled frames are not confused; it is possible
  // that we need it only for debugging
  if (clear_fp)
    st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));

  if (clear_pc)
    st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
}

void MacroAssembler::reset_last_Java_frame(bool clear_fp,
                                           bool clear_pc) {
  Register thread = TREG;
#ifndef OPT_THREAD
  get_thread(thread);
#endif
  // we must set sp to zero to clear frame
  sd(R0, Address(thread, JavaThread::last_Java_sp_offset()));
  // must clear fp, so that compiled frames are not confused; it is
  // possible that we need it only for debugging
  if (clear_fp) {
    sd(R0, Address(thread, JavaThread::last_Java_fp_offset()));
  }

  if (clear_pc) {
    sd(R0, Address(thread, JavaThread::last_Java_pc_offset()));
  }
}

// Write serialization page so VM thread can do a pseudo remote membar.
// We use the current thread pointer to calculate a thread specific
// offset to write to within the page. This minimizes bus traffic
// due to cache line collision.
void MacroAssembler::serialize_memory(Register thread, Register tmp) {
  move(tmp, thread);
  srl(tmp, tmp, os::get_serialize_page_shift_count());
  move(AT, (os::vm_page_size() - sizeof(int)));
  andr(tmp, tmp, AT);
  sw(tmp, Address(tmp, (intptr_t)os::get_memory_serialize_page()));
}

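// Note: this supports the "serialization page" pseudo-membar. Each thread
// stores to its own offset inside one shared page (thread pointer shifted by
// the serialize-page shift count, then masked to stay within the page). When
// the VM thread needs to serialize memory with all Java threads, it can
// write-protect that page; the resulting faults act as a barrier without
// requiring an explicit sync in this per-thread fast path.
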
// Calls to C land
//
// When entering C land, the fp & sp of the last Java frame have to be recorded
// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
// has to be reset to 0. This is required to allow proper stack traversal.
void MacroAssembler::set_last_Java_frame(Register java_thread,
                                         Register last_java_sp,
                                         Register last_java_fp,
                                         address  last_java_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifndef OPT_THREAD
    java_thread = T2;
    get_thread(java_thread);
#else
    java_thread = TREG;
#endif
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = SP;
  }

  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));
  }

  // last_java_pc is optional
  if (last_java_pc != NULL) {
    relocate(relocInfo::internal_pc_type);
    patchable_set48(AT, (long)last_java_pc);
    st_ptr(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
  }
  st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
}

void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         address  last_java_pc) {
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = SP;
  }

  Register thread = TREG;
#ifndef OPT_THREAD
  get_thread(thread);
#endif
  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    sd(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset()));
  }

  // last_java_pc is optional
  if (last_java_pc != NULL) {
    Address java_pc(thread,
                    JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
    li(AT, (intptr_t)(last_java_pc));
    sd(AT, java_pc);
  }

  sd(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset()));
}

  1550 //////////////////////////////////////////////////////////////////////////////////
  1551 #if INCLUDE_ALL_GCS
  1553 void MacroAssembler::g1_write_barrier_pre(Register obj,
  1554                                           Register pre_val,
  1555                                           Register thread,
  1556                                           Register tmp,
  1557                                           bool tosca_live,
  1558                                           bool expand_call) {
  1560   // If expand_call is true then we expand the call_VM_leaf macro
  1561   // directly to skip generating the check by
  1562   // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
  1564 #ifdef _LP64
  1565   assert(thread == TREG, "must be");
  1566 #endif // _LP64
  1568   Label done;
  1569   Label runtime;
  1571   assert(pre_val != noreg, "check this code");
  1573   if (obj != noreg) {
  1574     assert_different_registers(obj, pre_val, tmp);
  1575     assert(pre_val != V0, "check this code");
  1578   Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
  1579                                        PtrQueue::byte_offset_of_active()));
  1580   Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
  1581                                        PtrQueue::byte_offset_of_index()));
  1582   Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
  1583                                        PtrQueue::byte_offset_of_buf()));
  1586   // Is marking active?
  1587   if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
  1588     lw(AT, in_progress);
  1589   } else {
  1590     assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
  1591     lb(AT, in_progress);
  1592   }
  1593   beq(AT, R0, done);
  1594   delayed()->nop();
  1596   // Do we need to load the previous value?
  1597   if (obj != noreg) {
  1598     load_heap_oop(pre_val, Address(obj, 0));
  1599   }
  1601   // Is the previous value null?
  1602   beq(pre_val, R0, done);
  1603   delayed()->nop();
  1605   // Can we store original value in the thread's buffer?
  1606   // Is index == 0?
  1607   // (The index field is typed as size_t.)
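       // Illustrative sketch (comment only, not emitted code): the enqueue logic
       // below corresponds roughly to the following C, given the PtrQueue fields
       // used above (index counts down, in bytes, from the end of the buffer):
       //
       //   if (index == 0) goto runtime;        // buffer is full
       //   index -= wordSize;
       //   *((oop*)(buf + index)) = pre_val;    // record the previous value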
  1609   ld(tmp, index);
  1610   beq(tmp, R0, runtime);
  1611   delayed()->nop();
  1613   daddiu(tmp, tmp, -1 * wordSize);
  1614   sd(tmp, index);
  1615   ld(AT, buffer);
  1616   daddu(tmp, tmp, AT);
  1618   // Record the previous value
  1619   sd(pre_val, tmp, 0);
  1620   beq(R0, R0, done);
  1621   delayed()->nop();
  1623   bind(runtime);
  1624   // save the live input values
  1625   if (tosca_live) push(V0);
  1627   if (obj != noreg && obj != V0) push(obj);
  1629   if (pre_val != V0) push(pre_val);
  1631   // Calling the runtime using the regular call_VM_leaf mechanism generates
  1632   // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
  1633   // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.
  1634   //
  1635   // If we are generating the pre-barrier without a frame (e.g. in the
  1636   // intrinsified Reference.get() routine) then ebp might be pointing to
  1637   // the caller frame and so this check will most likely fail at runtime.
  1638   //
  1639   // Expanding the call directly bypasses the generation of the check.
  1640   // So when we do not have a full interpreter frame on the stack
  1641   // expand_call should be passed true.
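       // In short (sketch, not emitted code), the two paths below differ only in
       // whether that _last_sp check is emitted:
       //   expand_call  -> call_VM_leaf_base(g1_wb_pre, 2)           // args set up by hand, no check
       //   !expand_call -> call_VM_leaf(g1_wb_pre, pre_val, thread)  // goes through the checking wrapper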
  1643   NOT_LP64( push(thread); )
  1645   if (expand_call) {
  1646     LP64_ONLY( assert(pre_val != A1, "smashed arg"); )
  1647     if (thread != A1) move(A1, thread);
  1648     if (pre_val != A0) move(A0, pre_val);
  1649     MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
  1650   } else {
  1651     call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
  1652   }
  1654   NOT_LP64( pop(thread); )
  1656   // restore the live input values
  1657   if (pre_val != V0)
  1658     pop(pre_val);
  1660   if (obj != noreg && obj != V0)
  1661     pop(obj);
  1663   if (tosca_live) pop(V0);
  1665   bind(done);
  1666 }
  1668 void MacroAssembler::g1_write_barrier_post(Register store_addr,
  1669                                            Register new_val,
  1670                                            Register thread,
  1671                                            Register tmp,
  1672                                            Register tmp2) {
  1673   assert(tmp  != AT, "must be");
  1674   assert(tmp2 != AT, "must be");
  1675 #ifdef _LP64
  1676   assert(thread == TREG, "must be");
  1677 #endif // _LP64
  1679   Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
  1680                                        PtrQueue::byte_offset_of_index()));
  1681   Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
  1682                                        PtrQueue::byte_offset_of_buf()));
  1684   BarrierSet* bs = Universe::heap()->barrier_set();
  1685   CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  1686   assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
  1688   Label done;
  1689   Label runtime;
  1691   // Does store cross heap regions?
  1692   xorr(AT, store_addr, new_val);
  1693   dsrl(AT, AT, HeapRegion::LogOfHRGrainBytes);
  1694   beq(AT, R0, done);
  1695   delayed()->nop();
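       // Worked example of the test above: if LogOfHRGrainBytes == 20 (1MB regions),
       // two addresses in the same region agree in all bits above bit 19, so
       // (store_addr ^ new_val) >> 20 == 0 and the barrier is skipped.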
  1698   // crosses regions, storing NULL?
  1699   beq(new_val, R0, done);
  1700   delayed()->nop();
  1702   // storing region crossing non-NULL, is card already dirty?
  1703   const Register card_addr = tmp;
  1704   const Register cardtable = tmp2;
  1706   move(card_addr, store_addr);
  1707   dsrl(card_addr, card_addr, CardTableModRefBS::card_shift);
  1708   // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT
  1709   // a valid address and therefore is not properly handled by the relocation code.
  1710   set64(cardtable, (intptr_t)ct->byte_map_base);
  1711   daddu(card_addr, card_addr, cardtable);
  1713   lb(AT, card_addr, 0);
  1714   daddiu(AT, AT, -1 * (int)G1SATBCardTableModRefBS::g1_young_card_val());
  1715   beq(AT, R0, done);
  1716   delayed()->nop();
  1718   sync();
  1719   lb(AT, card_addr, 0);
  1720   daddiu(AT, AT, -1 * (int)CardTableModRefBS::dirty_card_val());
  1721   beq(AT, R0, done);
  1722   delayed()->nop();
  1725   // storing a region crossing, non-NULL oop, card is clean.
  1726   // dirty card and log.
  1727   move(AT, (int)CardTableModRefBS::dirty_card_val());
  1728   sb(AT, card_addr, 0);
  1730   lw(AT, queue_index);
  1731   beq(AT, R0, runtime);
  1732   delayed()->nop();
  1733   daddiu(AT, AT, -1 * wordSize);
  1734   sw(AT, queue_index);
  1735   ld(tmp2, buffer);
  1736 #ifdef _LP64
  1737   ld(AT, queue_index);
  1738   daddu(tmp2, tmp2, AT);
  1739   sd(card_addr, tmp2, 0);
  1740 #else
  1741   lw(AT, queue_index);
  1742   addu32(tmp2, tmp2, AT);
  1743   sw(card_addr, tmp2, 0);
  1744 #endif
  1745   beq(R0, R0, done);
  1746   delayed()->nop();
  1748   bind(runtime);
  1749   // save the live input values
  1750   push(store_addr);
  1751   push(new_val);
  1752 #ifdef _LP64
  1753   call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, TREG);
  1754 #else
  1755   push(thread);
  1756   call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
  1757   pop(thread);
  1758 #endif
  1759   pop(new_val);
  1760   pop(store_addr);
  1762   bind(done);
  1763 }
  1765 #endif // INCLUDE_ALL_GCS
  1766 //////////////////////////////////////////////////////////////////////////////////
  1769 void MacroAssembler::store_check(Register obj) {
  1770   // Does a store check for the oop in register obj. The content of
  1771   // register obj is destroyed afterwards.
  1772   store_check_part_1(obj);
  1773   store_check_part_2(obj);
  1774 }
  1776 void MacroAssembler::store_check(Register obj, Address dst) {
  1777   store_check(obj);
  1778 }
  1781 // split the store check operation so that other instructions can be scheduled in between
  1782 void MacroAssembler::store_check_part_1(Register obj) {
  1783   BarrierSet* bs = Universe::heap()->barrier_set();
  1784   assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  1785 #ifdef _LP64
  1786   dsrl(obj, obj, CardTableModRefBS::card_shift);
  1787 #else
  1788   shr(obj, CardTableModRefBS::card_shift);
  1789 #endif
  1790 }
  1792 void MacroAssembler::store_check_part_2(Register obj) {
  1793   BarrierSet* bs = Universe::heap()->barrier_set();
  1794   assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  1795   CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  1796   assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
  1798   set64(AT, (long)ct->byte_map_base);
  1799 #ifdef _LP64
  1800   dadd(AT, AT, obj);
  1801 #else
  1802   add(AT, AT, obj);
  1803 #endif
  1804   if (UseConcMarkSweepGC) sync();
  1805   sb(R0, AT, 0);
  1806 }
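       // Worked example (assuming the usual 512-byte cards, card_shift == 9):
       // for an oop stored at address 0x12345678 the two parts above compute
       //   AT = byte_map_base + (0x12345678 >> 9)
       // and write a 0 (dirty_card_val) to that card byte.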
  1808 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
  1809 void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
  1810                                    Register t1, Register t2, Label& slow_case) {
  1811   assert_different_registers(obj, var_size_in_bytes, t1, t2, AT);
  1813   Register end = t2;
  1814 #ifndef OPT_THREAD
  1815   Register thread = t1;
  1816   get_thread(thread);
  1817 #else
  1818   Register thread = TREG;
  1819 #endif
  1820   verify_tlab(t1, t2);//blows t1&t2
  1822   ld_ptr(obj, thread, in_bytes(JavaThread::tlab_top_offset()));
  1824   if (var_size_in_bytes == NOREG) {
  1825     // I don't think we need to move con_size_in_bytes to a register first.
  1826     // by yjl 8/17/2005
  1827     assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first");
  1828     addi(end, obj, con_size_in_bytes);
  1829   } else {
  1830     add(end, obj, var_size_in_bytes);
  1831   }
  1833   ld_ptr(AT, thread, in_bytes(JavaThread::tlab_end_offset()));
  1834   sltu(AT, AT, end);
  1835   bne_far(AT, R0, slow_case);
  1836   delayed()->nop();
  1839   // update the tlab top pointer
  1840   st_ptr(end, thread, in_bytes(JavaThread::tlab_top_offset()));
  1842   // recover var_size_in_bytes if necessary
  1843   /*if (var_size_in_bytes == end) {
  1844     sub(var_size_in_bytes, end, obj);
  1845     }*/
  1847   verify_tlab(t1, t2);
  1848 }
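       // The fast path above is plain bump-the-pointer allocation; as a C-style
       // sketch (comment only, accessor names abbreviated):
       //
       //   obj = thread->tlab_top();
       //   end = obj + size;
       //   if (thread->tlab_end() < end) goto slow_case;
       //   thread->set_tlab_top(end);     // obj now holds the new object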
  1850 // Defines obj, preserves var_size_in_bytes
  1851 void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
  1852                                    Register t1, Register t2, Label& slow_case) {
  1853   assert_different_registers(obj, var_size_in_bytes, t1, AT);
  1854   if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { //by yyq
  1855     // No allocation in the shared eden.
  1856     b_far(slow_case);
  1857     delayed()->nop();
  1858   } else {
  1860 #ifndef _LP64
  1861     Address heap_top(t1, Assembler::split_low((intptr_t)Universe::heap()->top_addr()));
  1862     lui(t1, split_high((intptr_t)Universe::heap()->top_addr()));
  1863 #else
  1864     Address heap_top(t1);
  1865     li(t1, (long)Universe::heap()->top_addr());
  1866 #endif
  1867     ld_ptr(obj, heap_top);
  1869     Register end = t2;
  1870     Label retry;
  1872     bind(retry);
  1873     if (var_size_in_bytes == NOREG) {
  1874       // I don't think we need to move con_size_in_bytes to a register first.
  1875       assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first");
  1876       addi(end, obj, con_size_in_bytes);
  1877     } else {
  1878       add(end, obj, var_size_in_bytes);
  1879     }
  1880     // if end < obj then we wrapped around => object too long => slow case
  1881     sltu(AT, end, obj);
  1882     bne_far(AT, R0, slow_case);
  1883     delayed()->nop();
  1885     li(AT, (long)Universe::heap()->end_addr());
  1886     sltu(AT, AT, end);
  1887     bne_far(AT, R0, slow_case);
  1888     delayed()->nop();
  1889     // Compare obj with the top addr; if they are still equal, store end (the new
  1890     // top) through the top addr pointer. On MIPS there is no ZF and no lock prefix:
  1891     // the cmpxchg below sets AT to 1 on success and to 0 otherwise.
  1892     //if (os::is_MP()) {
  1893     //  sync();
  1894     //}
  1896     // if someone beat us on the allocation, try again, otherwise continue
  1897     cmpxchg(end, heap_top, obj);
  1898     beq_far(AT, R0, retry);    //by yyq
  1899     delayed()->nop();
  1900   }
  1902 }
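       // Sketch of the retry loop above (comment only, intent-level):
       //
       //   do {
       //     obj = *top_addr;
       //     end = obj + size;
       //     if (end < obj) goto slow_case;          // wrapped around: object too long
       //     if (end > heap_end) goto slow_case;     // heap exhausted
       //   } while (!cmpxchg(top_addr, obj, end));   // retry if another thread won the race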
  1904 // C2 doesn't invoke this one.
  1905 void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case) {
  1906   Register top = T0;
  1907   Register t1  = T1;
  1908 /* Jin: tlab_refill() is called in
  1910      [c1_Runtime1_mips.cpp] Runtime1::generate_code_for(new_type_array_id);
  1912   In generate_code_for(), T2 has been assigned as a register (length) which is used
  1913  after tlab_refill() is called;
  1914   therefore, tlab_refill() should not use T2.
  1916  Source:
  1918 Exception in thread "main" java.lang.ArrayIndexOutOfBoundsException
  1919         at java.lang.System.arraycopy(Native Method)
  1920         at java.util.Arrays.copyOf(Arrays.java:2799)  <-- alloc_array
  1921         at sun.misc.Resource.getBytes(Resource.java:117)
  1922         at java.net.URLClassLoader.defineClass(URLClassLoader.java:273)
  1923         at java.net.URLClassLoader.findClass(URLClassLoader.java:205)
  1924         at java.lang.ClassLoader.loadClass(ClassLoader.java:321)
  1925  */
  1926   Register t2  = T9;
  1927   Register t3  = T3;
  1928   Register thread_reg = T8;
  1929   Label do_refill, discard_tlab;
  1930   if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { //by yyq
  1931     // No allocation in the shared eden.
  1932     b(slow_case);
  1933     delayed()->nop();
  1934   }
  1936   get_thread(thread_reg);
  1938   ld_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
  1939   ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_end_offset()));
  1941   // calculate amount of free space
  1942   sub(t1, t1, top);
  1943   shr(t1, LogHeapWordSize);
  1945   // Retain tlab and allocate object in shared space if
  1946   // the amount free in the tlab is too large to discard.
  1947   ld_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
  1948   slt(AT, t2, t1);
  1949   beq(AT, R0, discard_tlab);
  1950   delayed()->nop();
  1952   // Retain
  1954 #ifndef _LP64
  1955   move(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
  1956 #else
  1957   li(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
  1958 #endif
  1959   add(t2, t2, AT);
  1960   st_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
  1962   if (TLABStats) {
  1963     // increment number of slow_allocations
  1964     lw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
  1965     addiu(AT, AT, 1);
  1966     sw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
  1967   }
  1968   b(try_eden);
  1969   delayed()->nop();
  1971   bind(discard_tlab);
  1972   if (TLABStats) {
  1973     // increment number of refills
  1974     lw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
  1975     addi(AT, AT, 1);
  1976     sw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
  1977     // accumulate wastage -- t1 is amount free in tlab
  1978     lw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
  1979     add(AT, AT, t1);
  1980     sw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
  1981   }
  1983   // if tlab is currently allocated (top or end != null) then
  1984   // fill [top, end + alignment_reserve) with array object
  1985   beq(top, R0, do_refill);
  1986   delayed()->nop();
  1988   // set up the mark word
  1989   li(AT, (long)markOopDesc::prototype()->copy_set_hash(0x2));
  1990   st_ptr(AT, top, oopDesc::mark_offset_in_bytes());
  1992   // set the length to the remaining space
  1993   addi(t1, t1, - typeArrayOopDesc::header_size(T_INT));
  1994   addi(t1, t1, ThreadLocalAllocBuffer::alignment_reserve());
  1995   shl(t1, log2_intptr(HeapWordSize/sizeof(jint)));
  1996   sw(t1, top, arrayOopDesc::length_offset_in_bytes());
  1998   // set klass to intArrayKlass
  1999 #ifndef _LP64
  2000   lui(AT, split_high((intptr_t)Universe::intArrayKlassObj_addr()));
  2001   lw(t1, AT, split_low((intptr_t)Universe::intArrayKlassObj_addr()));
  2002 #else
  2003   li(AT, (intptr_t)Universe::intArrayKlassObj_addr());
  2004   ld_ptr(t1, AT, 0);
  2005 #endif
  2006   //st_ptr(t1, top, oopDesc::klass_offset_in_bytes());
  2007   store_klass(top, t1);
  2009   // refill the tlab with an eden allocation
  2010   bind(do_refill);
  2011   ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
  2012   shl(t1, LogHeapWordSize);
  2013   // add object_size ??
  2014   eden_allocate(top, t1, 0, t2, t3, slow_case);
  2016   // Check that t1 was preserved in eden_allocate.
  2017 #ifdef ASSERT
  2018   if (UseTLAB) {
  2019     Label ok;
  2020     assert_different_registers(thread_reg, t1);
  2021     ld_ptr(AT, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
  2022     shl(AT, LogHeapWordSize);
  2023     beq(AT, t1, ok);
  2024     delayed()->nop();
  2025     stop("assert(t1 != tlab size)");
  2026     should_not_reach_here();
  2028     bind(ok);
  2029   }
  2030 #endif
  2031   st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_start_offset()));
  2032   st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
  2033   add(top, top, t1);
  2034   addi(top, top, - ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
  2035   st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_end_offset()));
  2036   verify_tlab(t1, t2);
  2037   b(retry);
  2038   delayed()->nop();
  2039 }
  2041 static const double     pi_4 =  0.7853981633974483;
  2043 // The x86 version is too clumsy; I don't think we need that fuss. Maybe I'm wrong, FIXME
  2044 // must get the argument (a double) in F12/F13
  2045 //void MacroAssembler::trigfunc(char trig, bool preserve_cpu_regs, int num_fpu_regs_in_use) {
  2046 // We need to preserve any register which may be modified during the call. @Jerome
  2047 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
  2048   // save all modified registers here
  2049 //  if (preserve_cpu_regs) {
  2050 //  }
  2051   // FIXME: in the disassembly of trigfunc only V0, V1, T9, SP and RA are used, so we only save V0, V1 and T9
  2052   pushad();
  2053   // we should reserve the stack space before the call
  2054   addi(SP, SP, -wordSize * 2);
  2055   switch (trig) {
  2056     case 's':
  2057       call( CAST_FROM_FN_PTR(address, SharedRuntime::dsin), relocInfo::runtime_call_type );
  2058       delayed()->nop();
  2059       break;
  2060     case 'c':
  2061       call( CAST_FROM_FN_PTR(address, SharedRuntime::dcos), relocInfo::runtime_call_type );
  2062       delayed()->nop();
  2063       break;
  2064     case 't':
  2065       call( CAST_FROM_FN_PTR(address, SharedRuntime::dtan), relocInfo::runtime_call_type );
  2066       delayed()->nop();
  2067       break;
  2068     default:
  2069       assert(false, "bad intrinsic");
  2070       break;
  2071   }
  2073   addi(SP, SP, wordSize * 2);
  2074   popad();
  2075 //  if (preserve_cpu_regs) {
  2076 //  }
  2077 }
  2079 #ifdef _LP64
  2080 void MacroAssembler::li(Register rd, long imm) {
  2081   if (imm <= max_jint && imm >= min_jint) {
  2082     li32(rd, (int)imm);
  2083   } else if (julong(imm) <= 0xFFFFFFFF) {
  2084     assert_not_delayed();
  2085     // lui sign-extends, so we can't use that.
  2086     ori(rd, R0, julong(imm) >> 16);
  2087     dsll(rd, rd, 16);
  2088     ori(rd, rd, split_low(imm));
  2089   //aoqi_test
  2090   //} else if ((imm > 0) && ((imm >> 48) == 0)) {
  2091   } else if ((imm > 0) && is_simm16(imm >> 32)) {
  2092     /* A 48-bit address */
  2093     li48(rd, imm);
  2094   } else {
  2095     li64(rd, imm);
  2096   }
  2097 }
  2098 #else
  2099 void MacroAssembler::li(Register rd, long imm) {
  2100   li32(rd, (int)imm);
  2101 }
  2102 #endif
  2104 void MacroAssembler::li32(Register reg, int imm) {
  2105   if (is_simm16(imm)) {
  2106     /* Jin: for imm < 0, we should use addi instead of addiu.
  2108      *  java.lang.StringCoding$StringDecoder.decode(jobject, jint, jint)
  2110      *  78 move [int:-1|I] [a0|I]
  2111      *    : daddi a0, zero, 0xffffffff  (correct)
  2112      *    : daddiu a0, zero, 0xffffffff (incorrect)
  2113      */
  2114     if (imm >= 0)
  2115       addiu(reg, R0, imm);
  2116     else
  2117       addi(reg, R0, imm);
  2118   } else {
  2119     lui(reg, split_low(imm >> 16));
  2120     if (split_low(imm))
  2121       ori(reg, reg, split_low(imm));
  2122   }
  2123 }
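       // Example (illustrative): li32(reg, 0x12345678) expands to
       //   lui reg, 0x1234        # reg = 0x12340000
       //   ori reg, reg, 0x5678   # reg = 0x12345678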
  2125 #ifdef _LP64
  2126 void MacroAssembler::set64(Register d, jlong value) {
  2127   assert_not_delayed();
  2129   int hi = (int)(value >> 32);
  2130   int lo = (int)(value & ~0);
  2132   if (value == lo) {  // 32-bit integer
  2133     if (is_simm16(value)) {
  2134       daddiu(d, R0, value);
  2135     } else {
  2136       lui(d, split_low(value >> 16));
  2137       if (split_low(value)) {
  2138         ori(d, d, split_low(value));
  2139       }
  2140     }
  2141   } else if (hi == 0) {  // hardware zero-extends to upper 32
  2142       ori(d, R0, julong(value) >> 16);
  2143       dsll(d, d, 16);
  2144       if (split_low(value)) {
  2145         ori(d, d, split_low(value));
  2146       }
  2147   } else if ((value > 0) && is_simm16(value >> 32)) {  // li48
  2148     // 4 insts
  2149     li48(d, value);
  2150   } else {  // li64
  2151     // 6 insts
  2152     li64(d, value);
  2153   }
  2154 }
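       // Instruction counts chosen above, summarized (insts_for_set64() below
       // must be kept in sync with this):
       //   simm16 value                 -> 1 insn  (daddiu)
       //   other sign-extended 32-bit   -> 1-2 insns (lui [+ ori])
       //   zero-extended 32-bit         -> 2-3 insns (ori + dsll [+ ori])
       //   positive 48-bit (li48)       -> 4 insns
       //   anything else (li64)         -> 6 insns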
  2157 int MacroAssembler::insts_for_set64(jlong value) {
  2158   int hi = (int)(value >> 32);
  2159   int lo = (int)(value & ~0);
  2161   int count = 0;
  2163   if (value == lo) {  // 32-bit integer
  2164     if (is_simm16(value)) {
  2165       //daddiu(d, R0, value);
  2166       count++;
  2167     } else {
  2168       //lui(d, split_low(value >> 16));
  2169       count++;
  2170       if (split_low(value)) {
  2171         //ori(d, d, split_low(value));
  2172         count++;
  2173       }
  2174     }
  2175   } else if (hi == 0) {  // hardware zero-extends to upper 32
  2176       //ori(d, R0, julong(value) >> 16);
  2177       //dsll(d, d, 16);
  2178       count += 2;
  2179       if (split_low(value)) {
  2180         //ori(d, d, split_low(value));
  2181         count++;
  2182       }
  2183   } else if ((value > 0) && is_simm16(value >> 32)) {  // li48
  2184     // 4 insts
  2185     //li48(d, value);
  2186     count += 4;
  2187   } else {  // li64
  2188     // 6 insts
  2189     //li64(d, value);
  2190     count += 6;
  2191   }
  2193   return count;
  2194 }
  2196 void MacroAssembler::patchable_set48(Register d, jlong value) {
  2197   assert_not_delayed();
  2199   int hi = (int)(value >> 32);
  2200   int lo = (int)(value & ~0);
  2202   int count = 0;
  2204   if (value == lo) {  // 32-bit integer
  2205     if (is_simm16(value)) {
  2206       daddiu(d, R0, value);
  2207       count += 1;
  2208     } else {
  2209       lui(d, split_low(value >> 16));
  2210       count += 1;
  2211       if (split_low(value)) {
  2212         ori(d, d, split_low(value));
  2213         count += 1;
  2214       }
  2215     }
  2216   } else if (hi == 0) {  // hardware zero-extends to upper 32
  2217       ori(d, R0, julong(value) >> 16);
  2218       dsll(d, d, 16);
  2219       count += 2;
  2220       if (split_low(value)) {
  2221         ori(d, d, split_low(value));
  2222         count += 1;
  2223       }
  2224   } else if ((value > 0) && is_simm16(value >> 32)) {  // li48
  2225     // 4 insts
  2226     li48(d, value);
  2227     count += 4;
  2228   } else {  // li64
  2229     tty->print_cr("value = 0x%lx", value);
  2230     guarantee(false, "Not supported yet !");
  2231   }
  2233   for (; count < 4; count++) {
  2234     nop();
  2235   }
  2236 }
  2238 void MacroAssembler::patchable_set32(Register d, jlong value) {
  2239   assert_not_delayed();
  2241   int hi = (int)(value >> 32);
  2242   int lo = (int)(value & ~0);
  2244   int count = 0;
  2246   if (value == lo) {  // 32-bit integer
  2247     if (is_simm16(value)) {
  2248       daddiu(d, R0, value);
  2249       count += 1;
  2250     } else {
  2251       lui(d, split_low(value >> 16));
  2252       count += 1;
  2253       if (split_low(value)) {
  2254         ori(d, d, split_low(value));
  2255         count += 1;
  2256       }
  2257     }
  2258   } else if (hi == 0) {  // hardware zero-extends to upper 32
  2259       ori(d, R0, julong(value) >> 16);
  2260       dsll(d, d, 16);
  2261       count += 2;
  2262       if (split_low(value)) {
  2263         ori(d, d, split_low(value));
  2264         count += 1;
  2265       }
  2266   } else {
  2267     tty->print_cr("value = 0x%lx", value);
  2268     guarantee(false, "Not supported yet !");
  2269   }
  2271   for (; count < 3; count++) {
  2272     nop();
  2273   }
  2274 }
  2276 void MacroAssembler::patchable_call32(Register d, jlong value) {
  2277   assert_not_delayed();
  2279   int hi = (int)(value >> 32);
  2280   int lo = (int)(value & ~0);
  2282   int count = 0;
  2284   if (value == lo) {  // 32-bit integer
  2285     if (is_simm16(value)) {
  2286       daddiu(d, R0, value);
  2287       count += 1;
  2288     } else {
  2289       lui(d, split_low(value >> 16));
  2290       count += 1;
  2291       if (split_low(value)) {
  2292         ori(d, d, split_low(value));
  2293         count += 1;
  2294       }
  2295     }
  2296   } else {
  2297     tty->print_cr("value = 0x%lx", value);
  2298     guarantee(false, "Not supported yet !");
  2299   }
  2301   for (; count < 2; count++) {
  2302     nop();
  2303   }
  2304 }
  2306 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
  2307   assert(UseCompressedClassPointers, "should only be used for compressed header");
  2308   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
  2310   int klass_index = oop_recorder()->find_index(k);
  2311   RelocationHolder rspec = metadata_Relocation::spec(klass_index);
  2312   long narrowKlass = (long)Klass::encode_klass(k);
  2314   relocate(rspec, Assembler::narrow_oop_operand);
  2315   patchable_set48(dst, narrowKlass);
  2316 }
  2319 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
  2320   assert(UseCompressedOops, "should only be used for compressed header");
  2321   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
  2323   int oop_index = oop_recorder()->find_index(obj);
  2324   RelocationHolder rspec = oop_Relocation::spec(oop_index);
  2326   relocate(rspec, Assembler::narrow_oop_operand);
  2327   patchable_set48(dst, oop_index);
  2328 }
  2330 void MacroAssembler::li64(Register rd, long imm) {
  2331   assert_not_delayed();
  2332   lui(rd, imm >> 48);
  2333   ori(rd, rd, split_low(imm >> 32));
  2334   dsll(rd, rd, 16);
  2335   ori(rd, rd, split_low(imm >> 16));
  2336   dsll(rd, rd, 16);
  2337   ori(rd, rd, split_low(imm));
  2338 }
  2340 void MacroAssembler::li48(Register rd, long imm) {
  2341   assert_not_delayed();
  2342   assert(is_simm16(imm >> 32), "Not a 48-bit address");
  2343   lui(rd, imm >> 32);
  2344   ori(rd, rd, split_low(imm >> 16));
  2345   dsll(rd, rd, 16);
  2346   ori(rd, rd, split_low(imm));
  2347 }
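       // Example (illustrative): li48(rd, 0x123456789abc) emits
       //   lui  rd, 0x1234        # rd = 0x0000000012340000
       //   ori  rd, rd, 0x5678    # rd = 0x0000000012345678
       //   dsll rd, rd, 16        # rd = 0x0000123456780000
       //   ori  rd, rd, 0x9abc    # rd = 0x0000123456789abc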
  2348 #endif
  2349 // NOTE: we do not push eax as i486 does.
  2350 // x86 has to save eax because it uses eax as the jump register.
  2351 void MacroAssembler::verify_oop(Register reg, const char* s) {
  2352   /*
  2353      if (!VerifyOops) return;
  2355   // Pass register number to verify_oop_subroutine
  2356   char* b = new char[strlen(s) + 50];
  2357   sprintf(b, "verify_oop: %s: %s", reg->name(), s);
  2358   push(rax);                          // save rax,
  2359   push(reg);                          // pass register argument
  2360   ExternalAddress buffer((address) b);
  2361   // avoid using pushptr, as it modifies scratch registers
  2362   // and our contract is not to modify anything
  2363   movptr(rax, buffer.addr());
  2364   push(rax);
  2365   // call indirectly to solve generation ordering problem
  2366   movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  2367   call(rax);
  2368    */
  2369   if (!VerifyOops) return;
  2370   const char * b = NULL;
  2371   stringStream ss;
  2372   ss.print("verify_oop: %s: %s", reg->name(), s);
  2373   b = code_string(ss.as_string());
  2374 #ifdef _LP64
  2375   pushad();
  2376   move(A1, reg);
  2377   li(A0, (long)b);
  2378   li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
  2379   ld(T9, AT, 0);
  2380   jalr(T9);
  2381   delayed()->nop();
  2382   popad();
  2383 #else
  2384   // Pass register number to verify_oop_subroutine
  2385   sw(T0, SP, - wordSize);
  2386   sw(T1, SP, - 2*wordSize);
  2387   sw(RA, SP, - 3*wordSize);
  2388   sw(A0, SP, - 4*wordSize);
  2389   sw(A1, SP, - 5*wordSize);
  2390   sw(AT, SP, - 6*wordSize);
  2391   sw(T9, SP, - 7*wordSize);
  2392   addiu(SP, SP, - 7 * wordSize);
  2393   move(A1, reg);
  2394   li(A0, (long)b);
  2395   // call indirectly to solve generation ordering problem
  2396   li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
  2397   lw(T9, AT, 0);
  2398   jalr(T9);
  2399   delayed()->nop();
  2400   lw(T0, SP, 6* wordSize);
  2401   lw(T1, SP, 5* wordSize);
  2402   lw(RA, SP, 4* wordSize);
  2403   lw(A0, SP, 3* wordSize);
  2404   lw(A1, SP, 2* wordSize);
  2405   lw(AT, SP, 1* wordSize);
  2406   lw(T9, SP, 0* wordSize);
  2407   addiu(SP, SP, 7 * wordSize);
  2408 #endif
  2409 }
  2412 void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
  2413   if (!VerifyOops) {
  2414     nop();
  2415     return;
  2416   }
  2417   // Pass register number to verify_oop_subroutine
  2418   const char * b = NULL;
  2419   stringStream ss;
  2420   ss.print("verify_oop_addr: %s",  s);
  2421   b = code_string(ss.as_string());
  2423   st_ptr(T0, SP, - wordSize);
  2424   st_ptr(T1, SP, - 2*wordSize);
  2425   st_ptr(RA, SP, - 3*wordSize);
  2426   st_ptr(A0, SP, - 4*wordSize);
  2427   st_ptr(A1, SP, - 5*wordSize);
  2428   st_ptr(AT, SP, - 6*wordSize);
  2429   st_ptr(T9, SP, - 7*wordSize);
  2430   ld_ptr(A1, addr);   // addr may use SP, so load from it before changing SP
  2431   addiu(SP, SP, - 7 * wordSize);
  2433   li(A0, (long)b);
  2434   // call indirectly to solve generation ordering problem
  2435   li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
  2436   ld_ptr(T9, AT, 0);
  2437   jalr(T9);
  2438   delayed()->nop();
  2439   ld_ptr(T0, SP, 6* wordSize);
  2440   ld_ptr(T1, SP, 5* wordSize);
  2441   ld_ptr(RA, SP, 4* wordSize);
  2442   ld_ptr(A0, SP, 3* wordSize);
  2443   ld_ptr(A1, SP, 2* wordSize);
  2444   ld_ptr(AT, SP, 1* wordSize);
  2445   ld_ptr(T9, SP, 0* wordSize);
  2446   addiu(SP, SP, 7 * wordSize);
  2447 }
  2449 // used registers :  T0, T1
  2450 void MacroAssembler::verify_oop_subroutine() {
  2451   // RA: ra
  2452   // A0: char* error message
  2453   // A1: oop   object to verify
  2455   Label exit, error;
  2456   // increment counter
  2457   li(T0, (long)StubRoutines::verify_oop_count_addr());
  2458   lw(AT, T0, 0);
  2459 #ifdef _LP64
  2460   daddi(AT, AT, 1);
  2461 #else
  2462   addi(AT, AT, 1);
  2463 #endif
  2464   sw(AT, T0, 0);
  2466   // make sure object is 'reasonable'
  2467   beq(A1, R0, exit);         // if obj is NULL it is ok
  2468   delayed()->nop();
  2470   // Check if the oop is in the right area of memory
  2471   //const int oop_mask = Universe::verify_oop_mask();
  2472   //const int oop_bits = Universe::verify_oop_bits();
  2473   const uintptr_t oop_mask = Universe::verify_oop_mask();
  2474   const uintptr_t oop_bits = Universe::verify_oop_bits();
  2475   li(AT, oop_mask);
  2476   andr(T0, A1, AT);
  2477   li(AT, oop_bits);
  2478   bne(T0, AT, error);
  2479   delayed()->nop();
  2481   // make sure klass is 'reasonable'
  2482   //add for compressedoops
  2483   reinit_heapbase();
  2484   //add for compressedoops
  2485   load_klass(T0, A1);
  2486   beq(T0, R0, error);                        // if klass is NULL it is broken
  2487   delayed()->nop();
  2488   #if 0
  2489   //FIXME:wuhui.
  2490   // Check if the klass is in the right area of memory
  2491   //const int klass_mask = Universe::verify_klass_mask();
  2492   //const int klass_bits = Universe::verify_klass_bits();
  2493   const uintptr_t klass_mask = Universe::verify_klass_mask();
  2494   const uintptr_t klass_bits = Universe::verify_klass_bits();
  2496   li(AT, klass_mask);
  2497   andr(T1, T0, AT);
  2498   li(AT, klass_bits);
  2499   bne(T1, AT, error);
  2500   delayed()->nop();
  2501   // make sure klass' klass is 'reasonable'
  2502   //add for compressedoops
  2503   load_klass(T0, T0);
  2504   beq(T0, R0, error);  // if klass' klass is NULL it is broken
  2505   delayed()->nop();
  2507   li(AT, klass_mask);
  2508   andr(T1, T0, AT);
  2509   li(AT, klass_bits);
  2510   bne(T1, AT, error);
  2511   delayed()->nop();     // if klass not in right area of memory it is broken too.
  2512 #endif
  2513   // return if everything seems ok
  2514   bind(exit);
  2516   jr(RA);
  2517   delayed()->nop();
  2519   // handle errors
  2520   bind(error);
  2521   pushad();
  2522 #ifndef _LP64
  2523   addi(SP, SP, (-1) * wordSize);
  2524 #endif
  2525   call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  2526   delayed()->nop();
  2527 #ifndef _LP64
  2528   addiu(SP, SP, 1 * wordSize);
  2529 #endif
  2530   popad();
  2531   jr(RA);
  2532   delayed()->nop();
  2533 }
  2535 void MacroAssembler::verify_tlab(Register t1, Register t2) {
  2536 #ifdef ASSERT
  2537   assert_different_registers(t1, t2, AT);
  2538   if (UseTLAB && VerifyOops) {
  2539     Label next, ok;
  2541     get_thread(t1);
  2543     ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset()));
  2544     ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset()));
  2545     sltu(AT, t2, AT);
  2546     beq(AT, R0, next);
  2547     delayed()->nop();
  2549     stop("assert(top >= start)");
  2551     bind(next);
  2552     ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset()));
  2553     sltu(AT, AT, t2);
  2554     beq(AT, R0, ok);
  2555     delayed()->nop();
  2557     stop("assert(top <= end)");
  2559     bind(ok);
  2560   }
  2562 #endif
  2563 }
  2564 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
  2565                                                       Register tmp,
  2566                                                       int offset) {
  2567   intptr_t value = *delayed_value_addr;
  2568   if (value != 0)
  2569     return RegisterOrConstant(value + offset);
  2570   AddressLiteral a(delayed_value_addr);
  2571   // load indirectly to solve generation ordering problem
  2572   //movptr(tmp, ExternalAddress((address) delayed_value_addr));
  2573   //ld(tmp, a);
  2574   if (offset != 0)
  2575     daddi(tmp, tmp, offset);
  2577   return RegisterOrConstant(tmp);
  2578 }
  2580 void MacroAssembler::hswap(Register reg) {
  2581   //short
  2582   //andi(reg, reg, 0xffff);
  2583   srl(AT, reg, 8);
  2584   sll(reg, reg, 24);
  2585   sra(reg, reg, 16);
  2586   orr(reg, reg, AT);
  2587 }
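       // Example (illustrative), for reg holding the zero-extended halfword 0x1234:
       //   srl AT, reg, 8    -> AT  = 0x00000012
       //   sll reg, reg, 24  -> reg = 0x34000000
       //   sra reg, reg, 16  -> reg = 0x00003400   (the low byte's sign bit propagates)
       //   or  reg, reg, AT  -> reg = 0x00003412   (byte-swapped, sign-extended halfword)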
  2589 void MacroAssembler::huswap(Register reg) {
  2590 #ifdef _LP64
  2591   dsrl(AT, reg, 8);
  2592   dsll(reg, reg, 24);
  2593   dsrl(reg, reg, 16);
  2594   orr(reg, reg, AT);
  2595   andi(reg, reg, 0xffff);
  2596 #else
  2597   //andi(reg, reg, 0xffff);
  2598   srl(AT, reg, 8);
  2599   sll(reg, reg, 24);
  2600   srl(reg, reg, 16);
  2601   orr(reg, reg, AT);
  2602 #endif
  2603 }
  2605 // A tricky byte swap that needs only one extra register (AT)
  2606 // 32 bits
  2607 void MacroAssembler::swap(Register reg) {
  2608   srl(AT, reg, 8);
  2609   sll(reg, reg, 24);
  2610   orr(reg, reg, AT);
  2611   //reg : 4 1 2 3
  2612   srl(AT, AT, 16);
  2613   xorr(AT, AT, reg);
  2614   andi(AT, AT, 0xff);
  2615   //AT : 0 0 0 1^3);
  2616   xorr(reg, reg, AT);
  2617   //reg : 4 1 2 1
  2618   sll(AT, AT, 16);
  2619   xorr(reg, reg, AT);
  2620   //reg : 4 3 2 1
  2621 }
  2623 #ifdef _LP64
  2625 /* do 32-bit CAS using MIPS64 lld/scd
  2627   Jin: cas_int should only compare 32-bits of the memory value.
  2628        However, lld/scd will do 64-bit operation, which violates the intention of cas_int.
  2629        To simulate a 32-bit atomic operation, the value loaded with LLD is split into
  2630        two halves, and only the low 32 bits are compared. If they are equal, the low 32 bits of newval,
  2631        plus the high 32 bits of the memory value, are stored together with SCD.
  2633 Example:
  2635       double d = 3.1415926;
  2636       System.err.println("hello" + d);
  2638   sun.misc.FloatingDecimal$1.<init>()
  2640    `- java.util.concurrent.atomic.AtomicInteger::compareAndSet()
  2642   38 cas_int [a7a7|J] [a0|I] [a6|I]
  2643 // a0: 0xffffffffe8ea9f63 pc: 0x55647f3354
  2644 // a6: 0x4ab325aa
  2646 again:
  2647    0x00000055647f3c5c: lld at, 0x0(a7)                          ; 64-bit load, "0xe8ea9f63"
  2649    0x00000055647f3c60: sll t9, at, 0                            ; t9: low-32 bits (sign extended)
  2650    0x00000055647f3c64: dsrl32 t8, at, 0                         ; t8: high-32 bits
  2651    0x00000055647f3c68: dsll32 t8, t8, 0
  2652    0x00000055647f3c6c: bne t9, a0, 0x00000055647f3c9c           ; goto nequal
  2653    0x00000055647f3c70: sll zero, zero, 0
  2655    0x00000055647f3c74: ori v1, zero, 0xffffffff                 ; v1: low-32 bits of newval (sign unextended)
  2656    0x00000055647f3c78: dsll v1, v1, 16                          ; v1 = a6 & 0xFFFFFFFF;
  2657    0x00000055647f3c7c: ori v1, v1, 0xffffffff
  2658    0x00000055647f3c80: and v1, a6, v1
  2659    0x00000055647f3c84: or at, t8, v1
  2660    0x00000055647f3c88: scd at, 0x0(a7)
  2661    0x00000055647f3c8c: beq at, zero, 0x00000055647f3c5c         ; goto again
  2662    0x00000055647f3c90: sll zero, zero, 0
  2663    0x00000055647f3c94: beq zero, zero, 0x00000055647f45ac       ; goto done
  2664    0x00000055647f3c98: sll zero, zero, 0
  2665 nequal:
  2666    0x00000055647f45a4: dadd a0, t9, zero
  2667    0x00000055647f45a8: dadd at, zero, zero
  2668 done:
  2669 */
  2671 void MacroAssembler::cmpxchg32(Register x_reg, Address dest, Register c_reg) {
  2672   /* 2012/11/11 Jin: MIPS64 can use ll/sc for 32-bit atomic memory access */
  2673   Label done, again, nequal;
  2675   bind(again);
  2677   if(UseSyncLevel >= 3000 || UseSyncLevel < 2000) sync();
  2678   ll(AT, dest);
  2679   bne(AT, c_reg, nequal);
  2680   delayed()->nop();
  2682   move(AT, x_reg);
  2683   sc(AT, dest);
  2684   beq(AT, R0, again);
  2685   delayed()->nop();
  2686   b(done);
  2687   delayed()->nop();
  2689   // not xchged
  2690   bind(nequal);
  2691   sync();
  2692   move(c_reg, AT);
  2693   move(AT, R0);
  2695   bind(done);
  2696 }
  2697 #endif  // cmpxchg32
  2699 void MacroAssembler::cmpxchg(Register x_reg, Address dest, Register c_reg) {
  2700   Label done, again, nequal;
  2702   bind(again);
  2703   if(UseSyncLevel >= 3000 || UseSyncLevel < 2000) sync();
  2704 #ifdef _LP64
  2705   lld(AT, dest);
  2706 #else
  2707   ll(AT, dest);
  2708 #endif
  2709   bne(AT, c_reg, nequal);
  2710   delayed()->nop();
  2712   move(AT, x_reg);
  2713 #ifdef _LP64
  2714   scd(AT, dest);
  2715 #else
  2716   sc(AT, dest);
  2717 #endif
  2718   beq(AT, R0, again);
  2719   delayed()->nop();
  2720   b(done);
  2721   delayed()->nop();
  2723   // not xchged
  2724   bind(nequal);
  2725   sync();
  2726   move(c_reg, AT);
  2727   move(AT, R0);
  2729   bind(done);
  2730 }
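       // Result protocol of cmpxchg above (and of cmpxchg32): AT == 1 if the swap
       // succeeded, AT == 0 if it failed; on failure c_reg is updated with the value
       // found in memory. A typical caller (sketch; the label name is invented):
       //   cmpxchg(newval, Address(obj, 0), expected);
       //   bne(AT, R0, succeeded);
       //   delayed()->nop();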
  2732 void MacroAssembler::cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi) {
  2733   Label done, again, nequal;
  2735   Register x_reg = x_regLo;
  2736   dsll32(x_regHi, x_regHi, 0);
  2737   dsll32(x_regLo, x_regLo, 0);
  2738   dsrl32(x_regLo, x_regLo, 0);
  2739   orr(x_reg, x_regLo, x_regHi);
  2741   Register c_reg = c_regLo;
  2742   dsll32(c_regHi, c_regHi, 0);
  2743   dsll32(c_regLo, c_regLo, 0);
  2744   dsrl32(c_regLo, c_regLo, 0);
  2745   orr(c_reg, c_regLo, c_regHi);
  2747   bind(again);
  2749   if(UseSyncLevel >= 3000 || UseSyncLevel < 2000) sync();
  2750   lld(AT, dest);
  2751   bne(AT, c_reg, nequal);
  2752   delayed()->nop();
  2754   //move(AT, x_reg);
  2755   dadd(AT, x_reg, R0);
  2756   scd(AT, dest);
  2757   beq(AT, R0, again);
  2758   delayed()->nop();
  2759   b(done);
  2760   delayed()->nop();
  2762   // not xchged
  2763   bind(nequal);
  2764   sync();
  2765   //move(c_reg, AT);
  2766   //move(AT, R0);
  2767   dadd(c_reg, AT, R0);
  2768   dadd(AT, R0, R0);
  2769   bind(done);
  2770 }
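       // Example of the 64-bit packing above (illustrative): for x_regLo = 0x11111111
       // and x_regHi = 0x22222222 the shift/or sequence yields
       //   x_reg = 0x2222222211111111
       // so a single lld/scd pair can swap both 32-bit halves atomically.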
  2772 // make sure the three registers are different
  2773 void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) {
  2774   assert_different_registers(tmp, fs, ft);
  2775   div_s(tmp, fs, ft);
  2776   trunc_l_s(tmp, tmp);
  2777   cvt_s_l(tmp, tmp);
  2778   mul_s(tmp, tmp, ft);
  2779   sub_s(fd, fs, tmp);
  2780 }
  2782 // make sure the three registers are different
  2783 void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) {
  2784   assert_different_registers(tmp, fs, ft);
  2785   div_d(tmp, fs, ft);
  2786   trunc_l_d(tmp, tmp);
  2787   cvt_d_l(tmp, tmp);
  2788   mul_d(tmp, tmp, ft);
  2789   sub_d(fd, fs, tmp);
  2790 }
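       // Both helpers above compute a truncated floating-point remainder:
       //   fd = fs - trunc(fs / ft) * ft
       // e.g. rem_d with fs = 7.5, ft = 2.0 leaves fd = 7.5 - 3.0 * 2.0 = 1.5.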
  2792 // Fast_Lock and Fast_Unlock used by C2
  2794 // Because the transitions from emitted code to the runtime
  2795 // monitorenter/exit helper stubs are so slow it's critical that
  2796 // we inline both the stack-locking fast-path and the inflated fast path.
  2797 //
  2798 // See also: cmpFastLock and cmpFastUnlock.
  2799 //
  2800 // What follows is a specialized inline transliteration of the code
  2801 // in slow_enter() and slow_exit().  If we're concerned about I$ bloat
  2802 // another option would be to emit TrySlowEnter and TrySlowExit methods
  2803 // at startup-time.  These methods would accept arguments as
  2804 // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
  2805 // indications in the icc.ZFlag.  Fast_Lock and Fast_Unlock would simply
  2806 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
  2807 // In practice, however, the # of lock sites is bounded and is usually small.
  2808 // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
  2809 // if the processor uses simple bimodal branch predictors keyed by EIP,
  2810 // since the helper routines would be called from multiple synchronization
  2811 // sites.
  2812 //
  2813 // An even better approach would be write "MonitorEnter()" and "MonitorExit()"
  2814 // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
  2815 // to those specialized methods.  That'd give us a mostly platform-independent
  2816 // implementation that the JITs could optimize and inline at their pleasure.
  2817 // Done correctly, the only time we'd need to cross to native code would be
  2818 // to park() or unpark() threads.  We'd also need a few more unsafe operators
  2819 // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
  2820 // (b) explicit barriers or fence operations.
  2821 //
  2822 // TODO:
  2823 //
  2824 // *  Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
  2825 //    This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
  2826 //    Given TLAB allocation, Self is usually manifested in a register, so passing it into
  2827 //    the lock operators would typically be faster than reifying Self.
  2828 //
  2829 // *  Ideally I'd define the primitives as:
  2830 //       fast_lock   (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
  2831 //       fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
  2832 //    Unfortunately ADLC bugs prevent us from expressing the ideal form.
  2833 //    Instead, we're stuck with a rather awkward and brittle register assignments below.
  2834 //    Furthermore the register assignments are overconstrained, possibly resulting in
  2835 //    sub-optimal code near the synchronization site.
  2836 //
  2837 // *  Eliminate the sp-proximity tests and just use "== Self" tests instead.
  2838 //    Alternately, use a better sp-proximity test.
  2839 //
  2840 // *  Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
  2841 //    Either one is sufficient to uniquely identify a thread.
  2842 //    TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
  2843 //
  2844 // *  Intrinsify notify() and notifyAll() for the common cases where the
  2845 //    object is locked by the calling thread but the waitlist is empty.
  2846 //    avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
  2847 //
  2848 // *  use jccb and jmpb instead of jcc and jmp to improve code density.
  2849 //    But beware of excessive branch density on AMD Opterons.
  2850 //
  2851 // *  Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
  2852 //    or failure of the fast-path.  If the fast-path fails then we pass
  2853 //    control to the slow-path, typically in C.  In Fast_Lock and
  2854 //    Fast_Unlock we often branch to DONE_LABEL, just to find that C2
  2855 //    will emit a conditional branch immediately after the node.
  2856 //    So we have branches to branches and lots of ICC.ZF games.
  2857 //    Instead, it might be better to have C2 pass a "FailureLabel"
  2858 //    into Fast_Lock and Fast_Unlock.  In the case of success, control
  2859 //    will drop through the node.  ICC.ZF is undefined at exit.
  2860 //    In the case of failure, the node will branch directly to the
  2861 //    FailureLabel
  2864 // obj: object to lock
  2865 // box: on-stack box address (displaced header location) - KILLED
  2866 // rax,: tmp -- KILLED
  2867 // scr: tmp -- KILLED
  2868 void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, Register scrReg) {
  2870   // Ensure the register assignments are disjoint
  2871   guarantee (objReg != boxReg, "") ;
  2872   guarantee (objReg != tmpReg, "") ;
  2873   guarantee (objReg != scrReg, "") ;
  2874   guarantee (boxReg != tmpReg, "") ;
  2875   guarantee (boxReg != scrReg, "") ;
  2878   block_comment("FastLock");
  2879   /*
  2880      move(AT, 0x0);
  2881      return;
  2882      */
  2883   if (PrintBiasedLockingStatistics) {
  2884     push(tmpReg);
  2885     atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, AT, tmpReg);
  2886     pop(tmpReg);
  2887   }
  2889   if (EmitSync & 1) {
  2890     move(AT, 0x0);
  2891     return;
  2892   } else
  2893     if (EmitSync & 2) {
  2894       Label DONE_LABEL ;
  2895       if (UseBiasedLocking) {
  2896         // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
  2897         biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
  2898       }
  2900       ld(tmpReg, Address(objReg, 0)) ;          // fetch markword
  2901       ori(tmpReg, tmpReg, 0x1);
  2902       sd(tmpReg, Address(boxReg, 0));           // Anticipate successful CAS
  2904       cmpxchg(boxReg, Address(objReg, 0), tmpReg);          // Updates tmpReg
  2905       bne(AT, R0, DONE_LABEL);
  2906       delayed()->nop();
  2908       // Recursive locking
  2909       dsubu(tmpReg, tmpReg, SP);
  2910       li(AT, (7 - os::vm_page_size() ));
  2911       andr(tmpReg, tmpReg, AT);
  2912       sd(tmpReg, Address(boxReg, 0));
  2913       bind(DONE_LABEL) ;
  2914     } else {
  2915       // Possible cases that we'll encounter in fast_lock
  2916       // ------------------------------------------------
  2917       // * Inflated
  2918       //    -- unlocked
  2919       //    -- Locked
  2920       //       = by self
  2921       //       = by other
  2922       // * biased
  2923       //    -- by Self
  2924       //    -- by other
  2925       // * neutral
  2926       // * stack-locked
  2927       //    -- by self
  2928       //       = sp-proximity test hits
  2929       //       = sp-proximity test generates false-negative
  2930       //    -- by other
  2931       //
  2933       Label IsInflated, DONE_LABEL, PopDone ;
  2935       // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
  2936       // order to reduce the number of conditional branches in the most common cases.
  2937       // Beware -- there's a subtle invariant that fetch of the markword
  2938       // at [FETCH], below, will never observe a biased encoding (*101b).
  2939       // If this invariant is not held we risk exclusion (safety) failure.
  2940       if (UseBiasedLocking && !UseOptoBiasInlining) {
  2941         biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
  2942       }
  2944       ld(tmpReg, Address(objReg, 0)) ;         //Fetch the markword of the object.
  2945       andi(AT, tmpReg, markOopDesc::monitor_value);
  2946       bne(AT, R0, IsInflated);                      // inflated vs stack-locked|neutral|bias
  2947       delayed()->nop();
  2949       // Attempt stack-locking ...
  2950       ori (tmpReg, tmpReg, markOopDesc::unlocked_value);
  2951       sd(tmpReg, Address(boxReg, 0));          // Anticipate successful CAS
  2952       //if (os::is_MP()) {
  2953       //  sync();
  2954       //}
  2956       cmpxchg(boxReg, Address(objReg, 0), tmpReg);           // Updates tmpReg
  2957       //AT == 1: unlocked
  2959       if (PrintBiasedLockingStatistics) {
  2960         Label L;
  2961         beq(AT, R0, L);
  2962         delayed()->nop();
  2963         push(T0);
  2964         push(T1);
  2965         atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
  2966         pop(T1);
  2967         pop(T0);
  2968         bind(L);
  2969       }
  2970       bne(AT, R0, DONE_LABEL);
  2971       delayed()->nop();
  2973       // Recursive locking
  2974       // The object is stack-locked: markword contains stack pointer to BasicLock.
  2975       // Locked by current thread if difference with current SP is less than one page.
  2976       dsubu(tmpReg, tmpReg, SP);
  2977       li(AT, 7 - os::vm_page_size() );
  2978       andr(tmpReg, tmpReg, AT);
  2979       sd(tmpReg, Address(boxReg, 0));
  2980       if (PrintBiasedLockingStatistics) {
  2981         Label L;
  2982         // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++
  2983         bne(tmpReg, R0, L);
  2984         delayed()->nop();
  2985         push(T0);
  2986         push(T1);
  2987         atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
  2988         pop(T1);
  2989         pop(T0);
  2990         bind(L);
  2991       }
  2992       sltiu(AT, tmpReg, 1); /* AT = (tmpReg == 0) ? 1 : 0 */
  2994       b(DONE_LABEL) ;
  2995       delayed()->nop();
  2997       bind(IsInflated) ;
  2998       // The object's monitor m is unlocked iff m->owner == NULL,
  2999       // otherwise m->owner may contain a thread or a stack address.
  3001       // TODO: someday avoid the ST-before-CAS penalty by
  3002       // relocating (deferring) the following ST.
  3003       // We should also think about trying a CAS without having
  3004       // fetched _owner.  If the CAS is successful we may
  3005       // avoid an RTO->RTS upgrade on the $line.
  3006       // Without cast to int32_t a movptr will destroy r10 which is typically obj
  3007       li(AT, (int32_t)intptr_t(markOopDesc::unused_mark()));
  3008       sd(AT, Address(boxReg, 0));
  3010       move(boxReg, tmpReg) ;
  3011       ld(tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
  3012       // if (m->owner != 0) => AT = 0, goto slow path.
  3013       move(AT, R0);
  3014       bne(tmpReg, R0, DONE_LABEL);
  3015       delayed()->nop();
  3017 #ifndef OPT_THREAD
  3018       get_thread (TREG) ;
  3019 #endif
  3020       // It's inflated and appears unlocked
  3021       //if (os::is_MP()) {
  3022       //  sync();
  3023       //}
  3024       cmpxchg(TREG, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), tmpReg) ;
  3025       // Intentional fall-through into DONE_LABEL ...
  3028       // DONE_LABEL is a hot target - we'd really like to place it at the
  3029       // start of cache line by padding with NOPs.
  3030       // See the AMD and Intel software optimization manuals for the
  3031       // most efficient "long" NOP encodings.
  3032       // Unfortunately none of our alignment mechanisms suffice.
  3033       bind(DONE_LABEL);
  3035       // At DONE_LABEL the AT is set as follows ...
  3036       // Fast_Unlock uses the same protocol.
  3037       // AT == 1 -> Success
  3038       // AT == 0 -> Failure - force control through the slow-path
  3040       // Avoid branch-to-branch on AMD processors
  3041       // This appears to be superstition.
  3042       if (EmitSync & 32) nop() ;
  3043     }
  3044 }
  3047 // obj: object to unlock
  3048 // box: box address (displaced header location), killed.  Must be EAX.
  3049 // rbx,: killed tmp; cannot be obj nor box.
  3050 //
  3051 // Some commentary on balanced locking:
  3052 //
  3053 // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
  3054 // Methods that don't have provably balanced locking are forced to run in the
  3055 // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
  3056 // The interpreter provides two properties:
  3057 // I1:  At return-time the interpreter automatically and quietly unlocks any
  3058 //      objects acquired in the current activation (frame).  Recall that the
  3059 //      interpreter maintains an on-stack list of locks currently held by
  3060 //      a frame.
  3061 // I2:  If a method attempts to unlock an object that is not held by
  3062 //      the frame the interpreter throws IMSX.
  3063 //
  3064 // Let's say A(), which has provably balanced locking, acquires O and then calls B().
  3065 // B() doesn't have provably balanced locking so it runs in the interpreter.
  3066 // Control returns to A() and A() unlocks O.  By I1 and I2, above, we know that O
  3067 // is still locked by A().
  3068 //
  3069 // The only other source of unbalanced locking would be JNI.  The "Java Native Interface:
  3070 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
  3071 // should not be unlocked by "normal" java-level locking and vice-versa.  The specification
  3072 // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
  3074 void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg) {
  3076   guarantee (objReg != boxReg, "") ;
  3077   guarantee (objReg != tmpReg, "") ;
  3078   guarantee (boxReg != tmpReg, "") ;
  3082   block_comment("FastUnlock");
  3085   if (EmitSync & 4) {
  3086     // Disable - inhibit all inlining.  Force control through the slow-path
  3087     move(AT, 0x0);
  3088     return;
  3089   } else
  3090     if (EmitSync & 8) {
  3091       Label DONE_LABEL ;
  3092       if (UseBiasedLocking) {
  3093         biased_locking_exit(objReg, tmpReg, DONE_LABEL);
  3094       }
  3095       // classic stack-locking code ...
  3096       ld(tmpReg, Address(boxReg, 0)) ;
  3097       beq(tmpReg, R0, DONE_LABEL) ;
  3098       move(AT, 0x1);  // delay slot
  3100       cmpxchg(tmpReg, Address(objReg, 0), boxReg);          // Uses EAX which is box
  3101       bind(DONE_LABEL);
  3102     } else {
  3103       Label DONE_LABEL, Stacked, CheckSucc, Inflated ;
  3105       // Critically, the biased locking test must have precedence over
  3106       // and appear before the (box->dhw == 0) recursive stack-lock test.
  3107       if (UseBiasedLocking && !UseOptoBiasInlining) {
  3108         biased_locking_exit(objReg, tmpReg, DONE_LABEL);
  3109       }
  3111       ld(AT, Address(boxReg, 0)) ;            // Examine the displaced header
  3112       beq(AT, R0, DONE_LABEL) ;      // 0 indicates recursive stack-lock
  3113       delayed()->daddiu(AT, R0, 0x1);
  3115       ld(tmpReg, Address(objReg, 0)) ;       // Examine the object's markword
  3116       andi(AT, tmpReg, markOopDesc::monitor_value) ;                     // Inflated?
  3117       beq(AT, R0, Stacked) ;                     // Inflated?
  3118       delayed()->nop();
  3120       bind(Inflated) ;
  3121       // It's inflated.
  3122       // Despite our balanced locking property we still check that m->_owner == Self
  3123       // as java routines or native JNI code called by this thread might
  3124       // have released the lock.
  3125       // Refer to the comments in synchronizer.cpp for how we might encode extra
  3126       // state in _succ so we can avoid fetching EntryList|cxq.
  3127       //
  3128       // I'd like to add more cases in fast_lock() and fast_unlock() --
  3129       // such as recursive enter and exit -- but we have to be wary of
  3130       // I$ bloat, T$ effects and BP$ effects.
  3131       //
  3132       // If there's no contention try a 1-0 exit.  That is, exit without
  3133       // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
  3134       // we detect and recover from the race that the 1-0 exit admits.
  3135       //
  3136       // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
  3137       // before it STs null into _owner, releasing the lock.  Updates
  3138       // to data protected by the critical section must be visible before
  3139       // we drop the lock (and thus before any other thread could acquire
  3140       // the lock and observe the fields protected by the lock).
  3141       // IA32's memory-model is SPO, so STs are ordered with respect to
  3142       // each other and there's no need for an explicit barrier (fence).
  3143       // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
#ifndef OPT_THREAD
      get_thread (TREG) ;
#endif

      // It's inflated
      ld(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
      xorr(boxReg, boxReg, TREG);

      ld(AT, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
      orr(boxReg, boxReg, AT);

      move(AT, R0);
      bne(boxReg, R0, DONE_LABEL);
      delayed()->nop();

      ld(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
      ld(AT, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
      orr(boxReg, boxReg, AT);

      move(AT, R0);
      bne(boxReg, R0, DONE_LABEL);
      delayed()->nop();

      sync();
      sd(R0, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
      move(AT, 0x1);
      b(DONE_LABEL);
      delayed()->nop();

      bind  (Stacked);
      ld(tmpReg, Address(boxReg, 0)) ;
      //if (os::is_MP()) { sync(); }
      cmpxchg(tmpReg, Address(objReg, 0), boxReg);

      if (EmitSync & 65536) {
        bind (CheckSucc);
      }

      bind(DONE_LABEL);

      // Avoid branch to branch on AMD processors
      if (EmitSync & 32768) { nop() ; }
    }
}

void MacroAssembler::align(int modulus) {
  while (offset() % modulus != 0) nop();
}

void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
  //Unimplemented();
}

#ifdef _LP64
Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP};

/* FIXME: Jin: In MIPS64, F0~F23 are all caller-saved registers */
FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13};
#else
Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, T4, T5, T6, T7, T0, T1, T2, T3, T8, T9, GP, RA, FP};

FloatRegister caller_saved_fpu_registers[] = {};
#endif
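
// Illustrative pairing of pushad()/popad() around a VM runtime call (our
// sketch; my_runtime_probe is a made-up placeholder, not a real HotSpot entry):
//   pushad();                 // spill every caller-saved GPR/FPR listed above
//   call(CAST_FROM_FN_PTR(address, my_runtime_probe), relocInfo::runtime_call_type);
//   delayed()->nop();
//   popad();                  // reload them in the reverse order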
// We preserve all caller-saved registers.
void  MacroAssembler::pushad(){
  int i;

  /* Fixed-point registers */
  int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
  daddi(SP, SP, -1 * len * wordSize);
  for (i = 0; i < len; i++) {
#ifdef _LP64
    sd(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
#else
    sw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
#endif
  }

  /* Floating-point registers */
  len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
  daddi(SP, SP, -1 * len * wordSize);
  for (i = 0; i < len; i++) {
#ifdef _LP64
    sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
#else
    swc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
#endif
  }
}

void  MacroAssembler::popad(){
  int i;

  /* Floating-point registers */
  int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
  for (i = 0; i < len; i++) {
#ifdef _LP64
    ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
#else
    lwc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
#endif
  }
  daddi(SP, SP, len * wordSize);

  /* Fixed-point registers */
  len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
  for (i = 0; i < len; i++) {
#ifdef _LP64
    ld(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
#else
    lw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
#endif
  }
  daddi(SP, SP, len * wordSize);
}

void MacroAssembler::push2(Register reg1, Register reg2) {
#ifdef _LP64
  daddi(SP, SP, -16);
  sd(reg2, SP, 0);
  sd(reg1, SP, 8);
#else
  addi(SP, SP, -8);
  sw(reg2, SP, 0);
  sw(reg1, SP, 4);
#endif
}

void MacroAssembler::pop2(Register reg1, Register reg2) {
#ifdef _LP64
  ld(reg1, SP, 0);
  ld(reg2, SP, 8);
  daddi(SP, SP, 16);
#else
  lw(reg1, SP, 0);
  lw(reg2, SP, 4);
  addi(SP, SP, 8);
#endif
}

// Helpers for the UseCompressedOops / UseCompressedClassPointers options.
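// A compact summary (ours) of the compressed-klass load performed below:
//   narrow_klass = lwu(obj + klass_offset);                        // 32-bit load
//   klass        = narrow_klass_base + (narrow_klass << shift);    // decode_klass_not_null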
void MacroAssembler::load_klass(Register dst, Register src) {
#ifdef _LP64
  if(UseCompressedClassPointers){
    lwu(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    decode_klass_not_null(dst);
  } else
#endif
  ld(dst, src, oopDesc::klass_offset_in_bytes());
}

void MacroAssembler::store_klass(Register dst, Register src) {
#ifdef _LP64
  if(UseCompressedClassPointers){
    encode_klass_not_null(src);
    sw(src, dst, oopDesc::klass_offset_in_bytes());
  } else {
#endif
    sd(src, dst, oopDesc::klass_offset_in_bytes());
#ifdef _LP64
  }
#endif
}

void MacroAssembler::load_prototype_header(Register dst, Register src) {
  load_klass(dst, src);
  ld(dst, Address(dst, Klass::prototype_header_offset()));
}

#ifdef _LP64
void MacroAssembler::store_klass_gap(Register dst, Register src) {
  if (UseCompressedClassPointers) {
    sw(src, dst, oopDesc::klass_gap_offset_in_bytes());
  }
}

void MacroAssembler::load_heap_oop(Register dst, Address src) {
  if(UseCompressedOops){
    lwu(dst, src);
    decode_heap_oop(dst);
  } else {
    ld(dst, src);
  }
}

void MacroAssembler::store_heap_oop(Address dst, Register src){
  if(UseCompressedOops){
    assert(!dst.uses(src), "not enough registers");
    encode_heap_oop(src);
    sw(src, dst);
  } else {
    sd(src, dst);
  }
}

void MacroAssembler::store_heap_oop_null(Address dst){
  if(UseCompressedOops){
    sw(R0, dst);
  } else {
    sd(R0, dst);
  }
}

#ifdef ASSERT
void MacroAssembler::verify_heapbase(const char* msg) {
  assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
}
#endif

// Algorithm must match oop.inline.hpp encode_heap_oop.
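// Roughly, assuming the usual zero-based/heap-based modes (our summary):
//   narrow = (oop == NULL) ? 0 : (narrowOop)((oop - narrow_oop_base) >> narrow_oop_shift)
// The movz below is what keeps a NULL oop encoded as 0 when a heap base is in use.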
void MacroAssembler::encode_heap_oop(Register r) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
#endif
  verify_oop(r, "broken oop in encode_heap_oop");
  if (Universe::narrow_oop_base() == NULL) {
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shr(r, LogMinObjAlignmentInBytes);
    }
    return;
  }

  movz(r, S5_heapbase, r);
  dsub(r, r, S5_heapbase);
  if (Universe::narrow_oop_shift() != 0) {
    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shr(r, LogMinObjAlignmentInBytes);
  }
}

void MacroAssembler::encode_heap_oop(Register dst, Register src) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
#endif
  verify_oop(src, "broken oop in encode_heap_oop");
  if (Universe::narrow_oop_base() == NULL) {
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      dsrl(dst, src, LogMinObjAlignmentInBytes);
    } else {
      if (dst != src) move(dst, src);
    }
  } else {
    if (dst == src) {
      movz(dst, S5_heapbase, dst);
      dsub(dst, dst, S5_heapbase);
      if (Universe::narrow_oop_shift() != 0) {
        assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
        shr(dst, LogMinObjAlignmentInBytes);
      }
    } else {
      dsub(dst, src, S5_heapbase);
      if (Universe::narrow_oop_shift() != 0) {
        assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
        shr(dst, LogMinObjAlignmentInBytes);
      }
      movz(dst, R0, src);
    }
  }
}

void MacroAssembler::encode_heap_oop_not_null(Register r) {
  assert (UseCompressedOops, "should be compressed");
#ifdef ASSERT
  if (CheckCompressedOops) {
    Label ok;
    bne(r, R0, ok);
    delayed()->nop();
    stop("null oop passed to encode_heap_oop_not_null");
    bind(ok);
  }
#endif
  verify_oop(r, "broken oop in encode_heap_oop_not_null");
  if (Universe::narrow_oop_base() != NULL) {
    dsub(r, r, S5_heapbase);
  }
  if (Universe::narrow_oop_shift() != 0) {
    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shr(r, LogMinObjAlignmentInBytes);
  }
}

void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
  assert (UseCompressedOops, "should be compressed");
#ifdef ASSERT
  if (CheckCompressedOops) {
    Label ok;
    bne(src, R0, ok);
    delayed()->nop();
    stop("null oop passed to encode_heap_oop_not_null2");
    bind(ok);
  }
#endif
  verify_oop(src, "broken oop in encode_heap_oop_not_null2");

  if (Universe::narrow_oop_base() != NULL) {
    dsub(dst, src, S5_heapbase);
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shr(dst, LogMinObjAlignmentInBytes);
    }
  } else {
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      dsrl(dst, src, LogMinObjAlignmentInBytes);
    } else {
      if (dst != src) move(dst, src);
    }
  }
}

void  MacroAssembler::decode_heap_oop(Register r) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
#endif
  if (Universe::narrow_oop_base() == NULL) {
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shl(r, LogMinObjAlignmentInBytes);
    }
  } else {
    move(AT, r);
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shl(r, LogMinObjAlignmentInBytes);
    }
    dadd(r, r, S5_heapbase);
    movz(r, R0, AT);
  }
  verify_oop(r, "broken oop in decode_heap_oop");
}

void  MacroAssembler::decode_heap_oop(Register dst, Register src) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
#endif
  if (Universe::narrow_oop_base() == NULL) {
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      if (dst != src) nop(); // DON'T DELETE THIS GUY.
      dsll(dst, src, LogMinObjAlignmentInBytes);
    } else {
      if (dst != src) move(dst, src);
    }
  } else {
    if (dst == src) {
      move(AT, dst);
      if (Universe::narrow_oop_shift() != 0) {
        assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
        shl(dst, LogMinObjAlignmentInBytes);
      }
      dadd(dst, dst, S5_heapbase);
      movz(dst, R0, AT);
    } else {
      if (Universe::narrow_oop_shift() != 0) {
        assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
        dsll(dst, src, LogMinObjAlignmentInBytes);
        daddu(dst, dst, S5_heapbase);
      } else {
        daddu(dst, src, S5_heapbase);
      }
      movz(dst, R0, src);
    }
  }
  verify_oop(dst, "broken oop in decode_heap_oop");
}

void  MacroAssembler::decode_heap_oop_not_null(Register r) {
  // Note: it will change flags
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_oop_shift() != 0) {
    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shl(r, LogMinObjAlignmentInBytes);
    if (Universe::narrow_oop_base() != NULL) {
      daddu(r, r, S5_heapbase);
    }
  } else {
    assert (Universe::narrow_oop_base() == NULL, "sanity");
  }
}

void  MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");

  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  //lea(dst, Address(S5_heapbase, src, Address::times_8, 0));
  if (Universe::narrow_oop_shift() != 0) {
    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    if (LogMinObjAlignmentInBytes == Address::times_8) {
      dsll(dst, src, LogMinObjAlignmentInBytes);
      daddu(dst, dst, S5_heapbase);
    } else {
      dsll(dst, src, LogMinObjAlignmentInBytes);
      if (Universe::narrow_oop_base() != NULL) {
        daddu(dst, dst, S5_heapbase);
      }
    }
  } else {
    assert (Universe::narrow_oop_base() == NULL, "sanity");
    if (dst != src) {
      move(dst, src);
    }
  }
}

void MacroAssembler::encode_klass_not_null(Register r) {
  if (Universe::narrow_klass_base() != NULL) {
    assert(r != AT, "Encoding a klass in AT");
    set64(AT, (int64_t)Universe::narrow_klass_base());
    dsub(r, r, AT);
  }
  if (Universe::narrow_klass_shift() != 0) {
    assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
    shr(r, LogKlassAlignmentInBytes);
  }
}

void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
  if (dst == src) {
    encode_klass_not_null(src);
  } else {
    if (Universe::narrow_klass_base() != NULL) {
      set64(dst, (int64_t)Universe::narrow_klass_base());
      dsub(dst, src, dst);
      if (Universe::narrow_klass_shift() != 0) {
        assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
        shr(dst, LogKlassAlignmentInBytes);
      }
    } else {
      if (Universe::narrow_klass_shift() != 0) {
        assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
        dsrl(dst, src, LogKlassAlignmentInBytes);
      } else {
        move(dst, src);
      }
    }
  }
}

// Function instr_size_for_decode_klass_not_null() counts the instructions
// generated by decode_klass_not_null(register r) and reinit_heapbase(),
// when (Universe::heap() != NULL).  Hence, if the instructions they
// generate change, then this method needs to be updated.
int MacroAssembler::instr_size_for_decode_klass_not_null() {
  assert (UseCompressedClassPointers, "only for compressed klass ptrs");
  if (Universe::narrow_klass_base() != NULL) {
    // mov64 + addq + shlq? + mov64  (for reinit_heapbase()).
    return (Universe::narrow_klass_shift() == 0 ? 4 * 9 : 4 * 10);
  } else {
    // longest load decode klass function, mov64, leaq
    return (Universe::narrow_klass_shift() == 0 ? 4 * 0 : 4 * 1);
  }
}

void  MacroAssembler::decode_klass_not_null(Register r) {
  assert (UseCompressedClassPointers, "should only be used for compressed headers");
  assert(r != AT, "Decoding a klass in AT");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_klass_shift() != 0) {
    assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
    shl(r, LogKlassAlignmentInBytes);
  }
  if (Universe::narrow_klass_base() != NULL) {
    set64(AT, (int64_t)Universe::narrow_klass_base());
    daddu(r, r, AT);
    // Not necessary on MIPS at all.
    //reinit_heapbase();
  }
}

void  MacroAssembler::decode_klass_not_null(Register dst, Register src) {
  assert (UseCompressedClassPointers, "should only be used for compressed headers");

  if (dst == src) {
    decode_klass_not_null(dst);
  } else {
    // Cannot assert, unverified entry point counts instructions (see .ad file)
    // vtableStubs also counts instructions in pd_code_size_limit.
    // Also do not verify_oop as this is called by verify_oop.
    set64(dst, (int64_t)Universe::narrow_klass_base());
    if (Universe::narrow_klass_shift() != 0) {
      assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
      assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
      dsll(AT, src, Address::times_8);
      daddu(dst, dst, AT);
    } else {
      daddu(dst, src, dst);
    }
  }
}

void MacroAssembler::incrementl(Register reg, int value) {
  if (value == min_jint) {
    move(AT, value);
    LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
    return;
  }
  if (value <  0) { decrementl(reg, -value); return; }
  if (value == 0) { return; }

  if (Assembler::is_simm16(value)) {
    NOT_LP64(addiu(reg, reg, value));
    LP64_ONLY(move(AT, value); addu32(reg, reg, AT));
  } else {
    move(AT, value);
    LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
  }
}

void MacroAssembler::decrementl(Register reg, int value) {
  if (value == min_jint) {
    move(AT, value);
    LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
    return;
  }
  if (value <  0) { incrementl(reg, -value); return; }
  if (value == 0) { return; }

  if (Assembler::is_simm16(value)) {
    NOT_LP64(addiu(reg, reg, -value));
    LP64_ONLY(move(AT, value); subu32(reg, reg, AT));
  } else {
    move(AT, value);
    LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
  }
}

void MacroAssembler::reinit_heapbase() {
  if (UseCompressedOops || UseCompressedClassPointers) {
    if (Universe::heap() != NULL) {
      if (Universe::narrow_oop_base() == NULL) {
        move(S5_heapbase, R0);
      } else {
        set64(S5_heapbase, (int64_t)Universe::narrow_ptrs_base());
      }
    } else {
      set64(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr());
      ld(S5_heapbase, S5_heapbase, 0);
    }
  }
}
#endif // _LP64

void MacroAssembler::check_klass_subtype(Register sub_klass,
                           Register super_klass,
                           Register temp_reg,
                           Label& L_success) {
  // cf. the interpreter's gen_subtype_check.
  Label L_failure;
  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg,        &L_success, &L_failure, NULL);
  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
  bind(L_failure);
}

SkipIfEqual::SkipIfEqual(
    MacroAssembler* masm, const bool* flag_addr, bool value) {
  _masm = masm;
  _masm->li(AT, (address)flag_addr);
  _masm->lb(AT, AT, 0);
  _masm->addi(AT, AT, -value);
  _masm->beq(AT, R0, _label);
  _masm->delayed()->nop();
}

void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   Label* L_slow_path,
                                        RegisterOrConstant super_check_offset) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
  if (super_check_offset.is_register()) {
    assert_different_registers(sub_klass, super_klass,
                               super_check_offset.as_register());
  } else if (must_load_sco) {
    assert(temp_reg != noreg, "supply either a temp or a register offset");
  }

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  int sco_offset = in_bytes(Klass::super_check_offset_offset());
  // If the pointers are equal, we are done (e.g., String[] elements).
  // This self-check enables sharing of secondary supertype arrays among
  // non-primary types such as array-of-interface.  Otherwise, each such
  // type would need its own customized SSA.
  // We move this check to the front of the fast path because many
  // type checks are in fact trivially successful in this manner,
  // so we get a nicely predicted branch right at the start of the check.
  beq(sub_klass, super_klass, *L_success);
  delayed()->nop();
  // Check the supertype display:
  if (must_load_sco) {
    // The super_check_offset is always positive; lwu zero-extends it on LP64.
    lwu(temp_reg, super_klass, sco_offset);
    super_check_offset = RegisterOrConstant(temp_reg);
  }
  dsll(AT, super_check_offset.register_or_noreg(), Address::times_1);
  daddu(AT, sub_klass, AT);
  ld(AT, AT, super_check_offset.constant_or_zero()*Address::times_1);

  // This check has worked decisively for primary supers.
  // Secondary supers are sought in the super_cache ('super_cache_addr').
  // (Secondary supers are interfaces and very deeply nested subtypes.)
  // This works in the same check above because of a tricky aliasing
  // between the super_cache and the primary super display elements.
  // (The 'super_check_addr' can address either, as the case requires.)
  // Note that the cache is updated below if it does not help us find
  // what we need immediately.
  // So if it was a primary super, we can just fail immediately.
  // Otherwise, it's the slow path for us (no success at this point).
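  //
  // In effect, the load above computed (our sketch):
  //   AT = *(address)(sub_klass + super_check_offset)
  // and the branches below test AT == super_klass, where super_check_offset
  // selects either a primary-super display slot or the secondary_super_cache.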

  if (super_check_offset.is_register()) {
    beq(super_klass, AT, *L_success);
    delayed()->nop();
    addi(AT, super_check_offset.as_register(), -sc_offset);
    if (L_failure == &L_fallthrough) {
      beq(AT, R0, *L_slow_path);
      delayed()->nop();
    } else {
      bne(AT, R0, *L_failure);
      delayed()->nop();
      b(*L_slow_path);
      delayed()->nop();
    }
  } else if (super_check_offset.as_constant() == sc_offset) {
    // Need a slow path; fast failure is impossible.
    if (L_slow_path == &L_fallthrough) {
      beq(super_klass, AT, *L_success);
      delayed()->nop();
    } else {
      bne(super_klass, AT, *L_slow_path);
      delayed()->nop();
      b(*L_success);
      delayed()->nop();
    }
  } else {
    // No slow path; it's a fast decision.
    if (L_failure == &L_fallthrough) {
      beq(super_klass, AT, *L_success);
      delayed()->nop();
    } else {
      bne(super_klass, AT, *L_failure);
      delayed()->nop();
      b(*L_success);
      delayed()->nop();
    }
  }

  bind(L_fallthrough);
}

void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Register temp2_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   bool set_cond_codes) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  if (temp2_reg != noreg)
    assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
  else
    temp2_reg = T9;
#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  // a couple of useful fields in sub_klass:
  int ss_offset = in_bytes(Klass::secondary_supers_offset());
  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  Address secondary_supers_addr(sub_klass, ss_offset);
  Address super_cache_addr(     sub_klass, sc_offset);

  // Do a linear scan of the secondary super-klass chain.
  // This code is rarely used, so simplicity is a virtue here.
  // Don't worry too much about pre-existing connections with the input regs.

#ifndef PRODUCT
  int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
  ExternalAddress pst_counter_addr((address) pst_counter);
  NOT_LP64(  incrementl(pst_counter_addr) );
#endif //PRODUCT

  // We will consult the secondary-super array.
  ld(temp_reg, secondary_supers_addr);
  // Load the array length (an int; lw sign-extends, but the length is non-negative).
  lw(temp2_reg, Address(temp_reg, Array<Klass*>::length_offset_in_bytes()));
  // Skip to start of data.
  daddiu(temp_reg, temp_reg, Array<Klass*>::base_offset_in_bytes());

  // Scan temp2_reg words at [temp_reg] for an occurrence of super_klass.
  /* 2013/4/3 Jin: OpenJDK8 never compresses klass pointers in secondary-super array. */
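  // The scan below, in C-like pseudocode (our paraphrase, not the exact
  // delay-slot schedule):
  //   while (temp2_reg != 0) {
  //     if (*(Klass**)temp_reg == super_klass) goto subtype;  // hit: cache it
  //     temp_reg += wordSize; temp2_reg -= 1;
  //   }
  //   goto *L_failure;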
  Label Loop, subtype;
  bind(Loop);
  beq(temp2_reg, R0, *L_failure);
  delayed()->nop();
  ld(AT, temp_reg, 0);
  beq(AT, super_klass, subtype);
  delayed()->daddi(temp_reg, temp_reg, 1 * wordSize);
  b(Loop);
  delayed()->daddi(temp2_reg, temp2_reg, -1);

  bind(subtype);
  // Success.  Cache the super we found and proceed in triumph.
  sd(super_klass, super_cache_addr);
  if (L_success != &L_fallthrough) {
    b(*L_success);
    delayed()->nop();
  }

#undef IS_A_TEMP

  bind(L_fallthrough);
}

void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
  ld(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
  sd(R0, Address(java_thread, JavaThread::vm_result_offset()));
  verify_oop(oop_result, "broken oop in call_VM_base");
}

void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
  ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
  sd(R0, Address(java_thread, JavaThread::vm_result_2_offset()));
}

Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
                                         int extra_slot_offset) {
  // cf. TemplateTable::prepare_invoke(), if (load_receiver).
  int stackElementSize = Interpreter::stackElementSize;
  int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
#ifdef ASSERT
  int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
  assert(offset1 - offset == stackElementSize, "correct arithmetic");
#endif
  Register             scale_reg    = NOREG;
  Address::ScaleFactor scale_factor = Address::no_scale;
  if (arg_slot.is_constant()) {
    offset += arg_slot.as_constant() * stackElementSize;
  } else {
    scale_reg    = arg_slot.as_register();
    scale_factor = Address::times_8;
  }
  // 2014/07/31 Fu: We don't push RA on stack in prepare_invoke.
  //  offset += wordSize;           // return PC is on stack
  if (scale_reg == NOREG) {
    return Address(SP, offset);
  } else {
    dsll(scale_reg, scale_reg, scale_factor);
    daddu(scale_reg, SP, scale_reg);
    return Address(scale_reg, offset);
  }
}

SkipIfEqual::~SkipIfEqual() {
  _masm->bind(_label);
}

void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
  switch (size_in_bytes) {
#ifndef _LP64
  case  8:
    assert(dst2 != noreg, "second dest register required");
    lw(dst,  src);
    lw(dst2, src.plus_disp(BytesPerInt));
    break;
#else
  case  8:  ld(dst, src); break;
#endif
  case  4:  lw(dst, src); break;
  case  2:  is_signed ? lh(dst, src) : lhu(dst, src); break;
  case  1:  is_signed ? lb( dst, src) : lbu( dst, src); break;
  default:  ShouldNotReachHere();
  }
}

void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
  switch (size_in_bytes) {
#ifndef _LP64
  case  8:
    assert(src2 != noreg, "second source register required");
    sw(src, dst);
    sw(src2, dst.plus_disp(BytesPerInt));
    break;
#else
  case  8:  sd(src, dst); break;
#endif
  case  4:  sw(src, dst); break;
  case  2:  sh(src, dst); break;
  case  1:  sb(src, dst); break;
  default:  ShouldNotReachHere();
  }
}

// Look up the method for a megamorphic invokeinterface call.
// The target method is determined by <intf_klass, itable_index>.
// The receiver klass is in recv_klass.
// On success, the result will be in method_result, and execution falls through.
// On failure, execution transfers to the given label.
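// In outline (our sketch of the emitted scan, not the exact schedule):
//   for (scan = itable_start(recv_klass); ; scan += scan_step) {
//     itf = scan->interface();
//     if (itf == intf_klass) break;
//     if (itf == NULL) goto L_no_such_interface;
//   }
//   if (return_method)
//     method_result = *(recv_klass + scan->offset() + itable_index*wordSize + method_offset);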
void MacroAssembler::lookup_interface_method(Register recv_klass,
                                             Register intf_klass,
                                             RegisterOrConstant itable_index,
                                             Register method_result,
                                             Register scan_temp,
                                             Label& L_no_such_interface,
                                             bool return_method) {
  assert_different_registers(recv_klass, intf_klass, scan_temp, AT);
  assert_different_registers(method_result, intf_klass, scan_temp, AT);
  assert(recv_klass != method_result || !return_method,
         "recv_klass can be destroyed when method isn't needed");

  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
         "caller must use same register for non-constant itable index as for method");

  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  int vtable_base = InstanceKlass::vtable_start_offset() * wordSize;
  int itentry_off = itableMethodEntry::method_offset_in_bytes();
  int scan_step   = itableOffsetEntry::size() * wordSize;
  int vte_size    = vtableEntry::size() * wordSize;
  Address::ScaleFactor times_vte_scale = Address::times_ptr;
  assert(vte_size == wordSize, "else adjust times_vte_scale");

  lw(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize));

  // %%% Could store the aligned, prescaled offset in the klassoop.
  dsll(scan_temp, scan_temp, times_vte_scale);
  daddu(scan_temp, recv_klass, scan_temp);
  daddiu(scan_temp, scan_temp, vtable_base);
  if (HeapWordsPerLong > 1) {
    // Round up to align_object_offset boundary
    // see code for InstanceKlass::start_of_itable!
    round_to(scan_temp, BytesPerLong);
  }

  if (return_method) {
    // Adjust recv_klass by scaled itable_index, so we can free itable_index.
    assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
    if (itable_index.is_constant()) {
      set64(AT, (int)itable_index.as_constant());
      dsll(AT, AT, (int)Address::times_ptr);
    } else {
      dsll(AT, itable_index.as_register(), (int)Address::times_ptr);
    }
    daddu(AT, AT, recv_klass);
    daddiu(recv_klass, AT, itentry_off);
  }

  Label search, found_method;

  for (int peel = 1; peel >= 0; peel--) {
    ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));

    if (peel) {
      beq(intf_klass, method_result, found_method);
      delayed()->nop();
    } else {
      bne(intf_klass, method_result, search);
      delayed()->nop();
      // (invert the test to fall through to found_method...)
    }

    if (!peel)  break;

    bind(search);

    // Check that the previous entry is non-null.  A null entry means that
    // the receiver class doesn't implement the interface, and wasn't the
    // same as when the caller was compiled.
    beq(method_result, R0, L_no_such_interface);
    delayed()->nop();
    daddiu(scan_temp, scan_temp, scan_step);
  }

  bind(found_method);

  if (return_method) {
    // Got a hit.
    lw(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
    if (UseLoongsonISA) {
      gsldx(method_result, recv_klass, scan_temp, 0);
    } else {
      daddu(AT, recv_klass, scan_temp);
      ld(method_result, AT, 0);
    }
  }
}

// virtual method calling
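// In effect (our summary of the code below):
//   method_result = *(recv_klass + vtable_start_offset
//                     + vtable_index * wordSize + vtableEntry::method_offset)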
void MacroAssembler::lookup_virtual_method(Register recv_klass,
                                           RegisterOrConstant vtable_index,
                                           Register method_result) {
  Register tmp = GP;
  push(tmp);

  if (vtable_index.is_constant()) {
    assert_different_registers(recv_klass, method_result, tmp);
  } else {
    assert_different_registers(recv_klass, method_result, vtable_index.as_register(), tmp);
  }
  const int base = InstanceKlass::vtable_start_offset() * wordSize;
  assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below");
/*
  Address vtable_entry_addr(recv_klass,
                            vtable_index, Address::times_ptr,
                            base + vtableEntry::method_offset_in_bytes());
*/
  if (vtable_index.is_constant()) {
    set64(AT, vtable_index.as_constant());
    dsll(AT, AT, (int)Address::times_ptr);
  } else {
    dsll(AT, vtable_index.as_register(), (int)Address::times_ptr);
  }
  set64(tmp, base + vtableEntry::method_offset_in_bytes());
  daddu(tmp, tmp, AT);
  daddu(tmp, tmp, recv_klass);
  ld(method_result, tmp, 0);

  pop(tmp);
}
