src/cpu/mips/vm/macroAssembler_mips.cpp

changeset 6880:52ea28d233d2
child     6886:2fa8027581f6
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/cpu/mips/vm/macroAssembler_mips.cpp	Thu Sep 07 09:12:16 2017 +0800
     1.3 @@ -0,0 +1,3844 @@
     1.4 +/*
     1.5 + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
     1.6 + * Copyright (c) 2017, Loongson Technology. All rights reserved.
     1.7 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.8 + *
     1.9 + * This code is free software; you can redistribute it and/or modify it
    1.10 + * under the terms of the GNU General Public License version 2 only, as
    1.11 + * published by the Free Software Foundation.
    1.12 + *
    1.13 + * This code is distributed in the hope that it will be useful, but WITHOUT
    1.14 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    1.15 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    1.16 + * version 2 for more details (a copy is included in the LICENSE file that
    1.17 + * accompanied this code).
    1.18 + *
    1.19 + * You should have received a copy of the GNU General Public License version
    1.20 + * 2 along with this work; if not, write to the Free Software Foundation,
    1.21 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    1.22 + *
    1.23 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    1.24 + * or visit www.oracle.com if you need additional information or have any
    1.25 + * questions.
    1.26 + *
    1.27 + */
    1.28 +
    1.29 +#include "precompiled.hpp"
    1.30 +#include "asm/assembler.hpp"
    1.31 +#include "asm/assembler.inline.hpp"
    1.32 +#include "asm/macroAssembler.inline.hpp"
    1.33 +#include "compiler/disassembler.hpp"
    1.34 +#include "gc_interface/collectedHeap.inline.hpp"
    1.35 +#include "interpreter/interpreter.hpp"
    1.36 +#include "memory/cardTableModRefBS.hpp"
    1.37 +#include "memory/resourceArea.hpp"
    1.38 +#include "memory/universe.hpp"
    1.39 +#include "prims/methodHandles.hpp"
    1.40 +#include "runtime/biasedLocking.hpp"
    1.41 +#include "runtime/interfaceSupport.hpp"
    1.42 +#include "runtime/objectMonitor.hpp"
    1.43 +#include "runtime/os.hpp"
    1.44 +#include "runtime/sharedRuntime.hpp"
    1.45 +#include "runtime/stubRoutines.hpp"
    1.46 +#include "utilities/macros.hpp"
    1.47 +#if INCLUDE_ALL_GCS
    1.48 +#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
    1.49 +#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
    1.50 +#include "gc_implementation/g1/heapRegion.hpp"
    1.51 +#endif // INCLUDE_ALL_GCS
    1.52 +
    1.53 +// Implementation of MacroAssembler
    1.54 +
    1.55 +intptr_t MacroAssembler::i[32] = {0};
    1.56 +float MacroAssembler::f[32] = {0.0};
    1.57 +
    1.58 +void MacroAssembler::print(outputStream *s) {
    1.59 +  unsigned int k;
    1.60 +  for(k=0; k<sizeof(i)/sizeof(i[0]); k++) {
    1.61 +    s->print_cr("i%d = 0x%.16lx", k, i[k]);
    1.62 +  }
    1.63 +  s->cr();
    1.64 +
    1.65 +  for(k=0; k<sizeof(f)/sizeof(f[0]); k++) {
    1.66 +    s->print_cr("f%d = %f", k, f[k]);
    1.67 +  }
    1.68 +  s->cr();
    1.69 +}
    1.70 +
    1.71 +int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; }
    1.72 +int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; }
    1.73 +
    1.74 +void MacroAssembler::save_registers(MacroAssembler *masm) {
    1.75 +#define __ masm->
    1.76 +  for(int k=0; k<32; k++) {
    1.77 +    __ sw (as_Register(k), A0, i_offset(k));
    1.78 +  }
    1.79 +
    1.80 +  for(int k=0; k<32; k++) {
    1.81 +    __ swc1 (as_FloatRegister(k), A0, f_offset(k));
    1.82 +  }
    1.83 +#undef __
    1.84 +}
    1.85 +
    1.86 +void MacroAssembler::restore_registers(MacroAssembler *masm) {
    1.87 +#define __ masm->
    1.88 +  for(int k=0; k<32; k++) {
    1.89 +    __ lw (as_Register(k), A0, i_offset(k));
    1.90 +  }
    1.91 +
    1.92 +  for(int k=0; k<32; k++) {
    1.93 +    __ lwc1 (as_FloatRegister(k), A0, f_offset(k));
    1.94 +  }
    1.95 +#undef __
    1.96 +}
    1.97 +
    1.98 +
    1.99 +void MacroAssembler::pd_patch_instruction(address branch, address target) {
   1.100 +  jint& stub_inst = *(jint*) branch;
   1.101 +
    1.102 +/*
    1.103 +  move(AT, RA); // dadd
    1.104 +  emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
    1.105 +  nop();
    1.106 +  lui(T9, 0); // to be patched
    1.107 +  ori(T9, T9, 0);
    1.108 +  daddu(T9, T9, RA);
    1.109 +  move(RA, AT);
    1.110 +  jr(T9);
    1.111 + */
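          +  // Note: bgezal with rs == zero is always taken, so the sequence above
          +  // is a position-independent "capture PC into RA"; the lui/ori pair is
          +  // patched with a 32-bit displacement relative to RA, which daddu/jr
          +  // then turn into an absolute jump.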
   1.112 +  if(special(stub_inst) == dadd_op) {
   1.113 +    jint *pc = (jint *)branch;
   1.114 +
   1.115 +    assert(opcode(pc[3]) == lui_op
   1.116 +          && opcode(pc[4]) == ori_op
   1.117 +          && special(pc[5]) == daddu_op, "Not a branch label patch");
   1.118 +    if(!(opcode(pc[3]) == lui_op
   1.119 +          && opcode(pc[4]) == ori_op
   1.120 +          && special(pc[5]) == daddu_op)) { tty->print_cr("Not a branch label patch"); }
   1.121 +
   1.122 +    int offset = target - branch;
   1.123 +    if (!is_simm16(offset))
   1.124 +    {
   1.125 +      pc[3] = (pc[3] & 0xffff0000) | high16(offset - 12);
   1.126 +      pc[4] = (pc[4] & 0xffff0000) | low16(offset - 12);
   1.127 +    }
   1.128 +    else
   1.129 +    {
    1.130 +      /* revert to "b + nop" */
   1.131 +      CodeBuffer cb(branch, 4 * 10);
   1.132 +      MacroAssembler masm(&cb);
   1.133 +#define __ masm.
   1.134 +      __ b(target);
   1.135 +      __ nop();
   1.136 +      __ nop();
   1.137 +      __ nop();
   1.138 +      __ nop();
   1.139 +      __ nop();
   1.140 +      __ nop();
   1.141 +      __ nop();
   1.142 +    }
   1.143 +    return;
   1.144 +  }
   1.145 +
   1.146 +#ifndef PRODUCT
   1.147 +  if (!is_simm16((target - branch - 4) >> 2))
   1.148 +  {
   1.149 +    tty->print_cr("Illegal patching: target=0x%lx", target);
   1.150 +    int *p = (int *)branch;
   1.151 +    for (int i = -10; i < 10; i++)
   1.152 +    {
   1.153 +       tty->print("0x%lx, ", p[i]);
   1.154 +    }
   1.155 +    tty->print_cr("");
   1.156 +  }
   1.157 +#endif
   1.158 +
   1.159 +  stub_inst = patched_branch(target - branch, stub_inst, 0);
   1.160 +}
   1.161 +
   1.162 +static inline address first_cache_address() {
   1.163 +  return CodeCache::low_bound() + sizeof(HeapBlock::Header);
   1.164 +}
   1.165 +
   1.166 +static inline address last_cache_address() {
   1.167 +  return CodeCache::high_bound() - Assembler::InstructionSize;
   1.168 +}
   1.169 +
   1.170 +int MacroAssembler::call_size(address target, bool far, bool patchable) {
   1.171 +  if (patchable) return 6 << Assembler::LogInstructionSize;
   1.172 +  if (!far) return 2 << Assembler::LogInstructionSize; // jal + nop
   1.173 +  return (insts_for_set64((jlong)target) + 2) << Assembler::LogInstructionSize;
   1.174 +}
   1.175 +
   1.176 +// Can we reach target using jal/j from anywhere
   1.177 +// in the code cache (because code can be relocated)?
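          +// (jal/j are J-type: the target keeps the upper bits of PC + 4 and
          +// replaces the low 28 with a 26-bit index << 2, so this holds only if
          +// every possible call site shares one 256 MB-aligned region with target.)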
   1.178 +bool MacroAssembler::reachable_from_cache(address target) {
   1.179 +  address cl = first_cache_address();
   1.180 +  address ch = last_cache_address();
   1.181 +
   1.182 +  return fit_in_jal(target, cl) && fit_in_jal(target, ch);
   1.183 +}
   1.184 +
   1.185 +void MacroAssembler::general_jump(address target) {
   1.186 +  if (reachable_from_cache(target)) {
   1.187 +    j(target);
   1.188 +    nop();
   1.189 +  } else {
   1.190 +    set64(T9, (long)target);
   1.191 +    jr(T9);
   1.192 +    nop();
   1.193 +  }
   1.194 +}
   1.195 +
   1.196 +int MacroAssembler::insts_for_general_jump(address target) {
   1.197 +  if (reachable_from_cache(target)) {
   1.198 +    //j(target);
   1.199 +    //nop();
   1.200 +    return 2;
   1.201 +  } else {
   1.202 +    //set64(T9, (long)target);
   1.203 +    //jr(T9);
   1.204 +    //nop();
   1.205 +    return insts_for_set64((jlong)target) + 2;
   1.206 +  }
   1.207 +}
   1.208 +
   1.209 +void MacroAssembler::patchable_jump(address target) {
   1.210 +  if (reachable_from_cache(target)) {
   1.211 +    nop();
   1.212 +    nop();
   1.213 +    nop();
   1.214 +    nop();
   1.215 +    j(target);
   1.216 +    nop();
   1.217 +  } else {
   1.218 +    patchable_set48(T9, (long)target);
   1.219 +    jr(T9);
   1.220 +    nop();
   1.221 +  }
   1.222 +}
   1.223 +
   1.224 +int MacroAssembler::insts_for_patchable_jump(address target) {
   1.225 +  return 6;
   1.226 +}
   1.227 +
   1.228 +void MacroAssembler::general_call(address target) {
   1.229 +  if (reachable_from_cache(target)) {
   1.230 +    jal(target);
   1.231 +    nop();
   1.232 +  } else {
   1.233 +    set64(T9, (long)target);
   1.234 +    jalr(T9);
   1.235 +    nop();
   1.236 +  }
   1.237 +}
   1.238 +
   1.239 +int MacroAssembler::insts_for_general_call(address target) {
   1.240 +  if (reachable_from_cache(target)) {
   1.241 +    //jal(target);
   1.242 +    //nop();
   1.243 +    return 2;
   1.244 +  } else {
   1.245 +    //set64(T9, (long)target);
   1.246 +    //jalr(T9);
   1.247 +    //nop();
   1.248 +    return insts_for_set64((jlong)target) + 2;
   1.249 +  }
   1.250 +}
   1.251 +
   1.252 +void MacroAssembler::patchable_call(address target) {
   1.253 +  if (reachable_from_cache(target)) {
   1.254 +    nop();
   1.255 +    nop();
   1.256 +    nop();
   1.257 +    nop();
   1.258 +    jal(target);
   1.259 +    nop();
   1.260 +  } else {
   1.261 +    patchable_set48(T9, (long)target);
   1.262 +    jalr(T9);
   1.263 +    nop();
   1.264 +  }
   1.265 +}
   1.266 +
   1.267 +int MacroAssembler::insts_for_patchable_call(address target) {
   1.268 +  return 6;
   1.269 +}
   1.270 +
   1.271 +void MacroAssembler::beq_far(Register rs, Register rt, address entry)
   1.272 +{
   1.273 +  u_char * cur_pc = pc();
   1.274 +
   1.275 +  /* Jin: Near/Far jump */
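          +  // A conditional branch encodes a 16-bit signed word offset (about
          +  // +/-128 KB of reach); beyond that we invert the condition and branch
          +  // around an unconditional far jump.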
   1.276 +  if(is_simm16((entry - pc() - 4) / 4))
   1.277 +  {
   1.278 +    Assembler::beq(rs, rt, offset(entry));
   1.279 +  }
   1.280 +  else
   1.281 +  {
   1.282 +    Label not_jump;
   1.283 +    bne(rs, rt, not_jump);
   1.284 +    delayed()->nop();
   1.285 +
   1.286 +    b_far(entry);
   1.287 +    delayed()->nop();
   1.288 +
   1.289 +    bind(not_jump);
   1.290 +    has_delay_slot();
   1.291 +  }
   1.292 +}
   1.293 +
   1.294 +void MacroAssembler::beq_far(Register rs, Register rt, Label& L)
   1.295 +{
   1.296 +  if (L.is_bound()) {
   1.297 +    beq_far(rs, rt, target(L));
   1.298 +  } else {
   1.299 +    u_char * cur_pc = pc();
   1.300 +    Label not_jump;
   1.301 +    bne(rs, rt, not_jump);
   1.302 +    delayed()->nop();
   1.303 +
   1.304 +    b_far(L);
   1.305 +    delayed()->nop();
   1.306 +
   1.307 +    bind(not_jump);
   1.308 +    has_delay_slot();
   1.309 +  }
   1.310 +}
   1.311 +
   1.312 +void MacroAssembler::bne_far(Register rs, Register rt, address entry)
   1.313 +{
   1.314 +  u_char * cur_pc = pc();
   1.315 +
   1.316 +  /* Jin: Near/Far jump */
   1.317 +  if(is_simm16((entry - pc() - 4) / 4))
   1.318 +  {
   1.319 +    Assembler::bne(rs, rt, offset(entry));
   1.320 +  }
   1.321 +  else
   1.322 +  {
   1.323 +    Label not_jump;
   1.324 +    beq(rs, rt, not_jump);
   1.325 +    delayed()->nop();
   1.326 +
   1.327 +    b_far(entry);
   1.328 +    delayed()->nop();
   1.329 +
   1.330 +    bind(not_jump);
   1.331 +    has_delay_slot();
   1.332 +  }
   1.333 +}
   1.334 +
   1.335 +void MacroAssembler::bne_far(Register rs, Register rt, Label& L)
   1.336 +{
   1.337 +  if (L.is_bound()) {
   1.338 +    bne_far(rs, rt, target(L));
   1.339 +  } else {
   1.340 +    u_char * cur_pc = pc();
   1.341 +    Label not_jump;
   1.342 +    beq(rs, rt, not_jump);
   1.343 +    delayed()->nop();
   1.344 +
   1.345 +    b_far(L);
   1.346 +    delayed()->nop();
   1.347 +
   1.348 +    bind(not_jump);
   1.349 +    has_delay_slot();
   1.350 +  }
   1.351 +}
   1.352 +
   1.353 +void MacroAssembler::b_far(Label& L)
   1.354 +{
   1.355 +  if (L.is_bound()) {
   1.356 +    b_far(target(L));
   1.357 +  } else {
   1.358 +  volatile address dest = target(L);
   1.359 +/*
   1.360 +MacroAssembler::pd_patch_instruction branch=55651ed514, target=55651ef6d8
   1.361 +   0x00000055651ed514: dadd at, ra, zero
   1.362 +   0x00000055651ed518: [4110001]bgezal zero, 0x00000055651ed520
   1.363 +
   1.364 +   0x00000055651ed51c: sll zero, zero, 0
   1.365 +   0x00000055651ed520: lui t9, 0x0
   1.366 +   0x00000055651ed524: ori t9, t9, 0x21b8
   1.367 +   0x00000055651ed528: daddu t9, t9, ra
   1.368 +   0x00000055651ed52c: dadd ra, at, zero
   1.369 +   0x00000055651ed530: jr t9
   1.370 +   0x00000055651ed534: sll zero, zero, 0
   1.371 +*/
    1.372 +    move(AT, RA);
    1.373 +    emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
    1.374 +    nop();
    1.375 +    lui(T9, 0); // to be patched
    1.376 +    ori(T9, T9, 0);
    1.377 +    daddu(T9, T9, RA);
    1.378 +    move(RA, AT);
    1.379 +    jr(T9);
   1.380 +  }
   1.381 +}
   1.382 +
   1.383 +void MacroAssembler::b_far(address entry)
   1.384 +{
   1.385 +  u_char * cur_pc = pc();
   1.386 +
   1.387 +  /* Jin: Near/Far jump */
   1.388 +  if(is_simm16((entry - pc() - 4) / 4))
   1.389 +  {
   1.390 +    b(offset(entry));
   1.391 +  }
   1.392 +  else
   1.393 +  {
   1.394 +    /* address must be bounded */
   1.395 +    move(AT, RA);
    1.396 +    emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
   1.397 +    nop();
   1.398 +    li32(T9, entry - pc());
   1.399 +    daddu(T9, T9, RA);
   1.400 +    move(RA, AT);
   1.401 +    jr(T9);
   1.402 +  }
   1.403 +}
   1.404 +
   1.405 +void MacroAssembler::ld_ptr(Register rt, Register offset, Register base) {
   1.406 +  addu_long(AT, base, offset);
   1.407 +  ld_ptr(rt, 0, AT);
   1.408 +}
   1.409 +
   1.410 +void MacroAssembler::st_ptr(Register rt, Register offset, Register base) {
   1.411 +  addu_long(AT, base, offset);
   1.412 +  st_ptr(rt, 0, AT);
   1.413 +}
   1.414 +
   1.415 +void MacroAssembler::ld_long(Register rt, Register offset, Register base) {
   1.416 +  addu_long(AT, base, offset);
   1.417 +  ld_long(rt, 0, AT);
   1.418 +}
   1.419 +
   1.420 +void MacroAssembler::st_long(Register rt, Register offset, Register base) {
   1.421 +  addu_long(AT, base, offset);
   1.422 +  st_long(rt, 0, AT);
   1.423 +}
   1.424 +
   1.425 +Address MacroAssembler::as_Address(AddressLiteral adr) {
   1.426 +  return Address(adr.target(), adr.rspec());
   1.427 +}
   1.428 +
   1.429 +Address MacroAssembler::as_Address(ArrayAddress adr) {
   1.430 +  return Address::make_array(adr);
   1.431 +}
   1.432 +
   1.433 +// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved).
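          +// A classic LL/SC loop: sc succeeds (writes back and leaves 1 in
          +// tmp_reg2) only if the location was untouched since ll; on failure it
          +// leaves 0 and we retry. The leading sync is presumably a workaround
          +// needed on Loongson cores older than the 3A2000.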
   1.434 +void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) {
   1.435 +  Label again;
   1.436 +
   1.437 +  li(tmp_reg1, counter_addr);
   1.438 +  bind(again);
   1.439 +  if(!Use3A2000) sync();
   1.440 +  ll(tmp_reg2, tmp_reg1, 0);
   1.441 +  addi(tmp_reg2, tmp_reg2, inc);
   1.442 +  sc(tmp_reg2, tmp_reg1, 0);
   1.443 +  beq(tmp_reg2, R0, again);
   1.444 +  delayed()->nop();
   1.445 +}
   1.446 +
   1.447 +int MacroAssembler::biased_locking_enter(Register lock_reg,
   1.448 +                                         Register obj_reg,
   1.449 +                                         Register swap_reg,
   1.450 +                                         Register tmp_reg,
   1.451 +                                         bool swap_reg_contains_mark,
   1.452 +                                         Label& done,
   1.453 +                                         Label* slow_case,
   1.454 +                                         BiasedLockingCounters* counters) {
   1.455 +  assert(UseBiasedLocking, "why call this otherwise?");
   1.456 +  bool need_tmp_reg = false;
   1.457 +  if (tmp_reg == noreg) {
   1.458 +    need_tmp_reg = true;
   1.459 +    tmp_reg = T9;
   1.460 +  }
   1.461 +  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT);
   1.462 +  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
   1.463 +  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
   1.464 +  Address saved_mark_addr(lock_reg, 0);
   1.465 +
   1.466 +  // Biased locking
   1.467 +  // See whether the lock is currently biased toward our thread and
   1.468 +  // whether the epoch is still valid
   1.469 +  // Note that the runtime guarantees sufficient alignment of JavaThread
   1.470 +  // pointers to allow age to be placed into low bits
   1.471 +  // First check to see whether biasing is even enabled for this object
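          +  // (Mark word layout assumed here, low bit first:
          +  //  [lock:2 | biased_lock:1 | age:4 | epoch:2 | thread ID ...];
          +  //  biased_lock_pattern (0x5) in the low three bits marks a biasable
          +  //  or biased object.)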
   1.472 +  Label cas_label;
   1.473 +  int null_check_offset = -1;
   1.474 +  if (!swap_reg_contains_mark) {
   1.475 +    null_check_offset = offset();
   1.476 +    ld_ptr(swap_reg, mark_addr);
   1.477 +  }
   1.478 +
   1.479 +  if (need_tmp_reg) {
   1.480 +    push(tmp_reg);
   1.481 +  }
   1.482 +  move(tmp_reg, swap_reg);
   1.483 +  andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place);
   1.484 +#ifdef _LP64
   1.485 +  daddi(AT, R0, markOopDesc::biased_lock_pattern);
   1.486 +  dsub(AT, AT, tmp_reg);
   1.487 +#else
   1.488 +  addi(AT, R0, markOopDesc::biased_lock_pattern);
   1.489 +  sub(AT, AT, tmp_reg);
   1.490 +#endif
   1.491 +  if (need_tmp_reg) {
   1.492 +    pop(tmp_reg);
   1.493 +  }
   1.494 +
   1.495 +  bne(AT, R0, cas_label);
   1.496 +  delayed()->nop();
   1.497 +
   1.498 +
   1.499 +  // The bias pattern is present in the object's header. Need to check
   1.500 +  // whether the bias owner and the epoch are both still current.
   1.501 +  // Note that because there is no current thread register on MIPS we
   1.502 +  // need to store off the mark word we read out of the object to
   1.503 +  // avoid reloading it and needing to recheck invariants below. This
   1.504 +  // store is unfortunate but it makes the overall code shorter and
   1.505 +  // simpler.
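          +  // The xor trick below: (prototype ^ mark ^ thread) with the age bits
          +  // masked off is zero exactly when the bias owner is the current thread
          +  // and the epoch and klass bits all match.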
   1.506 +  st_ptr(swap_reg, saved_mark_addr);
   1.507 +  if (need_tmp_reg) {
   1.508 +    push(tmp_reg);
   1.509 +  }
   1.510 +  if (swap_reg_contains_mark) {
   1.511 +    null_check_offset = offset();
   1.512 +  }
   1.513 +  load_prototype_header(tmp_reg, obj_reg);
   1.514 +  xorr(tmp_reg, tmp_reg, swap_reg);
   1.515 +  get_thread(swap_reg);
   1.516 +  xorr(swap_reg, swap_reg, tmp_reg);
   1.517 +
   1.518 +  move(AT, ~((int) markOopDesc::age_mask_in_place));
   1.519 +  andr(swap_reg, swap_reg, AT);
   1.520 +
   1.521 +  if (PrintBiasedLockingStatistics) {
   1.522 +    Label L;
   1.523 +    bne(swap_reg, R0, L);
   1.524 +    delayed()->nop();
   1.525 +    push(tmp_reg);
   1.526 +    push(A0);
   1.527 +    atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg);
   1.528 +    pop(A0);
   1.529 +    pop(tmp_reg);
   1.530 +    bind(L);
   1.531 +  }
   1.532 +  if (need_tmp_reg) {
   1.533 +    pop(tmp_reg);
   1.534 +  }
   1.535 +  beq(swap_reg, R0, done);
   1.536 +  delayed()->nop();
   1.537 +  Label try_revoke_bias;
   1.538 +  Label try_rebias;
   1.539 +
   1.540 +  // At this point we know that the header has the bias pattern and
   1.541 +  // that we are not the bias owner in the current epoch. We need to
   1.542 +  // figure out more details about the state of the header in order to
   1.543 +  // know what operations can be legally performed on the object's
   1.544 +  // header.
   1.545 +
   1.546 +  // If the low three bits in the xor result aren't clear, that means
   1.547 +  // the prototype header is no longer biased and we have to revoke
   1.548 +  // the bias on this object.
   1.549 +
   1.550 +  move(AT, markOopDesc::biased_lock_mask_in_place);
   1.551 +  andr(AT, swap_reg, AT);
   1.552 +  bne(AT, R0, try_revoke_bias);
   1.553 +  delayed()->nop();
   1.554 +  // Biasing is still enabled for this data type. See whether the
   1.555 +  // epoch of the current bias is still valid, meaning that the epoch
   1.556 +  // bits of the mark word are equal to the epoch bits of the
   1.557 +  // prototype header. (Note that the prototype header's epoch bits
   1.558 +  // only change at a safepoint.) If not, attempt to rebias the object
   1.559 +  // toward the current thread. Note that we must be absolutely sure
   1.560 +  // that the current epoch is invalid in order to do this because
   1.561 +  // otherwise the manipulations it performs on the mark word are
   1.562 +  // illegal.
   1.563 +
   1.564 +  move(AT, markOopDesc::epoch_mask_in_place);
    1.565 +  andr(AT, swap_reg, AT);
   1.566 +  bne(AT, R0, try_rebias);
   1.567 +  delayed()->nop();
   1.568 +  // The epoch of the current bias is still valid but we know nothing
   1.569 +  // about the owner; it might be set or it might be clear. Try to
   1.570 +  // acquire the bias of the object using an atomic operation. If this
   1.571 +  // fails we will go in to the runtime to revoke the object's bias.
   1.572 +  // Note that we first construct the presumed unbiased header so we
   1.573 +  // don't accidentally blow away another thread's valid bias.
   1.574 +
   1.575 +  ld_ptr(swap_reg, saved_mark_addr);
   1.576 +
   1.577 +  move(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
   1.578 +  andr(swap_reg, swap_reg, AT);
   1.579 +
   1.580 +  if (need_tmp_reg) {
   1.581 +    push(tmp_reg);
   1.582 +  }
   1.583 +  get_thread(tmp_reg);
   1.584 +  orr(tmp_reg, tmp_reg, swap_reg);
   1.585 +  //if (os::is_MP()) {
   1.586 +  //  sync();
   1.587 +  //}
   1.588 +  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
   1.589 +  if (need_tmp_reg) {
   1.590 +    pop(tmp_reg);
   1.591 +  }
   1.592 +  // If the biasing toward our thread failed, this means that
   1.593 +  // another thread succeeded in biasing it toward itself and we
   1.594 +  // need to revoke that bias. The revocation will occur in the
   1.595 +  // interpreter runtime in the slow case.
   1.596 +  if (PrintBiasedLockingStatistics) {
   1.597 +    Label L;
   1.598 +    bne(AT, R0, L);
   1.599 +    delayed()->nop();
   1.600 +    push(tmp_reg);
   1.601 +    push(A0);
   1.602 +    atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg);
   1.603 +    pop(A0);
   1.604 +    pop(tmp_reg);
   1.605 +    bind(L);
   1.606 +  }
   1.607 +  if (slow_case != NULL) {
   1.608 +    beq_far(AT, R0, *slow_case);
   1.609 +    delayed()->nop();
   1.610 +  }
   1.611 +  b(done);
   1.612 +  delayed()->nop();
   1.613 +
   1.614 +  bind(try_rebias);
   1.615 +  // At this point we know the epoch has expired, meaning that the
   1.616 +  // current "bias owner", if any, is actually invalid. Under these
   1.617 +  // circumstances _only_, we are allowed to use the current header's
   1.618 +  // value as the comparison value when doing the cas to acquire the
   1.619 +  // bias in the current epoch. In other words, we allow transfer of
   1.620 +  // the bias from one thread to another directly in this situation.
   1.621 +  //
   1.622 +  // FIXME: due to a lack of registers we currently blow away the age
   1.623 +  // bits in this situation. Should attempt to preserve them.
   1.624 +  if (need_tmp_reg) {
   1.625 +    push(tmp_reg);
   1.626 +  }
   1.627 +  load_prototype_header(tmp_reg, obj_reg);
   1.628 +  get_thread(swap_reg);
   1.629 +  orr(tmp_reg, tmp_reg, swap_reg);
   1.630 +  ld_ptr(swap_reg, saved_mark_addr);
   1.631 +
   1.632 +  //if (os::is_MP()) {
   1.633 +  //  sync();
   1.634 +  //}
   1.635 +  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
   1.636 +  if (need_tmp_reg) {
   1.637 +    pop(tmp_reg);
   1.638 +  }
   1.639 +  // If the biasing toward our thread failed, then another thread
   1.640 +  // succeeded in biasing it toward itself and we need to revoke that
   1.641 +  // bias. The revocation will occur in the runtime in the slow case.
   1.642 +  if (PrintBiasedLockingStatistics) {
   1.643 +    Label L;
   1.644 +    bne(AT, R0, L);
   1.645 +    delayed()->nop();
   1.646 +    push(AT);
   1.647 +    push(tmp_reg);
   1.648 +    atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg);
   1.649 +    pop(tmp_reg);
   1.650 +    pop(AT);
   1.651 +    bind(L);
   1.652 +  }
   1.653 +  if (slow_case != NULL) {
   1.654 +    beq_far(AT, R0, *slow_case);
   1.655 +    delayed()->nop();
   1.656 +  }
   1.657 +
   1.658 +  b(done);
   1.659 +  delayed()->nop();
   1.660 +  bind(try_revoke_bias);
   1.661 +  // The prototype mark in the klass doesn't have the bias bit set any
   1.662 +  // more, indicating that objects of this data type are not supposed
   1.663 +  // to be biased any more. We are going to try to reset the mark of
   1.664 +  // this object to the prototype value and fall through to the
   1.665 +  // CAS-based locking scheme. Note that if our CAS fails, it means
   1.666 +  // that another thread raced us for the privilege of revoking the
   1.667 +  // bias of this particular object, so it's okay to continue in the
   1.668 +  // normal locking code.
   1.669 +  //
   1.670 +  // FIXME: due to a lack of registers we currently blow away the age
   1.671 +  // bits in this situation. Should attempt to preserve them.
   1.672 +  ld_ptr(swap_reg, saved_mark_addr);
   1.673 +
   1.674 +  if (need_tmp_reg) {
   1.675 +    push(tmp_reg);
   1.676 +  }
   1.677 +  load_prototype_header(tmp_reg, obj_reg);
   1.678 +  //if (os::is_MP()) {
   1.679 +  // lock();
   1.680 +  //}
   1.681 +  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
   1.682 +  if (need_tmp_reg) {
   1.683 +    pop(tmp_reg);
   1.684 +  }
   1.685 +  // Fall through to the normal CAS-based lock, because no matter what
   1.686 +  // the result of the above CAS, some thread must have succeeded in
   1.687 +  // removing the bias bit from the object's header.
   1.688 +  if (PrintBiasedLockingStatistics) {
   1.689 +    Label L;
   1.690 +    bne(AT, R0, L);
   1.691 +    delayed()->nop();
   1.692 +    push(AT);
   1.693 +    push(tmp_reg);
   1.694 +    atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg);
   1.695 +    pop(tmp_reg);
   1.696 +    pop(AT);
   1.697 +    bind(L);
   1.698 +  }
   1.699 +
   1.700 +  bind(cas_label);
   1.701 +  return null_check_offset;
   1.702 +}
   1.703 +
   1.704 +void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
   1.705 +  assert(UseBiasedLocking, "why call this otherwise?");
   1.706 +
   1.707 +  // Check for biased locking unlock case, which is a no-op
   1.708 +  // Note: we do not have to check the thread ID for two reasons.
   1.709 +  // First, the interpreter checks for IllegalMonitorStateException at
   1.710 +  // a higher level. Second, if the bias was revoked while we held the
   1.711 +  // lock, the object could not be rebiased toward another thread, so
   1.712 +  // the bias bit would be clear.
   1.713 +#ifdef _LP64
   1.714 +  ld(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
   1.715 +  andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
   1.716 +  daddi(AT, R0, markOopDesc::biased_lock_pattern);
   1.717 +#else
   1.718 +  lw(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
   1.719 +  andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
   1.720 +  addi(AT, R0, markOopDesc::biased_lock_pattern);
   1.721 +#endif
   1.722 +
   1.723 +  beq(AT, temp_reg, done);
   1.724 +  delayed()->nop();
   1.725 +}
   1.726 +
    1.727 +// NOTE: we don't increment SP after the call like the x86 version does; maybe this is a problem, FIXME.
    1.728 +// The stack pointer adjustment is needed, see InterpreterMacroAssembler::super_call_VM_leaf.
    1.729 +// This method handles the stack itself; callers need not preserve stack space for the arguments.
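          +// (The n64 ABI wants SP 16-byte aligned at calls; the andi/beq below
          +// tests the low four bits of SP and, when misaligned, reserves 8 bytes
          +// around the call to restore alignment.)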
   1.730 +void MacroAssembler::call_VM_leaf_base(address entry_point,
   1.731 +    int number_of_arguments) {
   1.732 +  //call(RuntimeAddress(entry_point));
   1.733 +  //increment(rsp, number_of_arguments * wordSize);
   1.734 +  Label L, E;
   1.735 +
   1.736 +  assert(number_of_arguments <= 4, "just check");
   1.737 +
   1.738 +  andi(AT, SP, 0xf);
   1.739 +  beq(AT, R0, L);
   1.740 +  delayed()->nop();
   1.741 +  daddi(SP, SP, -8);
   1.742 +  call(entry_point, relocInfo::runtime_call_type);
   1.743 +  delayed()->nop();
   1.744 +  daddi(SP, SP, 8);
   1.745 +  b(E);
   1.746 +  delayed()->nop();
   1.747 +
   1.748 +  bind(L);
   1.749 +  call(entry_point, relocInfo::runtime_call_type);
   1.750 +  delayed()->nop();
   1.751 +  bind(E);
   1.752 +}
   1.753 +
   1.754 +
   1.755 +void MacroAssembler::jmp(address entry) {
   1.756 +  patchable_set48(T9, (long)entry);
   1.757 +  jr(T9);
   1.758 +}
   1.759 +
   1.760 +void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) {
   1.761 +  switch (rtype) {
   1.762 +    case relocInfo::runtime_call_type:
   1.763 +    case relocInfo::none:
   1.764 +      jmp(entry);
   1.765 +      break;
   1.766 +    default:
   1.767 +      {
   1.768 +      InstructionMark im(this);
   1.769 +      relocate(rtype);
   1.770 +      patchable_set48(T9, (long)entry);
   1.771 +      jr(T9);
   1.772 +      }
   1.773 +      break;
   1.774 +  }
   1.775 +}
   1.776 +
   1.777 +void MacroAssembler::call(address entry) {
    1.778 +// C/C++ code assumes T9 holds the entry point, so we always move entry into T9.
    1.779 +// Maybe there is a more graceful way to handle this. FIXME
   1.780 +// For more info, see class NativeCall.
   1.781 +#ifndef _LP64
   1.782 +  move(T9, (int)entry);
   1.783 +#else
   1.784 +  patchable_set48(T9, (long)entry);
   1.785 +#endif
   1.786 +  jalr(T9);
   1.787 +}
   1.788 +
   1.789 +void MacroAssembler::call(address entry, relocInfo::relocType rtype) {
   1.790 +  switch (rtype) {
   1.791 +    case relocInfo::runtime_call_type:
   1.792 +    case relocInfo::none:
   1.793 +      call(entry);
   1.794 +      break;
   1.795 +    default:
   1.796 +      {
    1.797 +      InstructionMark im(this);
    1.798 +      relocate(rtype);
    1.799 +      call(entry);
   1.800 +      }
   1.801 +      break;
   1.802 +  }
   1.803 +}
   1.804 +
   1.805 +void MacroAssembler::call(address entry, RelocationHolder& rh)
   1.806 +{
   1.807 +  switch (rh.type()) {
   1.808 +    case relocInfo::runtime_call_type:
   1.809 +    case relocInfo::none:
   1.810 +      call(entry);
   1.811 +      break;
   1.812 +    default:
   1.813 +      {
    1.814 +      InstructionMark im(this);
    1.815 +      relocate(rh);
    1.816 +      call(entry);
   1.817 +      }
   1.818 +      break;
   1.819 +  }
   1.820 +}
   1.821 +
   1.822 +void MacroAssembler::ic_call(address entry) {
   1.823 +  RelocationHolder rh = virtual_call_Relocation::spec(pc());
   1.824 +  patchable_set48(IC_Klass, (long)Universe::non_oop_word());
   1.825 +  assert(entry != NULL, "call most probably wrong");
   1.826 +  InstructionMark im(this);
   1.827 +  relocate(rh);
    1.828 +  patchable_call(entry);
   1.829 +}
   1.830 +
   1.831 +void MacroAssembler::c2bool(Register r) {
   1.832 +  Label L;
   1.833 +  Assembler::beq(r, R0, L);
   1.834 +  delayed()->nop();
   1.835 +  move(r, 1);
   1.836 +  bind(L);
   1.837 +}
   1.838 +
   1.839 +#ifndef PRODUCT
   1.840 +extern "C" void findpc(intptr_t x);
   1.841 +#endif
   1.842 +
   1.843 +void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
    1.844 +  // In order to get locks to work, we need to fake an in_VM state
   1.845 +  JavaThread* thread = JavaThread::current();
   1.846 +  JavaThreadState saved_state = thread->thread_state();
   1.847 +  thread->set_thread_state(_thread_in_vm);
    1.848 +  if (ShowMessageBoxOnError) {
   1.852 +    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
   1.853 +      ttyLocker ttyl;
   1.854 +      BytecodeCounter::print();
   1.855 +    }
   1.856 +    // To see where a verify_oop failed, get $ebx+40/X for this frame.
   1.857 +    // This is the value of eip which points to where verify_oop will return.
   1.858 +    if (os::message_box(msg, "Execution stopped, print registers?")) {
   1.859 +      ttyLocker ttyl;
   1.860 +      tty->print_cr("eip = 0x%08x", eip);
   1.861 +#ifndef PRODUCT
   1.862 +      tty->cr();
   1.863 +      findpc(eip);
   1.864 +      tty->cr();
   1.865 +#endif
   1.866 +      tty->print_cr("rax, = 0x%08x", rax);
   1.867 +      tty->print_cr("rbx, = 0x%08x", rbx);
   1.868 +      tty->print_cr("rcx = 0x%08x", rcx);
   1.869 +      tty->print_cr("rdx = 0x%08x", rdx);
   1.870 +      tty->print_cr("rdi = 0x%08x", rdi);
   1.871 +      tty->print_cr("rsi = 0x%08x", rsi);
   1.872 +      tty->print_cr("rbp, = 0x%08x", rbp);
   1.873 +      tty->print_cr("rsp = 0x%08x", rsp);
   1.874 +      BREAKPOINT;
   1.875 +    }
   1.876 +  } else {
   1.877 +    ttyLocker ttyl;
   1.878 +    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
   1.879 +    assert(false, "DEBUG MESSAGE");
   1.880 +  }
   1.881 +  ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
   1.882 +}
   1.883 +
   1.884 +void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) {
   1.885 +  if ( ShowMessageBoxOnError ) {
   1.886 +    JavaThreadState saved_state = JavaThread::current()->thread_state();
   1.887 +    JavaThread::current()->set_thread_state(_thread_in_vm);
   1.888 +    {
    1.889 +      // In order to get locks to work, we need to fake an in_VM state
    1.890 +      ttyLocker ttyl;
    1.891 +      ::tty->print_cr("EXECUTION STOPPED: %s\n", msg);
    1.892 +      if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
    1.893 +        BytecodeCounter::print();
   1.894 +      }
   1.895 +
   1.896 +      //      if (os::message_box(msg, "Execution stopped, print registers?"))
   1.897 +      //        regs->print(::tty);
   1.898 +    }
   1.899 +    ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state);
   1.900 +  }
   1.901 +  else
   1.902 +    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
   1.903 +}
   1.904 +
   1.905 +
   1.906 +void MacroAssembler::stop(const char* msg) {
   1.907 +  li(A0, (long)msg);
   1.908 +#ifndef _LP64
    1.909 +  // reserve space for the argument (added by yjl 7/10/2005)
   1.910 +  addiu(SP, SP, - 1 * wordSize);
   1.911 +#endif
   1.912 +  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
   1.913 +  delayed()->nop();
   1.914 +#ifndef _LP64
   1.915 +  //restore space for argument
   1.916 +  addiu(SP, SP, 1 * wordSize);
   1.917 +#endif
   1.918 +  brk(17);
   1.919 +}
   1.920 +
   1.921 +void MacroAssembler::warn(const char* msg) {
   1.922 +#ifdef _LP64
   1.923 +  pushad();
   1.924 +  li(A0, (long)msg);
   1.925 +  push(S2);
   1.926 +  move(AT, -(StackAlignmentInBytes));
   1.927 +  move(S2, SP);     // use S2 as a sender SP holder
   1.928 +  andr(SP, SP, AT); // align stack as required by ABI
   1.929 +  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
   1.930 +  delayed()->nop();
   1.931 +  move(SP, S2);     // use S2 as a sender SP holder
   1.932 +  pop(S2);
   1.933 +  popad();
   1.934 +#else
   1.935 +  pushad();
   1.936 +  addi(SP, SP, -4);
   1.937 +  sw(A0, SP, -1 * wordSize);
   1.938 +  li(A0, (long)msg);
   1.939 +  addi(SP, SP, -1 * wordSize);
   1.940 +  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
   1.941 +  delayed()->nop();
   1.942 +  addi(SP, SP, 1 * wordSize);
   1.943 +  lw(A0, SP, -1 * wordSize);
   1.944 +  addi(SP, SP, 4);
   1.945 +  popad();
   1.946 +#endif
   1.947 +}
   1.948 +
   1.949 +void MacroAssembler::print_reg(Register reg) {
   1.957 +  void * cur_pc = pc();
   1.958 +  pushad();
   1.959 +  NOT_LP64(push(FP);)
   1.960 +
   1.961 +  li(A0, (long)reg->name());
   1.962 +  if (reg == SP)
   1.963 +    addiu(A1, SP, wordSize * 23); //23 registers saved in pushad()
   1.964 +  else if (reg == A0)
   1.965 +    ld(A1, SP, wordSize * 19); //A0 has been modified by li(A0, (long)reg->name()). Ugly Code!
   1.966 +  else
   1.967 +    move(A1, reg);
   1.968 +  li(A2, (long)cur_pc);
   1.969 +  push(S2);
   1.970 +  move(AT, -(StackAlignmentInBytes));
   1.971 +  move(S2, SP);     // use S2 as a sender SP holder
   1.972 +  andr(SP, SP, AT); // align stack as required by ABI
   1.973 +  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_reg_with_pc),relocInfo::runtime_call_type);
   1.974 +  delayed()->nop();
   1.975 +  move(SP, S2);     // use S2 as a sender SP holder
   1.976 +  pop(S2);
   1.977 +  NOT_LP64(pop(FP);)
   1.978 +  popad();
  1.1009 +}
  1.1010 +
  1.1011 +void MacroAssembler::print_reg(FloatRegister reg) {
  1.1012 +  void * cur_pc = pc();
  1.1013 +  pushad();
  1.1014 +  NOT_LP64(push(FP);)
  1.1015 +  li(A0, (long)reg->name());
  1.1016 +  push(S2);
  1.1017 +  move(AT, -(StackAlignmentInBytes));
  1.1018 +  move(S2, SP);     // use S2 as a sender SP holder
  1.1019 +  andr(SP, SP, AT); // align stack as required by ABI
  1.1020 +  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str),relocInfo::runtime_call_type);
  1.1021 +  delayed()->nop();
  1.1022 +  move(SP, S2);     // use S2 as a sender SP holder
  1.1023 +  pop(S2);
  1.1024 +  NOT_LP64(pop(FP);)
  1.1025 +  popad();
  1.1026 +
  1.1027 +  pushad();
  1.1028 +  NOT_LP64(push(FP);)
   1.1029 +  move(FP, SP);
   1.1030 +  move(AT, -(StackAlignmentInBytes));
   1.1031 +  andr(SP, SP, AT);
   1.1032 +  mov_d(F12, reg);
   1.1033 +  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_double), relocInfo::runtime_call_type);
   1.1034 +  delayed()->nop();
   1.1035 +  move(SP, FP);
  1.1042 +  NOT_LP64(pop(FP);)
  1.1043 +  popad();
  1.1056 +}
  1.1057 +
  1.1058 +void MacroAssembler::increment(Register reg, int imm) {
  1.1059 +  if (!imm) return;
  1.1060 +  if (is_simm16(imm)) {
  1.1061 +#ifdef _LP64
  1.1062 +    daddiu(reg, reg, imm);
  1.1063 +#else
  1.1064 +    addiu(reg, reg, imm);
  1.1065 +#endif
  1.1066 +  } else {
  1.1067 +    move(AT, imm);
  1.1068 +#ifdef _LP64
  1.1069 +    daddu(reg, reg, AT);
  1.1070 +#else
  1.1071 +    addu(reg, reg, AT);
  1.1072 +#endif
  1.1073 +  }
  1.1074 +}
  1.1075 +
  1.1076 +void MacroAssembler::decrement(Register reg, int imm) {
  1.1077 +  increment(reg, -imm);
  1.1078 +}
  1.1079 +
  1.1080 +
  1.1081 +void MacroAssembler::call_VM(Register oop_result,
  1.1082 +                             address entry_point,
  1.1083 +                             bool check_exceptions) {
  1.1084 +  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
  1.1085 +}
  1.1086 +
  1.1087 +void MacroAssembler::call_VM(Register oop_result,
  1.1088 +                             address entry_point,
  1.1089 +                             Register arg_1,
  1.1090 +                             bool check_exceptions) {
  1.1091 +  if (arg_1!=A1) move(A1, arg_1);
  1.1092 +  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
  1.1093 +}
  1.1094 +
  1.1095 +void MacroAssembler::call_VM(Register oop_result,
  1.1096 +                             address entry_point,
  1.1097 +                             Register arg_1,
  1.1098 +                             Register arg_2,
  1.1099 +                             bool check_exceptions) {
  1.1100 +  if (arg_1!=A1) move(A1, arg_1);
  1.1101 +  if (arg_2!=A2) move(A2, arg_2);
  1.1102 +  assert(arg_2 != A1, "smashed argument");
  1.1103 +  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
  1.1104 +}
  1.1105 +
  1.1106 +void MacroAssembler::call_VM(Register oop_result,
  1.1107 +                             address entry_point,
  1.1108 +                             Register arg_1,
  1.1109 +                             Register arg_2,
  1.1110 +                             Register arg_3,
  1.1111 +                             bool check_exceptions) {
  1.1112 +  if (arg_1!=A1) move(A1, arg_1);
  1.1113 +  if (arg_2!=A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  1.1114 +  if (arg_3!=A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
  1.1115 +  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
  1.1116 +}
  1.1117 +
  1.1118 +void MacroAssembler::call_VM(Register oop_result,
  1.1119 +                             Register last_java_sp,
  1.1120 +                             address entry_point,
  1.1121 +                             int number_of_arguments,
  1.1122 +                             bool check_exceptions) {
  1.1123 +  call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions);
  1.1124 +}
  1.1125 +
  1.1126 +void MacroAssembler::call_VM(Register oop_result,
  1.1127 +                             Register last_java_sp,
  1.1128 +                             address entry_point,
  1.1129 +                             Register arg_1,
  1.1130 +                             bool check_exceptions) {
  1.1131 +  if (arg_1 != A1) move(A1, arg_1);
  1.1132 +  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
  1.1133 +}
  1.1134 +
  1.1135 +void MacroAssembler::call_VM(Register oop_result,
  1.1136 +                             Register last_java_sp,
  1.1137 +                             address entry_point,
  1.1138 +                             Register arg_1,
  1.1139 +                             Register arg_2,
  1.1140 +                             bool check_exceptions) {
  1.1141 +  if (arg_1 != A1) move(A1, arg_1);
  1.1142 +  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  1.1143 +  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
  1.1144 +}
  1.1145 +
  1.1146 +void MacroAssembler::call_VM(Register oop_result,
  1.1147 +                             Register last_java_sp,
  1.1148 +                             address entry_point,
  1.1149 +                             Register arg_1,
  1.1150 +                             Register arg_2,
  1.1151 +                             Register arg_3,
  1.1152 +                             bool check_exceptions) {
  1.1153 +  if (arg_1 != A1) move(A1, arg_1);
  1.1154 +  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  1.1155 +  if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
  1.1156 +  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
  1.1157 +}
  1.1158 +
  1.1159 +void MacroAssembler::call_VM_base(Register oop_result,
  1.1160 +                                  Register java_thread,
  1.1161 +                                  Register last_java_sp,
  1.1162 +                                  address  entry_point,
  1.1163 +                                  int      number_of_arguments,
   1.1164 +                                  bool     check_exceptions) {
  1.1165 +
  1.1166 +  address before_call_pc;
  1.1167 +  // determine java_thread register
  1.1168 +  if (!java_thread->is_valid()) {
  1.1169 +#ifndef OPT_THREAD
  1.1170 +    java_thread = T2;
  1.1171 +    get_thread(java_thread);
  1.1172 +#else
  1.1173 +    java_thread = TREG;
  1.1174 +#endif
  1.1175 +  }
  1.1176 +  // determine last_java_sp register
  1.1177 +  if (!last_java_sp->is_valid()) {
  1.1178 +    last_java_sp = SP;
  1.1179 +  }
  1.1180 +  // debugging support
  1.1181 +  assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
   1.1182 +  assert(number_of_arguments <= 4   , "cannot have more than 4 arguments");
  1.1183 +  assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
  1.1184 +  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");
  1.1185 +
  1.1186 +  assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save ebp");
  1.1187 +
  1.1188 +  // set last Java frame before call
  1.1189 +  before_call_pc = (address)pc();
  1.1190 +  set_last_Java_frame(java_thread, last_java_sp, FP, before_call_pc);
  1.1191 +
  1.1192 +  // do the call
  1.1193 +  move(A0, java_thread);
  1.1194 +  call(entry_point, relocInfo::runtime_call_type);
  1.1195 +  delayed()->nop();
  1.1196 +
  1.1197 +  // restore the thread (cannot use the pushed argument since arguments
  1.1198 +  // may be overwritten by C code generated by an optimizing compiler);
  1.1199 +  // however can use the register value directly if it is callee saved.
  1.1200 +#ifndef OPT_THREAD
   1.1201 +  if (java_thread >= S0 && java_thread <= S7) {
  1.1202 +#ifdef ASSERT
  1.1203 +    { Label L;
  1.1204 +      get_thread(AT);
  1.1205 +      beq(java_thread, AT, L);
  1.1206 +      delayed()->nop();
  1.1207 +      stop("MacroAssembler::call_VM_base: edi not callee saved?");
  1.1208 +      bind(L);
  1.1209 +    }
  1.1210 +#endif
  1.1211 +  } else {
  1.1212 +    get_thread(java_thread);
  1.1213 +  }
  1.1214 +#endif
  1.1215 +
  1.1216 +  // discard thread and arguments
  1.1217 +  ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
  1.1218 +  // reset last Java frame
  1.1219 +  reset_last_Java_frame(java_thread, false, true);
  1.1220 +
  1.1221 +  check_and_handle_popframe(java_thread);
  1.1222 +  check_and_handle_earlyret(java_thread);
  1.1223 +  if (check_exceptions) {
  1.1224 +    // check for pending exceptions (java_thread is set upon return)
  1.1225 +    Label L;
  1.1226 +#ifdef _LP64
  1.1227 +    ld(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
  1.1228 +#else
  1.1229 +    lw(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
  1.1230 +#endif
  1.1231 +    beq(AT, R0, L);
  1.1232 +    delayed()->nop();
  1.1233 +    li(AT, before_call_pc);
  1.1234 +    push(AT);
  1.1235 +    jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
  1.1236 +    delayed()->nop();
  1.1237 +    bind(L);
  1.1238 +  }
  1.1239 +
  1.1240 +  // get oop result if there is one and reset the value in the thread
  1.1241 +  if (oop_result->is_valid()) {
  1.1242 +#ifdef _LP64
  1.1243 +    ld(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset()));
  1.1244 +    sd(R0, java_thread, in_bytes(JavaThread::vm_result_offset()));
  1.1245 +#else
  1.1246 +    lw(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset()));
  1.1247 +    sw(R0, java_thread, in_bytes(JavaThread::vm_result_offset()));
  1.1248 +#endif
  1.1249 +    verify_oop(oop_result);
  1.1250 +  }
  1.1251 +}
  1.1252 +
  1.1253 +void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
  1.1254 +
  1.1255 +  move(V0, SP);
   1.1256 +  // we also reserve space for java_thread here
   1.1257 +#ifndef _LP64
   1.1258 +  addi(SP, SP, (1 + number_of_arguments) * (- wordSize));
  1.1259 +#endif
  1.1260 +  move(AT, -(StackAlignmentInBytes));
  1.1261 +  andr(SP, SP, AT);
  1.1262 +  call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions);
  1.1263 +
  1.1264 +}
  1.1265 +
  1.1266 +void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  1.1267 +  call_VM_leaf_base(entry_point, number_of_arguments);
  1.1268 +}
  1.1269 +
  1.1270 +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  1.1271 +  if (arg_0 != A0) move(A0, arg_0);
  1.1272 +  call_VM_leaf(entry_point, 1);
  1.1273 +}
  1.1274 +
  1.1275 +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
  1.1276 +  if (arg_0 != A0) move(A0, arg_0);
  1.1277 +  if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument");
  1.1278 +  call_VM_leaf(entry_point, 2);
  1.1279 +}
  1.1280 +
  1.1281 +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  1.1282 +  if (arg_0 != A0) move(A0, arg_0);
  1.1283 +  if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument");
  1.1284 +  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument");
  1.1285 +  call_VM_leaf(entry_point, 3);
   1.1286 +}
   1.1287 +
   1.1288 +void MacroAssembler::super_call_VM_leaf(address entry_point) {
  1.1288 +  MacroAssembler::call_VM_leaf_base(entry_point, 0);
  1.1289 +}
  1.1290 +
  1.1291 +
  1.1292 +void MacroAssembler::super_call_VM_leaf(address entry_point,
  1.1293 +                                                   Register arg_1) {
  1.1294 +  if (arg_1 != A0) move(A0, arg_1);
  1.1295 +  MacroAssembler::call_VM_leaf_base(entry_point, 1);
  1.1296 +}
  1.1297 +
  1.1298 +
  1.1299 +void MacroAssembler::super_call_VM_leaf(address entry_point,
  1.1300 +                                                   Register arg_1,
  1.1301 +                                                   Register arg_2) {
  1.1302 +  if (arg_1 != A0) move(A0, arg_1);
  1.1303 +  if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument");
  1.1304 +  MacroAssembler::call_VM_leaf_base(entry_point, 2);
   1.1305 +}
   1.1306 +
   1.1307 +void MacroAssembler::super_call_VM_leaf(address entry_point,
  1.1307 +                                                   Register arg_1,
  1.1308 +                                                   Register arg_2,
  1.1309 +                                                   Register arg_3) {
  1.1310 +  if (arg_1 != A0) move(A0, arg_1);
  1.1311 +  if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument");
  1.1312 +  if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument");
  1.1313 +  MacroAssembler::call_VM_leaf_base(entry_point, 3);
  1.1314 +}
  1.1315 +
  1.1316 +void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
  1.1317 +}
  1.1318 +
  1.1319 +void MacroAssembler::check_and_handle_popframe(Register java_thread) {
  1.1320 +}
  1.1321 +
  1.1322 +void MacroAssembler::null_check(Register reg, int offset) {
  1.1323 +  if (needs_explicit_null_check(offset)) {
   1.1324 +    // provoke an OS NULL exception if reg == NULL by
   1.1325 +    // accessing M[reg] w/o changing any (non-CC) registers
   1.1326 +    // NOTE: a single load is enough here to provoke a SEGV
   1.1327 +    lw(AT, reg, 0);
   1.1328 +  } else {
   1.1329 +    // nothing to do; a (later) access of M[reg + offset]
   1.1330 +    // will provoke an OS NULL exception if reg == NULL
  1.1339 +  }
  1.1340 +}
  1.1341 +
  1.1342 +void MacroAssembler::enter() {
  1.1343 +  push2(RA, FP);
  1.1344 +  move(FP, SP);
  1.1345 +}
  1.1346 +
  1.1347 +void MacroAssembler::leave() {
  1.1348 +#ifndef _LP64
  1.1349 +  //move(SP, FP);
  1.1350 +  //pop2(FP, RA);
  1.1351 +  addi(SP, FP, 2 * wordSize);
  1.1352 +  lw(RA, SP, - 1 * wordSize);
  1.1353 +  lw(FP, SP, - 2 * wordSize);
  1.1354 +#else
  1.1355 +  daddi(SP, FP, 2 * wordSize);
  1.1356 +  ld(RA, SP, - 1 * wordSize);
  1.1357 +  ld(FP, SP, - 2 * wordSize);
  1.1358 +#endif
  1.1359 +}
  1.1360 +/*
  1.1361 +void MacroAssembler::os_breakpoint() {
  1.1362 +  // instead of directly emitting a breakpoint, call os:breakpoint for better debugability
  1.1363 +  // (e.g., MSVC can't call ps() otherwise)
  1.1364 +  call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
  1.1365 +}
  1.1366 +*/
  1.1367 +void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
  1.1368 +  // determine java_thread register
  1.1369 +  if (!java_thread->is_valid()) {
  1.1370 +#ifndef OPT_THREAD
  1.1371 +    java_thread = T1;
  1.1372 +    get_thread(java_thread);
  1.1373 +#else
  1.1374 +    java_thread = TREG;
  1.1375 +#endif
  1.1376 +  }
  1.1377 +  // we must set sp to zero to clear frame
  1.1378 +  st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
  1.1379 +  // must clear fp, so that compiled frames are not confused; it is possible
  1.1380 +  // that we need it only for debugging
  1.1381 +  if(clear_fp)
  1.1382 +    st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));
  1.1383 +
  1.1384 +  if (clear_pc)
  1.1385 +    st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
  1.1386 +}
  1.1387 +
  1.1388 +void MacroAssembler::reset_last_Java_frame(bool clear_fp,
  1.1389 +                                           bool clear_pc) {
  1.1390 +  Register thread = TREG;
  1.1391 +#ifndef OPT_THREAD
  1.1392 +  get_thread(thread);
  1.1393 +#endif
  1.1394 +  // we must set sp to zero to clear frame
  1.1395 +  sd(R0, Address(thread, JavaThread::last_Java_sp_offset()));
  1.1396 +  // must clear fp, so that compiled frames are not confused; it is
  1.1397 +  // possible that we need it only for debugging
  1.1398 +  if (clear_fp) {
  1.1399 +    sd(R0, Address(thread, JavaThread::last_Java_fp_offset()));
  1.1400 +  }
  1.1401 +
  1.1402 +  if (clear_pc) {
  1.1403 +    sd(R0, Address(thread, JavaThread::last_Java_pc_offset()));
  1.1404 +  }
  1.1405 +}
  1.1406 +
  1.1407 +// Write serialization page so VM thread can do a pseudo remote membar.
  1.1408 +// We use the current thread pointer to calculate a thread specific
  1.1409 +// offset to write to within the page. This minimizes bus traffic
  1.1410 +// due to cache line collision.
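          +// In effect: offset = (thread >> shift) & (vm_page_size - sizeof(int)),
          +// then store an int at serialize_page + offset.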
  1.1411 +void MacroAssembler::serialize_memory(Register thread, Register tmp) {
  1.1412 +  move(tmp, thread);
   1.1413 +  srl(tmp, tmp, os::get_serialize_page_shift_count());
   1.1414 +  move(AT, (os::vm_page_size() - sizeof(int)));
   1.1415 +  andr(tmp, tmp, AT);
   1.1416 +  sw(tmp, Address(tmp, (intptr_t)os::get_memory_serialize_page()));
  1.1417 +}
  1.1418 +
  1.1419 +// Calls to C land
  1.1420 +//
   1.1421 +// When entering C land, the fp & sp of the last Java frame have to be recorded
  1.1422 +// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
  1.1423 +// has to be reset to 0. This is required to allow proper stack traversal.
  1.1424 +void MacroAssembler::set_last_Java_frame(Register java_thread,
  1.1425 +                                         Register last_java_sp,
  1.1426 +                                         Register last_java_fp,
  1.1427 +                                         address  last_java_pc) {
  1.1428 +  // determine java_thread register
  1.1429 +  if (!java_thread->is_valid()) {
  1.1430 +#ifndef OPT_THREAD
  1.1431 +    java_thread = T2;
  1.1432 +    get_thread(java_thread);
  1.1433 +#else
  1.1434 +    java_thread = TREG;
  1.1435 +#endif
  1.1436 +  }
  1.1437 +  // determine last_java_sp register
  1.1438 +  if (!last_java_sp->is_valid()) {
  1.1439 +    last_java_sp = SP;
  1.1440 +  }
  1.1441 +
  1.1442 +  // last_java_fp is optional
  1.1443 +
  1.1444 +  if (last_java_fp->is_valid()) {
  1.1445 +    st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));
  1.1446 +  }
  1.1447 +
  1.1448 +  // last_java_pc is optional
  1.1449 +
  1.1450 +  if (last_java_pc != NULL) {
  1.1451 +    relocate(relocInfo::internal_pc_type);
  1.1452 +    patchable_set48(AT, (long)last_java_pc);
  1.1453 +    st_ptr(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
  1.1454 +  }
  1.1455 +  st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
  1.1456 +}
  1.1457 +
  1.1458 +void MacroAssembler::set_last_Java_frame(Register last_java_sp,
  1.1459 +                                         Register last_java_fp,
  1.1460 +                                         address  last_java_pc) {
  1.1461 +  // determine last_java_sp register
  1.1462 +  if (!last_java_sp->is_valid()) {
  1.1463 +    last_java_sp = SP;
  1.1464 +  }
  1.1465 +
  1.1466 +  Register thread = TREG;
  1.1467 +#ifndef OPT_THREAD
  1.1468 +  get_thread(thread);
  1.1469 +#endif
  1.1470 +  // last_java_fp is optional
  1.1471 +  if (last_java_fp->is_valid()) {
  1.1472 +    sd(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset()));
  1.1473 +  }
  1.1474 +
  1.1475 +  // last_java_pc is optional
  1.1476 +  if (last_java_pc != NULL) {
  1.1477 +    Address java_pc(thread,
  1.1478 +                    JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
  1.1479 +    li(AT, (intptr_t)(last_java_pc));
  1.1480 +    sd(AT, java_pc);
  1.1481 +  }
  1.1482 +
  1.1483 +  sd(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset()));
  1.1484 +}
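         +// A sketch of the anchor protocol implemented by both set_last_Java_frame
         +// overloads above: fp and pc (both optional) are written first, and sp is
         +// published last, since the stack walker treats a non-zero last_Java_sp as
         +// "anchor valid":
         +//
         +//   thread->frame_anchor()->set_last_Java_fp(fp);  // optional
         +//   thread->frame_anchor()->set_last_Java_pc(pc);  // optional
         +//   thread->frame_anchor()->set_last_Java_sp(sp);  // makes the anchor visible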
  1.1485 +
  1.1486 +//////////////////////////////////////////////////////////////////////////////////
  1.1487 +#if INCLUDE_ALL_GCS
  1.1488 +
  1.1489 +void MacroAssembler::g1_write_barrier_pre(Register obj,
  1.1490 +#ifndef _LP64
  1.1491 +                                          Register thread,
  1.1492 +#endif
  1.1493 +                                          Register tmp,
  1.1494 +                                          Register tmp2,
  1.1495 +                                          bool tosca_live) {
  1.1496 +  Unimplemented();
  1.1497 +}
  1.1498 +
  1.1499 +void MacroAssembler::g1_write_barrier_post(Register store_addr,
  1.1500 +                                           Register new_val,
  1.1501 +#ifndef _LP64
  1.1502 +                                           Register thread,
  1.1503 +#endif
  1.1504 +                                           Register tmp,
  1.1505 +                                           Register tmp2) {
  1.1506 +
  1.1507 +  Unimplemented();
  1.1508 +}
  1.1509 +
  1.1510 +#endif // INCLUDE_ALL_GCS
  1.1511 +//////////////////////////////////////////////////////////////////////////////////
  1.1512 +
  1.1513 +
  1.1514 +void MacroAssembler::store_check(Register obj) {
  1.1515 +  // Does a store check for the oop in register obj. The content of
  1.1516 +  // register obj is destroyed afterwards.
  1.1517 +  store_check_part_1(obj);
  1.1518 +  store_check_part_2(obj);
  1.1519 +}
  1.1520 +
  1.1521 +void MacroAssembler::store_check(Register obj, Address dst) {
  1.1522 +  store_check(obj);
  1.1523 +}
  1.1524 +
  1.1525 +
  1.1526 +// split the store check operation so that other instructions can be scheduled in between
  1.1527 +void MacroAssembler::store_check_part_1(Register obj) {
  1.1528 +  BarrierSet* bs = Universe::heap()->barrier_set();
  1.1529 +  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  1.1530 +#ifdef _LP64
  1.1531 +  dsrl(obj, obj, CardTableModRefBS::card_shift);
  1.1532 +#else
  1.1533 +  shr(obj, CardTableModRefBS::card_shift);
  1.1534 +#endif
  1.1535 +}
  1.1536 +
  1.1537 +void MacroAssembler::store_check_part_2(Register obj) {
  1.1538 +  BarrierSet* bs = Universe::heap()->barrier_set();
  1.1539 +  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  1.1540 +  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  1.1541 +  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
  1.1542 +
  1.1543 +  li(AT, (long)ct->byte_map_base);
  1.1544 +#ifdef _LP64
  1.1545 +  dadd(AT, AT, obj);
  1.1546 +#else
  1.1547 +  add(AT, AT, obj);
  1.1548 +#endif
  1.1549 +  sb(R0, AT, 0);
  1.1550 +  sync();
  1.1551 +}
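         +// Net effect of part_1 + part_2, as a sketch in C:
         +//
         +//   jbyte* card = ct->byte_map_base + ((uintptr_t)obj >> CardTableModRefBS::card_shift);
         +//   *card = 0;   // 0 is dirty_card_val()
         +//
         +// followed by sync() so the dirty mark becomes visible to concurrent scanners.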
  1.1552 +
  1.1553 +// Defines obj and preserves var_size_in_bytes; note that t2 must differ from
  1.1553 +// var_size_in_bytes here (see the assert below).
  1.1554 +void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
  1.1555 +                                   Register t1, Register t2, Label& slow_case) {
  1.1556 +  assert_different_registers(obj, var_size_in_bytes, t1, t2, AT);
  1.1557 +
  1.1558 +  Register end = t2;
  1.1559 +#ifndef OPT_THREAD
  1.1560 +  Register thread = t1;
  1.1561 +  get_thread(thread);
  1.1562 +#else
  1.1563 +  Register thread = TREG;
  1.1564 +#endif
  1.1565 +  verify_tlab(t1, t2); // blows t1 and t2
  1.1566 +
  1.1567 +  ld_ptr(obj, thread, in_bytes(JavaThread::tlab_top_offset()));
  1.1568 +
  1.1569 +  if (var_size_in_bytes == NOREG) {
  1.1570 +    // There is no need to move con_size_in_bytes into a register first.
  1.1571 +    // -- yjl 8/17/2005
  1.1572 +    assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first");
  1.1573 +    addi(end, obj, con_size_in_bytes);
  1.1574 +  } else {
  1.1575 +    add(end, obj, var_size_in_bytes);
  1.1576 +  }
  1.1577 +
  1.1578 +  ld_ptr(AT, thread, in_bytes(JavaThread::tlab_end_offset()));
  1.1579 +  sltu(AT, AT, end);
  1.1580 +  bne_far(AT, R0, slow_case);
  1.1581 +  delayed()->nop();
  1.1582 +
  1.1583 +
  1.1584 +  // update the tlab top pointer
  1.1585 +  st_ptr(end, thread, in_bytes(JavaThread::tlab_top_offset()));
  1.1586 +
  1.1587 +  // recover var_size_in_bytes if necessary
  1.1588 +  /*if (var_size_in_bytes == end) {
  1.1589 +    sub(var_size_in_bytes, end, obj);
  1.1590 +    }*/
  1.1591 +
  1.1592 +  verify_tlab(t1, t2);
  1.1593 +}
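         +// The TLAB fast path above, as a sketch in C (no CAS is needed because the
         +// TLAB is private to the allocating thread; accessor names are illustrative):
         +//
         +//   obj = thread->tlab_top();
         +//   end = obj + size;                        // con_ or var_size_in_bytes
         +//   if (thread->tlab_end() < end) goto slow_case;
         +//   thread->set_tlab_top(end);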
  1.1594 +
  1.1595 +// Defines obj, preserves var_size_in_bytes
  1.1596 +void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
  1.1597 +                                   Register t1, Register t2, Label& slow_case) {
  1.1598 +  assert_different_registers(obj, var_size_in_bytes, t1, AT);
  1.1599 +  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { //by yyq
  1.1600 +    // No allocation in the shared eden.
  1.1601 +    b_far(slow_case);
  1.1602 +    delayed()->nop();
  1.1603 +  } else {
  1.1604 +
  1.1605 +#ifndef _LP64
  1.1606 +    Address heap_top(t1, Assembler::split_low((intptr_t)Universe::heap()->top_addr()));
  1.1607 +    lui(t1, split_high((intptr_t)Universe::heap()->top_addr()));
  1.1608 +#else
  1.1609 +    Address heap_top(t1);
  1.1610 +    li(t1, (long)Universe::heap()->top_addr());
  1.1611 +#endif
  1.1612 +    ld_ptr(obj, heap_top);
  1.1613 +
  1.1614 +    Register end = t2;
  1.1615 +    Label retry;
  1.1616 +
  1.1617 +    bind(retry);
  1.1618 +    if (var_size_in_bytes == NOREG) {
  1.1619 +      // There is no need to move con_size_in_bytes into a register first.
  1.1620 +      assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first");
  1.1621 +      addi(end, obj, con_size_in_bytes);
  1.1622 +    } else {
  1.1623 +      add(end, obj, var_size_in_bytes);
  1.1624 +    }
  1.1625 +    // if end < obj then we wrapped around => object too long => slow case
  1.1626 +    sltu(AT, end, obj);
  1.1627 +    bne_far(AT, R0, slow_case);
  1.1628 +    delayed()->nop();
  1.1629 +
  1.1630 +    li(AT, (long)Universe::heap()->end_addr());
         +    ld_ptr(AT, AT, 0);  // end_addr() is the address of the heap-end pointer; load its value before comparing
  1.1631 +    sltu(AT, AT, end);
  1.1632 +    bne_far(AT, R0, slow_case);
  1.1633 +    delayed()->nop();
  1.1634 +    // Compare obj with the current top addr, and if still equal, store end as
  1.1635 +    // the new top addr. On MIPS the cmpxchg below leaves AT == 1 on success and
  1.1636 +    // AT == 0 otherwise; the ll/sc loop inside it provides the atomicity.
  1.1637 +    //if (os::is_MP()) {
  1.1638 +    //  sync();
  1.1639 +    //}
  1.1640 +
  1.1641 +    // if someone beat us on the allocation, try again, otherwise continue
  1.1642 +    cmpxchg(end, heap_top, obj);
  1.1643 +    beq_far(AT, R0, retry);    //by yyq
  1.1644 +    delayed()->nop();
  1.1645 +
  1.1646 +  }
  1.1647 +}
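         +// The shared-eden path above, as a sketch in C:
         +//
         +//   retry:
         +//     obj = *heap_top;
         +//     end = obj + size;
         +//     if (end < obj)       goto slow_case;       // wrapped around: object too long
         +//     if (heap_end < end)  goto slow_case;       // would overflow the eden
         +//     if (!CAS(heap_top, obj, end)) goto retry;  // lost the race, try again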
  1.1648 +
  1.1649 +// C2 doesn't invoke this one.
  1.1650 +void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case) {
  1.1651 +  Register top = T0;
  1.1652 +  Register t1  = T1;
  1.1653 +/* Jin: tlab_refill() is called in
  1.1654 +
  1.1655 +     [c1_Runtime1_mips.cpp] Runtime1::generate_code_for(new_type_array_id);
  1.1656 +
  1.1657 +  In generate_code_for(), T2 has been assigned to hold the array length, and it
  1.1658 + is still needed after tlab_refill() returns.
  1.1659 +  Therefore, tlab_refill() must not use T2.
  1.1660 +
  1.1661 + Source:
  1.1662 +
  1.1663 +Exception in thread "main" java.lang.ArrayIndexOutOfBoundsException
  1.1664 +        at java.lang.System.arraycopy(Native Method)
  1.1665 +        at java.util.Arrays.copyOf(Arrays.java:2799)  <-- alloc_array
  1.1666 +        at sun.misc.Resource.getBytes(Resource.java:117)
  1.1667 +        at java.net.URLClassLoader.defineClass(URLClassLoader.java:273)
  1.1668 +        at java.net.URLClassLoader.findClass(URLClassLoader.java:205)
  1.1669 +        at java.lang.ClassLoader.loadClass(ClassLoader.java:321)
  1.1670 + */
  1.1671 +  Register t2  = T9;
  1.1672 +  Register t3  = T3;
  1.1673 +  Register thread_reg = T8;
  1.1674 +  Label do_refill, discard_tlab;
  1.1675 +  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { //by yyq
  1.1676 +    // No allocation in the shared eden.
  1.1677 +    b(slow_case);
  1.1678 +    delayed()->nop();
  1.1679 +  }
  1.1680 +
  1.1681 +  get_thread(thread_reg);
  1.1682 +
  1.1683 +  ld_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
  1.1684 +  ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_end_offset()));
  1.1685 +
  1.1686 +  // calculate amount of free space
  1.1687 +  sub(t1, t1, top);
  1.1688 +  shr(t1, LogHeapWordSize);
  1.1689 +
  1.1690 +  // Retain tlab and allocate object in shared space if
  1.1691 +  // the amount free in the tlab is too large to discard.
  1.1692 +  ld_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
  1.1693 +  slt(AT, t2, t1);
  1.1694 +  beq(AT, R0, discard_tlab);
  1.1695 +  delayed()->nop();
  1.1696 +
  1.1697 +  // Retain
  1.1698 +
  1.1699 +#ifndef _LP64
  1.1700 +  move(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
  1.1701 +#else
  1.1702 +  li(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
  1.1703 +#endif
  1.1704 +  add(t2, t2, AT);
  1.1705 +  st_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
  1.1706 +
  1.1707 +  if (TLABStats) {
  1.1708 +    // increment number of slow_allocations
  1.1709 +    lw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
  1.1710 +    addiu(AT, AT, 1);
  1.1711 +    sw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
  1.1712 +  }
  1.1713 +  b(try_eden);
  1.1714 +  delayed()->nop();
  1.1715 +
  1.1716 +  bind(discard_tlab);
  1.1717 +  if (TLABStats) {
  1.1718 +    // increment number of refills
  1.1719 +    lw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
  1.1720 +    addi(AT, AT, 1);
  1.1721 +    sw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
  1.1722 +    // accumulate wastage -- t1 is amount free in tlab
  1.1723 +    lw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
  1.1724 +    add(AT, AT, t1);
  1.1725 +    sw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
  1.1726 +  }
  1.1727 +
  1.1728 +  // if tlab is currently allocated (top or end != null) then
  1.1729 +  // fill [top, end + alignment_reserve) with array object
  1.1730 +  beq(top, R0, do_refill);
  1.1731 +  delayed()->nop();
  1.1732 +
  1.1733 +  // set up the mark word
  1.1734 +  li(AT, (long)markOopDesc::prototype()->copy_set_hash(0x2));
  1.1735 +  st_ptr(AT, top, oopDesc::mark_offset_in_bytes());
  1.1736 +
  1.1737 +  // set the length to the remaining space
  1.1738 +  addi(t1, t1, - typeArrayOopDesc::header_size(T_INT));
  1.1739 +  addi(t1, t1, ThreadLocalAllocBuffer::alignment_reserve());
  1.1740 +  shl(t1, log2_intptr(HeapWordSize/sizeof(jint)));
  1.1741 +  sw(t1, top, arrayOopDesc::length_offset_in_bytes());
  1.1742 +
  1.1743 +  // set klass to intArrayKlass
  1.1744 +#ifndef _LP64
  1.1745 +  lui(AT, split_high((intptr_t)Universe::intArrayKlassObj_addr()));
  1.1746 +  lw(t1, AT, split_low((intptr_t)Universe::intArrayKlassObj_addr()));
  1.1747 +#else
  1.1748 +  li(AT, (intptr_t)Universe::intArrayKlassObj_addr());
  1.1749 +  ld_ptr(t1, AT, 0);
  1.1750 +#endif
  1.1751 +  //st_ptr(t1, top, oopDesc::klass_offset_in_bytes());
  1.1752 +  store_klass(top, t1);
  1.1753 +
  1.1754 +  // refill the tlab with an eden allocation
  1.1755 +  bind(do_refill);
  1.1756 +  ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
  1.1757 +  shl(t1, LogHeapWordSize);
  1.1758 +  // add object_size ??
  1.1759 +  eden_allocate(top, t1, 0, t2, t3, slow_case);
  1.1760 +
  1.1761 +  // Check that t1 was preserved in eden_allocate.
  1.1762 +#ifdef ASSERT
  1.1763 +  if (UseTLAB) {
  1.1764 +    Label ok;
  1.1765 +    assert_different_registers(thread_reg, t1);
  1.1766 +    ld_ptr(AT, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
  1.1767 +    shl(AT, LogHeapWordSize);
  1.1768 +    beq(AT, t1, ok);
  1.1769 +    delayed()->nop();
  1.1770 +    stop("assert(t1 != tlab size)");
  1.1771 +    should_not_reach_here();
  1.1772 +
  1.1773 +    bind(ok);
  1.1774 +  }
  1.1775 +#endif
  1.1776 +  st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_start_offset()));
  1.1777 +  st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
  1.1778 +  add(top, top, t1);
  1.1779 +  addi(top, top, - ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
  1.1780 +  st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_end_offset()));
  1.1781 +  verify_tlab(t1, t2);
  1.1782 +  b(retry);
  1.1783 +  delayed()->nop();
  1.1784 +}
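         +// The refill policy above, as a sketch:
         +//
         +//   free = (tlab_end - tlab_top) >> LogHeapWordSize;   // in words
         +//   if (free > refill_waste_limit) {
         +//     refill_waste_limit += increment;   // too much left to throw away:
         +//     goto try_eden;                     // keep the TLAB, allocate in eden
         +//   }
         +//   // otherwise: overwrite the dead tail with a filler int[] so the heap
         +//   // stays parseable, take a fresh TLAB from eden, and retry.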
  1.1785 +
  1.1786 +static const double     pi_4 =  0.7853981633974483;
  1.1787 +
  1.1788 +// The x86 version is too clumsy; we don't need that fuss here. Maybe that's wrong, FIXME.
  1.1789 +// The argument (a double) must arrive in F12/F13.
  1.1790 +//void MacroAssembler::trigfunc(char trig, bool preserve_cpu_regs, int num_fpu_regs_in_use) {
  1.1791 +// We need to preserve the registers that may be modified during the call. @Jerome
  1.1792 +void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
  1.1793 +  // Save all modified registers here.
  1.1794 +  // FIXME: the disassembly of trigfunc only uses V0, V1, T9, SP and RA,
  1.1795 +  // so saving just V0, V1 and T9 would suffice.
  1.1796 +  pushad();
  1.1797 +  // Reserve stack space before the call.
  1.1798 +  addi(SP, SP, -wordSize * 2);
  1.1799 +  switch (trig) {
  1.1800 +    case 's':
  1.1801 +      call(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), relocInfo::runtime_call_type);
  1.1802 +      delayed()->nop();
  1.1803 +      break;
  1.1804 +    case 'c':
  1.1805 +      call(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), relocInfo::runtime_call_type);
  1.1806 +      delayed()->nop();
  1.1807 +      break;
  1.1808 +    case 't':
  1.1809 +      call(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), relocInfo::runtime_call_type);
  1.1810 +      delayed()->nop();
  1.1811 +      break;
  1.1812 +    default:
  1.1813 +      assert(false, "bad intrinsic");
  1.1814 +      break;
  1.1815 +  }
  1.1816 +
  1.1817 +  addi(SP, SP, wordSize * 2);
  1.1818 +  popad();
  1.1819 +}
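         +// Usage sketch (assuming the o32/n64 FP calling conventions): with x already
         +// in F12 (F12/F13 on 32-bit), trigfunc('s', 0) leaves sin(x) in F0; the
         +// integer registers survive thanks to the pushad()/popad() pair above.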
  1.1823 +
  1.1824 +#ifdef _LP64
  1.1825 +void MacroAssembler::li(Register rd, long imm) {
  1.1826 +  if (imm <= max_jint && imm >= min_jint) {
  1.1827 +    li32(rd, (int)imm);
  1.1828 +  } else if (julong(imm) <= 0xFFFFFFFF) {
  1.1829 +    assert_not_delayed();
  1.1830 +    // lui sign-extends, so we can't use that.
  1.1831 +    ori(rd, R0, julong(imm) >> 16);
  1.1832 +    dsll(rd, rd, 16);
  1.1833 +    ori(rd, rd, split_low(imm));
  1.1836 +  } else if ((imm > 0) && is_simm16(imm >> 32)) {
  1.1837 +    /* A 48-bit address */
  1.1838 +    li48(rd, imm);
  1.1839 +  } else {
  1.1840 +    li64(rd, imm);
  1.1841 +  }
  1.1842 +}
  1.1843 +#else
  1.1844 +void MacroAssembler::li(Register rd, long imm) {
  1.1845 +  li32(rd, (int)imm);
  1.1846 +}
  1.1847 +#endif
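         +// Dispatch examples for li() (a sketch; the values are illustrative only):
         +//
         +//   li(rd, 0x1234)               // 32-bit simm16        -> 1 insn  (addiu)
         +//   li(rd, 0xdeadbeefL)          // zero-extended 32-bit -> 3 insns (ori/dsll/ori)
         +//   li(rd, 0x123456789abcL)      // 48-bit address       -> 4 insns (li48)
         +//   li(rd, 0x123456789abcdef0L)  // anything else        -> 6 insns (li64)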
  1.1848 +
  1.1849 +void MacroAssembler::li32(Register reg, int imm) {
  1.1850 +  if (is_simm16(imm)) {
  1.1851 +    /* Jin: for imm < 0, we should use addi instead of addiu.
  1.1852 +     *
  1.1853 +     *  java.lang.StringCoding$StringDecoder.decode(jobject, jint, jint)
  1.1854 +     *
  1.1855 +     *  78 move [int:-1|I] [a0|I]
  1.1856 +     *    : daddi a0, zero, 0xffffffff  (correct)
  1.1857 +     *    : daddiu a0, zero, 0xffffffff (incorrect)
  1.1858 +     */
  1.1859 +    if (imm >= 0)
  1.1860 +      addiu(reg, R0, imm);
  1.1861 +    else
  1.1862 +      addi(reg, R0, imm);
  1.1863 +  } else {
  1.1864 +    lui(reg, split_low(imm >> 16));
  1.1865 +    if (split_low(imm))
  1.1866 +      ori(reg, reg, split_low(imm));
  1.1867 +  }
  1.1868 +}
  1.1869 +
  1.1870 +#ifdef _LP64
  1.1871 +void MacroAssembler::set64(Register d, jlong value) {
  1.1872 +  assert_not_delayed();
  1.1873 +
  1.1874 +  int hi = (int)(value >> 32);
  1.1875 +  int lo = (int)(value & ~0);
  1.1876 +
  1.1877 +  if (value == lo) {  // 32-bit integer
  1.1878 +    if (is_simm16(value)) {
  1.1879 +      daddiu(d, R0, value);
  1.1880 +    } else {
  1.1881 +      lui(d, split_low(value >> 16));
  1.1882 +      if (split_low(value)) {
  1.1883 +        ori(d, d, split_low(value));
  1.1884 +      }
  1.1885 +    }
  1.1886 +  } else if (hi == 0) {  // hardware zero-extends to upper 32
  1.1887 +      ori(d, R0, julong(value) >> 16);
  1.1888 +      dsll(d, d, 16);
  1.1889 +      if (split_low(value)) {
  1.1890 +        ori(d, d, split_low(value));
  1.1891 +      }
  1.1892 +  } else if ((value > 0) && is_simm16(value >> 32)) {  // li48
  1.1893 +    // 4 insts
  1.1894 +    li48(d, value);
  1.1895 +  } else {  // li64
  1.1896 +    // 6 insts
  1.1897 +    li64(d, value);
  1.1898 +  }
  1.1899 +}
  1.1900 +
  1.1901 +
  1.1902 +int MacroAssembler::insts_for_set64(jlong value) {
  1.1903 +  int hi = (int)(value >> 32);
  1.1904 +  int lo = (int)(value & ~0);
  1.1905 +
  1.1906 +  int count = 0;
  1.1907 +
  1.1908 +  if (value == lo) {  // 32-bit integer
  1.1909 +    if (is_simm16(value)) {
  1.1910 +      //daddiu(d, R0, value);
  1.1911 +      count++;
  1.1912 +    } else {
  1.1913 +      //lui(d, split_low(value >> 16));
  1.1914 +      count++;
  1.1915 +      if (split_low(value)) {
  1.1916 +        //ori(d, d, split_low(value));
  1.1917 +        count++;
  1.1918 +      }
  1.1919 +    }
  1.1920 +  } else if (hi == 0) {  // hardware zero-extends to upper 32
  1.1921 +      //ori(d, R0, julong(value) >> 16);
  1.1922 +      //dsll(d, d, 16);
  1.1923 +      count += 2;
  1.1924 +      if (split_low(value)) {
  1.1925 +        //ori(d, d, split_low(value));
  1.1926 +        count++;
  1.1927 +      }
  1.1928 +  } else if ((value > 0) && is_simm16(value >> 32)) {  // li48
  1.1929 +    // 4 insts
  1.1930 +    //li48(d, value);
  1.1931 +    count += 4;
  1.1932 +  } else {  // li64
  1.1933 +    // 6 insts
  1.1934 +    //li64(d, value);
  1.1935 +    count += 6;
  1.1936 +  }
  1.1937 +
  1.1938 +  return count;
  1.1939 +}
  1.1940 +
  1.1941 +void MacroAssembler::patchable_set48(Register d, jlong value) {
  1.1942 +  assert_not_delayed();
  1.1943 +
  1.1944 +  int hi = (int)(value >> 32);
  1.1945 +  int lo = (int)(value & ~0);
  1.1946 +
  1.1947 +  int count = 0;
  1.1948 +
  1.1949 +  if (value == lo) {  // 32-bit integer
  1.1950 +    if (is_simm16(value)) {
  1.1951 +      daddiu(d, R0, value);
  1.1952 +      count += 1;
  1.1953 +    } else {
  1.1954 +      lui(d, split_low(value >> 16));
  1.1955 +      count += 1;
  1.1956 +      if (split_low(value)) {
  1.1957 +        ori(d, d, split_low(value));
  1.1958 +        count += 1;
  1.1959 +      }
  1.1960 +    }
  1.1961 +  } else if (hi == 0) {  // hardware zero-extends to upper 32
  1.1962 +      ori(d, R0, julong(value) >> 16);
  1.1963 +      dsll(d, d, 16);
  1.1964 +      count += 2;
  1.1965 +      if (split_low(value)) {
  1.1966 +        ori(d, d, split_low(value));
  1.1967 +        count += 1;
  1.1968 +      }
  1.1969 +  } else if ((value > 0) && is_simm16(value >> 32)) {  // li48
  1.1970 +    // 4 insts
  1.1971 +    li48(d, value);
  1.1972 +    count += 4;
  1.1973 +  } else {  // li64
  1.1974 +    tty->print_cr("value = 0x%lx", value);
  1.1975 +    guarantee(false, "Not supported yet !");
  1.1976 +  }
  1.1977 +
  1.1978 +  for (; count < 4; count++) {
  1.1979 +    nop();
  1.1980 +  }
  1.1981 +}
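         +// The trailing nops pad every variant to exactly 4 instructions, presumably
         +// so a patchable_set48 site can later be re-patched with a different value
         +// (e.g. via the set_narrow_oop/set_narrow_klass relocations below) without
         +// changing the size of the emitted sequence.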
  1.1982 +
  1.1983 +void MacroAssembler::patchable_set32(Register d, jlong value) {
  1.1984 +  assert_not_delayed();
  1.1985 +
  1.1986 +  int hi = (int)(value >> 32);
  1.1987 +  int lo = (int)(value & ~0);
  1.1988 +
  1.1989 +  int count = 0;
  1.1990 +
  1.1991 +  if (value == lo) {  // 32-bit integer
  1.1992 +    if (is_simm16(value)) {
  1.1993 +      daddiu(d, R0, value);
  1.1994 +      count += 1;
  1.1995 +    } else {
  1.1996 +      lui(d, split_low(value >> 16));
  1.1997 +      count += 1;
  1.1998 +      if (split_low(value)) {
  1.1999 +        ori(d, d, split_low(value));
  1.2000 +        count += 1;
  1.2001 +      }
  1.2002 +    }
  1.2003 +  } else if (hi == 0) {  // hardware zero-extends to upper 32
  1.2004 +      ori(d, R0, julong(value) >> 16);
  1.2005 +      dsll(d, d, 16);
  1.2006 +      count += 2;
  1.2007 +      if (split_low(value)) {
  1.2008 +        ori(d, d, split_low(value));
  1.2009 +        count += 1;
  1.2010 +      }
  1.2011 +  } else {
  1.2012 +    tty->print_cr("value = 0x%lx", value);
  1.2013 +    guarantee(false, "Not supported yet !");
  1.2014 +  }
  1.2015 +
  1.2016 +  for (; count < 3; count++) {
  1.2017 +    nop();
  1.2018 +  }
  1.2019 +}
  1.2020 +
  1.2021 +void MacroAssembler::patchable_call32(Register d, jlong value) {
  1.2022 +  assert_not_delayed();
  1.2023 +
  1.2024 +  int hi = (int)(value >> 32);
  1.2025 +  int lo = (int)(value & ~0);
  1.2026 +
  1.2027 +  int count = 0;
  1.2028 +
  1.2029 +  if (value == lo) {  // 32-bit integer
  1.2030 +    if (is_simm16(value)) {
  1.2031 +      daddiu(d, R0, value);
  1.2032 +      count += 1;
  1.2033 +    } else {
  1.2034 +      lui(d, split_low(value >> 16));
  1.2035 +      count += 1;
  1.2036 +      if (split_low(value)) {
  1.2037 +        ori(d, d, split_low(value));
  1.2038 +        count += 1;
  1.2039 +      }
  1.2040 +    }
  1.2041 +  } else {
  1.2042 +    tty->print_cr("value = 0x%lx", value);
  1.2043 +    guarantee(false, "Not supported yet !");
  1.2044 +  }
  1.2045 +
  1.2046 +  for (; count < 2; count++) {
  1.2047 +    nop();
  1.2048 +  }
  1.2049 +}
  1.2050 +
  1.2051 +void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
  1.2052 +  assert(UseCompressedClassPointers, "should only be used for compressed header");
  1.2053 +  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
  1.2054 +
  1.2055 +  int klass_index = oop_recorder()->find_index(k);
  1.2056 +  RelocationHolder rspec = metadata_Relocation::spec(klass_index);
  1.2057 +  long narrowKlass = (long)Klass::encode_klass(k);
  1.2058 +
  1.2059 +  relocate(rspec, Assembler::narrow_oop_operand);
  1.2060 +  patchable_set48(dst, narrowKlass);
  1.2061 +}
  1.2062 +
  1.2063 +
  1.2064 +void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
  1.2065 +  assert(UseCompressedOops, "should only be used for compressed header");
  1.2066 +  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
  1.2067 +
  1.2068 +  int oop_index = oop_recorder()->find_index(obj);
  1.2069 +  RelocationHolder rspec = oop_Relocation::spec(oop_index);
  1.2070 +
  1.2071 +  relocate(rspec, Assembler::narrow_oop_operand);
  1.2072 +  patchable_set48(dst, oop_index);
  1.2073 +}
  1.2074 +
  1.2075 +void MacroAssembler::li64(Register rd, long imm) {
  1.2076 +  assert_not_delayed();
  1.2077 +  lui(rd, imm >> 48);
  1.2078 +  ori(rd, rd, split_low(imm >> 32));
  1.2079 +  dsll(rd, rd, 16);
  1.2080 +  ori(rd, rd, split_low(imm >> 16));
  1.2081 +  dsll(rd, rd, 16);
  1.2082 +  ori(rd, rd, split_low(imm));
  1.2083 +}
  1.2084 +
  1.2085 +void MacroAssembler::li48(Register rd, long imm) {
  1.2086 +  assert_not_delayed();
  1.2087 +  assert(is_simm16(imm >> 32), "Not a 48-bit address");
  1.2088 +  lui(rd, imm >> 32);
  1.2089 +  ori(rd, rd, split_low(imm >> 16));
  1.2090 +  dsll(rd, rd, 16);
  1.2091 +  ori(rd, rd, split_low(imm));
  1.2092 +}
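         +// Worked example (a sketch): li48(rd, 0x123456789abc) emits
         +//
         +//   lui  rd, 0x1234        // rd = 0x0000_0000_1234_0000
         +//   ori  rd, rd, 0x5678    // rd = 0x0000_0000_1234_5678
         +//   dsll rd, rd, 16        // rd = 0x0000_1234_5678_0000
         +//   ori  rd, rd, 0x9abc    // rd = 0x0000_1234_5678_9abc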
  1.2093 +#endif
  1.2094 +// NOTE: unlike i486 we do not push eax here;
  1.2095 +// the x86 version saves eax because it uses eax as the jump register.
  1.2096 +void MacroAssembler::verify_oop(Register reg, const char* s) {
  1.2097 +  /*
  1.2098 +     if (!VerifyOops) return;
  1.2099 +
  1.2100 +  // Pass register number to verify_oop_subroutine
  1.2101 +  char* b = new char[strlen(s) + 50];
  1.2102 +  sprintf(b, "verify_oop: %s: %s", reg->name(), s);
  1.2103 +  push(rax);                          // save rax,
  1.2104 +  push(reg);                          // pass register argument
  1.2105 +  ExternalAddress buffer((address) b);
  1.2106 +  // avoid using pushptr, as it modifies scratch registers
  1.2107 +  // and our contract is not to modify anything
  1.2108 +  movptr(rax, buffer.addr());
  1.2109 +  push(rax);
  1.2110 +  // call indirectly to solve generation ordering problem
  1.2111 +  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  1.2112 +  call(rax);
  1.2113 +   */
  1.2114 +  if (!VerifyOops) return;
  1.2115 +  const char * b = NULL;
  1.2116 +  stringStream ss;
  1.2117 +  ss.print("verify_oop: %s: %s", reg->name(), s);
  1.2118 +  b = code_string(ss.as_string());
  1.2119 +#ifdef _LP64
  1.2120 +  pushad();
  1.2121 +  move(A1, reg);
  1.2122 +  li(A0, (long)b);
  1.2123 +  li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
  1.2124 +  ld(T9, AT, 0);
  1.2125 +  jalr(T9);
  1.2126 +  delayed()->nop();
  1.2127 +  popad();
  1.2128 +#else
  1.2129 +  // Pass register number to verify_oop_subroutine
  1.2130 +  sw(T0, SP, - wordSize);
  1.2131 +  sw(T1, SP, - 2*wordSize);
  1.2132 +  sw(RA, SP, - 3*wordSize);
  1.2133 +  sw(A0, SP, - 4*wordSize);
  1.2134 +  sw(A1, SP, - 5*wordSize);
  1.2135 +  sw(AT, SP, - 6*wordSize);
  1.2136 +  sw(T9, SP, - 7*wordSize);
  1.2137 +  addiu(SP, SP, - 7 * wordSize);
  1.2138 +  move(A1, reg);
  1.2139 +  li(A0, (long)b);
  1.2140 +  // call indirectly to solve generation ordering problem
  1.2141 +  li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
  1.2142 +  lw(T9, AT, 0);
  1.2143 +  jalr(T9);
  1.2144 +  delayed()->nop();
  1.2145 +  lw(T0, SP, 6* wordSize);
  1.2146 +  lw(T1, SP, 5* wordSize);
  1.2147 +  lw(RA, SP, 4* wordSize);
  1.2148 +  lw(A0, SP, 3* wordSize);
  1.2149 +  lw(A1, SP, 2* wordSize);
  1.2150 +  lw(AT, SP, 1* wordSize);
  1.2151 +  lw(T9, SP, 0* wordSize);
  1.2152 +  addiu(SP, SP, 7 * wordSize);
  1.2153 +#endif
  1.2154 +}
  1.2155 +
  1.2156 +
  1.2157 +void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
  1.2158 +  if (!VerifyOops) {
  1.2159 +    nop();
  1.2160 +    return;
  1.2161 +  }
  1.2162 +  // Pass register number to verify_oop_subroutine
  1.2163 +  const char * b = NULL;
  1.2164 +  stringStream ss;
  1.2165 +  ss.print("verify_oop_addr: %s",  s);
  1.2166 +  b = code_string(ss.as_string());
  1.2167 +
  1.2168 +  st_ptr(T0, SP, - wordSize);
  1.2169 +  st_ptr(T1, SP, - 2*wordSize);
  1.2170 +  st_ptr(RA, SP, - 3*wordSize);
  1.2171 +  st_ptr(A0, SP, - 4*wordSize);
  1.2172 +  st_ptr(A1, SP, - 5*wordSize);
  1.2173 +  st_ptr(AT, SP, - 6*wordSize);
  1.2174 +  st_ptr(T9, SP, - 7*wordSize);
  1.2175 +  ld_ptr(A1, addr);   // addr may use SP, so load from it before changing SP
  1.2176 +  addiu(SP, SP, - 7 * wordSize);
  1.2177 +
  1.2178 +  li(A0, (long)b);
  1.2179 +  // call indirectly to solve generation ordering problem
  1.2180 +  li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
  1.2181 +  ld_ptr(T9, AT, 0);
  1.2182 +  jalr(T9);
  1.2183 +  delayed()->nop();
  1.2184 +  ld_ptr(T0, SP, 6* wordSize);
  1.2185 +  ld_ptr(T1, SP, 5* wordSize);
  1.2186 +  ld_ptr(RA, SP, 4* wordSize);
  1.2187 +  ld_ptr(A0, SP, 3* wordSize);
  1.2188 +  ld_ptr(A1, SP, 2* wordSize);
  1.2189 +  ld_ptr(AT, SP, 1* wordSize);
  1.2190 +  ld_ptr(T9, SP, 0* wordSize);
  1.2191 +  addiu(SP, SP, 7 * wordSize);
  1.2192 +}
  1.2193 +
  1.2194 +// used registers: T0, T1
  1.2195 +void MacroAssembler::verify_oop_subroutine() {
  1.2196 +  // RA: ra
  1.2197 +  // A0: char* error message
  1.2198 +  // A1: oop   object to verify
  1.2199 +
  1.2200 +  Label exit, error;
  1.2201 +  // increment counter
  1.2202 +  li(T0, (long)StubRoutines::verify_oop_count_addr());
  1.2203 +  lw(AT, T0, 0);
  1.2204 +#ifdef _LP64
  1.2205 +  daddi(AT, AT, 1);
  1.2206 +#else
  1.2207 +  addi(AT, AT, 1);
  1.2208 +#endif
  1.2209 +  sw(AT, T0, 0);
  1.2210 +
  1.2211 +  // make sure object is 'reasonable'
  1.2212 +  beq(A1, R0, exit);         // if obj is NULL it is ok
  1.2213 +  delayed()->nop();
  1.2214 +
  1.2215 +  // Check if the oop is in the right area of memory
  1.2216 +  //const int oop_mask = Universe::verify_oop_mask();
  1.2217 +  //const int oop_bits = Universe::verify_oop_bits();
  1.2218 +  const uintptr_t oop_mask = Universe::verify_oop_mask();
  1.2219 +  const uintptr_t oop_bits = Universe::verify_oop_bits();
  1.2220 +  li(AT, oop_mask);
  1.2221 +  andr(T0, A1, AT);
  1.2222 +  li(AT, oop_bits);
  1.2223 +  bne(T0, AT, error);
  1.2224 +  delayed()->nop();
  1.2225 +
  1.2226 +  // make sure klass is 'reasonable'
  1.2227 +  //add for compressedoops
  1.2228 +  reinit_heapbase();
  1.2229 +  //add for compressedoops
  1.2230 +  load_klass(T0, A1);
  1.2231 +  beq(T0, R0, error);                        // if klass is NULL it is broken
  1.2232 +  delayed()->nop();
  1.2233 +#if 0
  1.2234 +  // FIXME: wuhui.
  1.2235 +  // Check if the klass is in the right area of memory
  1.2236 +  //const int klass_mask = Universe::verify_klass_mask();
  1.2237 +  //const int klass_bits = Universe::verify_klass_bits();
  1.2238 +  const uintptr_t klass_mask = Universe::verify_klass_mask();
  1.2239 +  const uintptr_t klass_bits = Universe::verify_klass_bits();
  1.2240 +
  1.2241 +  li(AT, klass_mask);
  1.2242 +  andr(T1, T0, AT);
  1.2243 +  li(AT, klass_bits);
  1.2244 +  bne(T1, AT, error);
  1.2245 +  delayed()->nop();
  1.2246 +  // make sure klass' klass is 'reasonable'
  1.2247 +  //add for compressedoops
  1.2248 +  load_klass(T0, T0);
  1.2249 +  beq(T0, R0, error);  // if klass' klass is NULL it is broken
  1.2250 +  delayed()->nop();
  1.2251 +
  1.2252 +  li(AT, klass_mask);
  1.2253 +  andr(T1, T0, AT);
  1.2254 +  li(AT, klass_bits);
  1.2255 +  bne(T1, AT, error);
  1.2256 +  delayed()->nop();     // if klass not in right area of memory it is broken too.
  1.2257 +#endif
  1.2258 +  // return if everything seems ok
  1.2259 +  bind(exit);
  1.2260 +
  1.2261 +  jr(RA);
  1.2262 +  delayed()->nop();
  1.2263 +
  1.2264 +  // handle errors
  1.2265 +  bind(error);
  1.2266 +  pushad();
  1.2267 +#ifndef _LP64
  1.2268 +  addi(SP, SP, (-1) * wordSize);
  1.2269 +#endif
  1.2270 +  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  1.2271 +  delayed()->nop();
  1.2272 +#ifndef _LP64
  1.2273 +  addiu(SP, SP, 1 * wordSize);
  1.2274 +#endif
  1.2275 +  popad();
  1.2276 +  jr(RA);
  1.2277 +  delayed()->nop();
  1.2278 +}
  1.2279 +
  1.2280 +void MacroAssembler::verify_tlab(Register t1, Register t2) {
  1.2281 +#ifdef ASSERT
  1.2282 +  assert_different_registers(t1, t2, AT);
  1.2283 +  if (UseTLAB && VerifyOops) {
  1.2284 +    Label next, ok;
  1.2285 +
  1.2286 +    get_thread(t1);
  1.2287 +
  1.2288 +    ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset()));
  1.2289 +    ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset()));
  1.2290 +    sltu(AT, t2, AT);
  1.2291 +    beq(AT, R0, next);
  1.2292 +    delayed()->nop();
  1.2293 +
  1.2294 +    stop("assert(top >= start)");
  1.2295 +
  1.2296 +    bind(next);
  1.2297 +    ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset()));
  1.2298 +    sltu(AT, AT, t2);
  1.2299 +    beq(AT, R0, ok);
  1.2300 +    delayed()->nop();
  1.2301 +
  1.2302 +    stop("assert(top <= end)");
  1.2303 +
  1.2304 +    bind(ok);
  1.2305 +
  1.2306 +  }
  1.2307 +#endif
  1.2308 +}
  1.2309 +RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
  1.2310 +                                                      Register tmp,
  1.2311 +                                                      int offset) {
  1.2312 +  intptr_t value = *delayed_value_addr;
  1.2313 +  if (value != 0)
  1.2314 +    return RegisterOrConstant(value + offset);
  1.2315 +  AddressLiteral a(delayed_value_addr);
  1.2316 +  // load indirectly to solve generation ordering problem
  1.2317 +  //movptr(tmp, ExternalAddress((address) delayed_value_addr));
  1.2318 +  //ld(tmp, a);  // FIXME: this load is still disabled, so tmp is never initialized here
  1.2319 +  if (offset != 0)
  1.2320 +    daddi(tmp, tmp, offset);
  1.2321 +
  1.2322 +  return RegisterOrConstant(tmp);
  1.2323 +}
  1.2324 +
  1.2325 +void MacroAssembler::hswap(Register reg) {
  1.2326 +  //short
  1.2327 +  //andi(reg, reg, 0xffff);
  1.2328 +  srl(AT, reg, 8);
  1.2329 +  sll(reg, reg, 24);
  1.2330 +  sra(reg, reg, 16);
  1.2331 +  orr(reg, reg, AT);
  1.2332 +}
  1.2333 +
  1.2334 +void MacroAssembler::huswap(Register reg) {
  1.2335 +#ifdef _LP64
  1.2336 +  dsrl(AT, reg, 8);
  1.2337 +  dsll(reg, reg, 24);
  1.2338 +  dsrl(reg, reg, 16);
  1.2339 +  orr(reg, reg, AT);
  1.2340 +  andi(reg, reg, 0xffff);
  1.2341 +#else
  1.2342 +  //andi(reg, reg, 0xffff);
  1.2343 +  srl(AT, reg, 8);
  1.2344 +  sll(reg, reg, 24);
  1.2345 +  srl(reg, reg, 16);
  1.2346 +  orr(reg, reg, AT);
  1.2347 +#endif
  1.2348 +}
  1.2349 +
  1.2350 +// A 32-bit byte swap that needs only one extra register (AT);
  1.2351 +// the XOR trick below exchanges the two middle bytes in place.
  1.2352 +void MacroAssembler::swap(Register reg) {
  1.2353 +  srl(AT, reg, 8);
  1.2354 +  sll(reg, reg, 24);
  1.2355 +  orr(reg, reg, AT);
  1.2356 +  //reg : 4 1 2 3
  1.2357 +  srl(AT, AT, 16);
  1.2358 +  xorr(AT, AT, reg);
  1.2359 +  andi(AT, AT, 0xff);
  1.2360 +  //AT : 0 0 0 1^3);
  1.2361 +  xorr(reg, reg, AT);
  1.2362 +  //reg : 4 1 2 1
  1.2363 +  sll(AT, AT, 16);
  1.2364 +  xorr(reg, reg, AT);
  1.2365 +  //reg : 4 3 2 1
  1.2366 +}
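         +// Worked example (a sketch): swap() on reg = 0x11223344
         +//
         +//   srl/sll/orr      : AT = 0x00112233, reg = 0x44112233   // bytes 4 1 2 3
         +//   srl/xorr/andi    : AT = 0x22  (= 0x11 ^ 0x33)
         +//   xorr             : reg = 0x44112211                    // bytes 4 1 2 1
         +//   sll + final xorr : reg = 0x44332211                    // bytes 4 3 2 1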
  1.2367 +
  1.2368 +#ifdef _LP64
  1.2369 +
  1.2370 +/* Historical note: 32-bit CAS was once simulated with MIPS64 lld/scd as
  1.2371 +   described below; cmpxchg32 now uses ll/sc directly.
  1.2372 +  Jin: cas_int should only compare the low 32 bits of the memory value.
  1.2373 +       However, lld/scd perform a 64-bit operation, which violates the intention of cas_int.
  1.2374 +       To simulate a 32-bit atomic operation, the value loaded with LLD is split into
  1.2375 +       two halves, and only the low 32 bits are compared. If they are equal, the low 32 bits
  1.2376 +       of newval, plus the high 32 bits of the memory value, are stored together with SCD.
  1.2377 +
  1.2378 +Example:
  1.2379 +
  1.2380 +      double d = 3.1415926;
  1.2381 +      System.err.println("hello" + d);
  1.2382 +
  1.2383 +  sun.misc.FloatingDecimal$1.<init>()
  1.2384 +   |
  1.2385 +   `- java.util.concurrent.atomic.AtomicInteger::compareAndSet()
  1.2386 +
  1.2387 +  38 cas_int [a7a7|J] [a0|I] [a6|I]
  1.2388 +// a0: 0xffffffffe8ea9f63 pc: 0x55647f3354
  1.2389 +// a6: 0x4ab325aa
  1.2390 +
  1.2391 +again:
  1.2392 +   0x00000055647f3c5c: lld at, 0x0(a7)                          ; 64-bit load, "0xe8ea9f63"
  1.2393 +
  1.2394 +   0x00000055647f3c60: sll t9, at, 0                            ; t9: low-32 bits (sign extended)
  1.2395 +   0x00000055647f3c64: dsrl32 t8, at, 0                         ; t8: high-32 bits
  1.2396 +   0x00000055647f3c68: dsll32 t8, t8, 0
  1.2397 +   0x00000055647f3c6c: bne t9, a0, 0x00000055647f3c9c           ; goto nequal
  1.2398 +   0x00000055647f3c70: sll zero, zero, 0
  1.2399 +
  1.2400 +   0x00000055647f3c74: ori v1, zero, 0xffffffff                 ; v1: low-32 bits of newval (sign unextended)
  1.2401 +   0x00000055647f3c78: dsll v1, v1, 16                          ; v1 = a6 & 0xFFFFFFFF;
  1.2402 +   0x00000055647f3c7c: ori v1, v1, 0xffffffff
  1.2403 +   0x00000055647f3c80: and v1, a6, v1
  1.2404 +   0x00000055647f3c84: or at, t8, v1
  1.2405 +   0x00000055647f3c88: scd at, 0x0(a7)
  1.2406 +   0x00000055647f3c8c: beq at, zero, 0x00000055647f3c5c         ; goto again
  1.2407 +   0x00000055647f3c90: sll zero, zero, 0
  1.2408 +   0x00000055647f3c94: beq zero, zero, 0x00000055647f45ac       ; goto done
  1.2409 +   0x00000055647f3c98: sll zero, zero, 0
  1.2410 +nequal:
  1.2411 +   0x00000055647f45a4: dadd a0, t9, zero
  1.2412 +   0x00000055647f45a8: dadd at, zero, zero
  1.2413 +done:
  1.2414 +*/
  1.2415 +
  1.2416 +void MacroAssembler::cmpxchg32(Register x_reg, Address dest, Register c_reg) {
  1.2417 +  /* 2012/11/11 Jin: MIPS64 can use ll/sc for 32-bit atomic memory access */
  1.2418 +  Label done, again, nequal;
  1.2419 +
  1.2420 +  bind(again);
  1.2421 +
  1.2422 +  if(!Use3A2000) sync();
  1.2423 +  ll(AT, dest);
  1.2424 +  bne(AT, c_reg, nequal);
  1.2425 +  delayed()->nop();
  1.2426 +
  1.2427 +  move(AT, x_reg);
  1.2428 +  sc(AT, dest);
  1.2429 +  beq(AT, R0, again);
  1.2430 +  delayed()->nop();
  1.2431 +  b(done);
  1.2432 +  delayed()->nop();
  1.2433 +
  1.2434 +  // not xchged
  1.2435 +  bind(nequal);
  1.2436 +  sync();
  1.2437 +  move(c_reg, AT);
  1.2438 +  move(AT, R0);
  1.2439 +
  1.2440 +  bind(done);
  1.2441 +}
  1.2442 +#endif  // _LP64 (cmpxchg32)
  1.2443 +
  1.2444 +void MacroAssembler::cmpxchg(Register x_reg, Address dest, Register c_reg) {
  1.2445 +  Label done, again, nequal;
  1.2446 +
  1.2447 +  bind(again);
  1.2448 +#ifdef _LP64
  1.2449 +  if(!Use3A2000) sync();
  1.2450 +  lld(AT, dest);
  1.2451 +#else
  1.2452 +  if(!Use3A2000) sync();
  1.2453 +  ll(AT, dest);
  1.2454 +#endif
  1.2455 +  bne(AT, c_reg, nequal);
  1.2456 +  delayed()->nop();
  1.2457 +
  1.2458 +  move(AT, x_reg);
  1.2459 +#ifdef _LP64
  1.2460 +  scd(AT, dest);
  1.2461 +#else
  1.2462 +  sc(AT, dest);
  1.2463 +#endif
  1.2464 +  beq(AT, R0, again);
  1.2465 +  delayed()->nop();
  1.2466 +  b(done);
  1.2467 +  delayed()->nop();
  1.2468 +
  1.2469 +  // not xchged
  1.2470 +  bind(nequal);
  1.2471 +  sync();
  1.2472 +  move(c_reg, AT);
  1.2473 +  move(AT, R0);
  1.2474 +
  1.2475 +  bind(done);
  1.2476 +}
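         +// Semantics of cmpxchg/cmpxchg32, as a sketch in C (this is the protocol the
         +// callers above rely on, e.g. eden_allocate and fast_lock):
         +//
         +//   if (*dest == c_reg) { *dest = x_reg; AT = 1; }   // exchanged
         +//   else                { c_reg = *dest; AT = 0; }   // not exchanged
         +//
         +// AT plays the role of x86's ZF, and on failure c_reg is updated with the
         +// value actually observed in memory.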
  1.2477 +
  1.2478 +void MacroAssembler::cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi) {
  1.2479 +  Label done, again, nequal;
  1.2480 +
  1.2481 +  Register x_reg = x_regLo;
  1.2482 +  dsll32(x_regHi, x_regHi, 0);
  1.2483 +  dsll32(x_regLo, x_regLo, 0);
  1.2484 +  dsrl32(x_regLo, x_regLo, 0);
  1.2485 +  orr(x_reg, x_regLo, x_regHi);
  1.2486 +
  1.2487 +  Register c_reg = c_regLo;
  1.2488 +  dsll32(c_regHi, c_regHi, 0);
  1.2489 +  dsll32(c_regLo, c_regLo, 0);
  1.2490 +  dsrl32(c_regLo, c_regLo, 0);
  1.2491 +  orr(c_reg, c_regLo, c_regHi);
  1.2492 +
  1.2493 +  bind(again);
  1.2494 +
  1.2495 +  if(!Use3A2000) sync();
  1.2496 +  lld(AT, dest);
  1.2497 +  bne(AT, c_reg, nequal);
  1.2498 +  delayed()->nop();
  1.2499 +
  1.2500 +  //move(AT, x_reg);
  1.2501 +  dadd(AT, x_reg, R0);
  1.2502 +  scd(AT, dest);
  1.2503 +  beq(AT, R0, again);
  1.2504 +  delayed()->nop();
  1.2505 +  b(done);
  1.2506 +  delayed()->nop();
  1.2507 +
  1.2508 +  // not xchged
  1.2509 +  bind(nequal);
  1.2510 +  sync();
  1.2511 +  //move(c_reg, AT);
  1.2512 +  //move(AT, R0);
  1.2513 +  dadd(c_reg, AT, R0);
  1.2514 +  dadd(AT, R0, R0);
  1.2515 +  bind(done);
  1.2516 +}
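         +// cmpxchg8 first packs each lo/hi register pair into one 64-bit value,
         +// roughly:
         +//
         +//   x = ((jlong)x_hi << 32) | ((julong)x_lo & 0xffffffff);
         +//   c = ((jlong)c_hi << 32) | ((julong)c_lo & 0xffffffff);
         +//
         +// (the dsll32/dsrl32 pair above zero-extends the low half), then runs the
         +// same lld/scd loop as the 64-bit cmpxchg.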
  1.2517 +
  1.2518 +// The three FP registers must all be different.
  1.2519 +void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) {
  1.2520 +  assert_different_registers(tmp, fs, ft);
  1.2521 +  div_s(tmp, fs, ft);
  1.2522 +  trunc_l_s(tmp, tmp);
  1.2523 +  cvt_s_l(tmp, tmp);
  1.2524 +  mul_s(tmp, tmp, ft);
  1.2525 +  sub_s(fd, fs, tmp);
  1.2526 +}
  1.2527 +
  1.2528 +// The three FP registers must all be different.
  1.2529 +void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) {
  1.2530 +  assert_different_registers(tmp, fs, ft);
  1.2531 +  div_d(tmp, fs, ft);
  1.2532 +  trunc_l_d(tmp, tmp);
  1.2533 +  cvt_d_l(tmp, tmp);
  1.2534 +  mul_d(tmp, tmp, ft);
  1.2535 +  sub_d(fd, fs, tmp);
  1.2536 +}
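         +// Both helpers compute the remainder via a truncating divide, as a sketch:
         +//
         +//   fd = fs - trunc_to_integer(fs / ft) * ft;
         +//
         +// Note trunc_l_s/trunc_l_d round through a 64-bit integer, so the result
         +// presumably matches fmod() only while the quotient fits in a long.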
  1.2537 +
  1.2538 +// Fast_Lock and Fast_Unlock used by C2
  1.2539 +
  1.2540 +// Because the transitions from emitted code to the runtime
  1.2541 +// monitorenter/exit helper stubs are so slow it's critical that
  1.2542 +// we inline both the stack-locking fast-path and the inflated fast path.
  1.2543 +//
  1.2544 +// See also: cmpFastLock and cmpFastUnlock.
  1.2545 +//
  1.2546 +// What follows is a specialized inline transliteration of the code
  1.2547 +// in slow_enter() and slow_exit().  If we're concerned about I$ bloat
  1.2548 +// another option would be to emit TrySlowEnter and TrySlowExit methods
  1.2549 +// at startup-time.  These methods would accept arguments as
  1.2550 +// (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
  1.2551 +// indications in the icc.ZFlag.  Fast_Lock and Fast_Unlock would simply
  1.2552 +// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
  1.2553 +// In practice, however, the # of lock sites is bounded and is usually small.
  1.2554 +// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
  1.2555 +// if the processor uses simple bimodal branch predictors keyed by EIP,
  1.2556 +// since the helper routines would be called from multiple synchronization
  1.2557 +// sites.
  1.2558 +//
  1.2559 +// An even better approach would be write "MonitorEnter()" and "MonitorExit()"
  1.2560 +// in java - using j.u.c and unsafe - and just bind the lock and unlock sites
  1.2561 +// to those specialized methods.  That'd give us a mostly platform-independent
  1.2562 +// implementation that the JITs could optimize and inline at their pleasure.
  1.2563 +// Done correctly, the only time we'd need to cross to native code would be
  1.2564 +// to park() or unpark() threads.  We'd also need a few more unsafe operators
  1.2565 +// to (a) prevent compiler-JIT reordering of non-volatile accesses, and
  1.2566 +// (b) explicit barriers or fence operations.
  1.2567 +//
  1.2568 +// TODO:
  1.2569 +//
  1.2570 +// *  Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
  1.2571 +//    This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
  1.2572 +//    Given TLAB allocation, Self is usually manifested in a register, so passing it into
  1.2573 +//    the lock operators would typically be faster than reifying Self.
  1.2574 +//
  1.2575 +// *  Ideally I'd define the primitives as:
  1.2576 +//       fast_lock   (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
  1.2577 +//       fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
  1.2578 +//    Unfortunately ADLC bugs prevent us from expressing the ideal form.
  1.2579 +//    Instead, we're stuck with a rather awkward and brittle register assignments below.
  1.2580 +//    Furthermore the register assignments are overconstrained, possibly resulting in
  1.2581 +//    sub-optimal code near the synchronization site.
  1.2582 +//
  1.2583 +// *  Eliminate the sp-proximity tests and just use "== Self" tests instead.
  1.2584 +//    Alternately, use a better sp-proximity test.
  1.2585 +//
  1.2586 +// *  Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
  1.2587 +//    Either one is sufficient to uniquely identify a thread.
  1.2588 +//    TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
  1.2589 +//
  1.2590 +// *  Intrinsify notify() and notifyAll() for the common cases where the
  1.2591 +//    object is locked by the calling thread but the waitlist is empty.
  1.2592 +//    avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
  1.2593 +//
  1.2594 +// *  use jccb and jmpb instead of jcc and jmp to improve code density.
  1.2595 +//    But beware of excessive branch density on AMD Opterons.
  1.2596 +//
  1.2597 +// *  Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
  1.2598 +//    or failure of the fast-path.  If the fast-path fails then we pass
  1.2599 +//    control to the slow-path, typically in C.  In Fast_Lock and
  1.2600 +//    Fast_Unlock we often branch to DONE_LABEL, just to find that C2
  1.2601 +//    will emit a conditional branch immediately after the node.
  1.2602 +//    So we have branches to branches and lots of ICC.ZF games.
  1.2603 +//    Instead, it might be better to have C2 pass a "FailureLabel"
  1.2604 +//    into Fast_Lock and Fast_Unlock.  In the case of success, control
  1.2605 +//    will drop through the node.  ICC.ZF is undefined at exit.
  1.2606 +//    In the case of failure, the node will branch directly to the
  1.2607 +//    FailureLabel
  1.2608 +
  1.2609 +
  1.2610 +// obj: object to lock
  1.2611 +// box: on-stack box address (displaced header location) - KILLED
  1.2612 +// rax,: tmp -- KILLED
  1.2613 +// scr: tmp -- KILLED
  1.2614 +void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, Register scrReg) {
  1.2615 +
  1.2616 +  // Ensure the register assignments are disjoint
  1.2617 +  guarantee (objReg != boxReg, "") ;
  1.2618 +  guarantee (objReg != tmpReg, "") ;
  1.2619 +  guarantee (objReg != scrReg, "") ;
  1.2620 +  guarantee (boxReg != tmpReg, "") ;
  1.2621 +  guarantee (boxReg != scrReg, "") ;
  1.2622 +
  1.2623 +
  1.2624 +  block_comment("FastLock");
  1.2625 +  /*
  1.2626 +     move(AT, 0x0);
  1.2627 +     return;
  1.2628 +     */
  1.2629 +  if (PrintBiasedLockingStatistics) {
  1.2630 +    push(tmpReg);
  1.2631 +    atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, AT, tmpReg);
  1.2632 +    pop(tmpReg);
  1.2633 +  }
  1.2634 +
  1.2635 +  if (EmitSync & 1) {
  1.2636 +    move(AT, 0x0);
  1.2637 +    return;
  1.2638 +  } else
  1.2639 +    if (EmitSync & 2) {
  1.2640 +      Label DONE_LABEL ;
  1.2641 +      if (UseBiasedLocking) {
  1.2642 +        // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
  1.2643 +        biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
  1.2644 +      }
  1.2645 +
  1.2646 +      ld(tmpReg, Address(objReg, 0)) ;          // fetch markword
  1.2647 +      ori(tmpReg, tmpReg, 0x1);
  1.2648 +      sd(tmpReg, Address(boxReg, 0));           // Anticipate successful CAS
  1.2649 +
  1.2650 +      cmpxchg(boxReg, Address(objReg, 0), tmpReg);          // Updates tmpReg
  1.2651 +      bne(AT, R0, DONE_LABEL);
  1.2652 +      delayed()->nop();
  1.2653 +
  1.2654 +      // Recursive locking
  1.2655 +      dsubu(tmpReg, tmpReg, SP);
  1.2656 +      li(AT, (7 - os::vm_page_size() ));
  1.2657 +      andr(tmpReg, tmpReg, AT);
  1.2658 +      sd(tmpReg, Address(boxReg, 0));
  1.2659 +      bind(DONE_LABEL) ;
  1.2660 +    } else {
  1.2661 +      // Possible cases that we'll encounter in fast_lock
  1.2662 +      // ------------------------------------------------
  1.2663 +      // * Inflated
  1.2664 +      //    -- unlocked
  1.2665 +      //    -- Locked
  1.2666 +      //       = by self
  1.2667 +      //       = by other
  1.2668 +      // * biased
  1.2669 +      //    -- by Self
  1.2670 +      //    -- by other
  1.2671 +      // * neutral
  1.2672 +      // * stack-locked
  1.2673 +      //    -- by self
  1.2674 +      //       = sp-proximity test hits
  1.2675 +      //       = sp-proximity test generates false-negative
  1.2676 +      //    -- by other
  1.2677 +      //
  1.2678 +
  1.2679 +      Label IsInflated, DONE_LABEL, PopDone ;
  1.2680 +
  1.2681 +      // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
  1.2682 +      // order to reduce the number of conditional branches in the most common cases.
  1.2683 +      // Beware -- there's a subtle invariant that fetch of the markword
  1.2684 +      // at [FETCH], below, will never observe a biased encoding (*101b).
  1.2685 +      // If this invariant is not held we risk exclusion (safety) failure.
  1.2686 +      if (UseBiasedLocking && !UseOptoBiasInlining) {
  1.2687 +        biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
  1.2688 +      }
  1.2689 +
  1.2690 +      ld(tmpReg, Address(objReg, 0)) ;         //Fetch the markword of the object.
  1.2691 +      andi(AT, tmpReg, markOopDesc::monitor_value);
  1.2692 +      bne(AT, R0, IsInflated);                      // inflated vs stack-locked|neutral|bias
  1.2693 +      delayed()->nop();
  1.2694 +
  1.2695 +      // Attempt stack-locking ...
  1.2696 +      ori (tmpReg, tmpReg, markOopDesc::unlocked_value);
  1.2697 +      sd(tmpReg, Address(boxReg, 0));          // Anticipate successful CAS
  1.2698 +      //if (os::is_MP()) {
  1.2699 +      //  sync();
  1.2700 +      //}
  1.2701 +
  1.2702 +      cmpxchg(boxReg, Address(objReg, 0), tmpReg);           // Updates tmpReg
  1.2703 +      //AT == 1: unlocked
  1.2704 +
  1.2705 +      if (PrintBiasedLockingStatistics) {
  1.2706 +        Label L;
  1.2707 +        beq(AT, R0, L);
  1.2708 +        delayed()->nop();
  1.2709 +        push(T0);
  1.2710 +        push(T1);
  1.2711 +        atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
  1.2712 +        pop(T1);
  1.2713 +        pop(T0);
  1.2714 +        bind(L);
  1.2715 +      }
  1.2716 +      bne(AT, R0, DONE_LABEL);
  1.2717 +      delayed()->nop();
  1.2718 +
  1.2719 +      // Recursive locking
  1.2720 +      // The object is stack-locked: markword contains stack pointer to BasicLock.
  1.2721 +      // Locked by current thread if difference with current SP is less than one page.
  1.2722 +      dsubu(tmpReg, tmpReg, SP);
  1.2723 +      li(AT, 7 - os::vm_page_size() );
  1.2724 +      andr(tmpReg, tmpReg, AT);
  1.2725 +      sd(tmpReg, Address(boxReg, 0));
  1.2726 +      if (PrintBiasedLockingStatistics) {
  1.2727 +        Label L;
  1.2728 +        // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++
  1.2729 +        bne(tmpReg, R0, L);
  1.2730 +        delayed()->nop();
  1.2731 +        push(T0);
  1.2732 +        push(T1);
  1.2733 +        atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
  1.2734 +        pop(T1);
  1.2735 +        pop(T0);
  1.2736 +        bind(L);
  1.2737 +      }
  1.2738 +      sltiu(AT, tmpReg, 1); /* AT = (tmpReg == 0) ? 1 : 0 */
  1.2739 +
  1.2740 +      b(DONE_LABEL) ;
  1.2741 +      delayed()->nop();
  1.2742 +
  1.2743 +      bind(IsInflated) ;
  1.2744 +      // The object's monitor m is unlocked iff m->owner == NULL,
  1.2745 +      // otherwise m->owner may contain a thread or a stack address.
  1.2746 +
  1.2747 +      // TODO: someday avoid the ST-before-CAS penalty by
  1.2748 +      // relocating (deferring) the following ST.
  1.2749 +      // We should also think about trying a CAS without having
  1.2750 +      // fetched _owner.  If the CAS is successful we may
  1.2751 +      // avoid an RTO->RTS upgrade on the $line.
  1.2752 +      // Without cast to int32_t a movptr will destroy r10 which is typically obj
  1.2753 +      li(AT, (int32_t)intptr_t(markOopDesc::unused_mark()));
  1.2754 +      sd(AT, Address(boxReg, 0));
  1.2755 +
  1.2756 +      move(boxReg, tmpReg) ;
  1.2757 +      ld(tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
  1.2758 +      // if (m->owner != 0) => AT = 0, goto slow path.
  1.2759 +      move(AT, R0);
  1.2760 +      bne(tmpReg, R0, DONE_LABEL);
  1.2761 +      delayed()->nop();
  1.2762 +
  1.2763 +#ifndef OPT_THREAD
  1.2764 +      get_thread (TREG) ;
  1.2765 +#endif
  1.2766 +      // It's inflated and appears unlocked
  1.2767 +      //if (os::is_MP()) {
  1.2768 +      //  sync();
  1.2769 +      //}
  1.2770 +      cmpxchg(TREG, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), tmpReg) ;
  1.2771 +      // Intentional fall-through into DONE_LABEL ...
  1.2772 +
  1.2773 +
  1.2774 +      // DONE_LABEL is a hot target - we'd really like to place it at the
  1.2775 +      // start of cache line by padding with NOPs.
  1.2776 +      // See the AMD and Intel software optimization manuals for the
  1.2777 +      // most efficient "long" NOP encodings.
  1.2778 +      // Unfortunately none of our alignment mechanisms suffice.
  1.2779 +      bind(DONE_LABEL);
  1.2780 +
  1.2781 +      // At DONE_LABEL the AT is set as follows ...
  1.2782 +      // Fast_Unlock uses the same protocol.
  1.2783 +      // AT == 1 -> Success
  1.2784 +      // AT == 0 -> Failure - force control through the slow-path
  1.2785 +
  1.2786 +      // Avoid branch-to-branch on AMD processors
  1.2787 +      // This appears to be superstition.
  1.2788 +      if (EmitSync & 32) nop() ;
  1.2789 +
  1.2790 +    }
  1.2791 +}
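         +// fast_lock in outline (a sketch of the path generated above; AT == 1
         +// reports success, as documented at DONE_LABEL):
         +//
         +//   mark = obj->mark();
         +//   if (mark & monitor_value) {                   // inflated
         +//     box->dhw = unused_mark();
         +//     success = (monitor->_owner == NULL) && CAS(&monitor->_owner, NULL, Self);
         +//   } else {
         +//     box->dhw = mark | unlocked_value;           // anticipate CAS success
         +//     success = CAS(&obj->mark, mark | unlocked_value, box)
         +//               || (((mark - SP) & (7 - page_size)) == 0);  // recursive stack-lock
         +//   }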
  1.2792 +
  1.2793 +// obj: object to unlock
  1.2794 +// box: box address (displaced header location), killed.  Must be EAX.
  1.2795 +// rbx,: killed tmp; cannot be obj nor box.
  1.2796 +//
  1.2797 +// Some commentary on balanced locking:
  1.2798 +//
  1.2799 +// Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
  1.2800 +// Methods that don't have provably balanced locking are forced to run in the
  1.2801 +// interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
  1.2802 +// The interpreter provides two properties:
  1.2803 +// I1:  At return-time the interpreter automatically and quietly unlocks any
  1.2804 +//      objects acquired by the current activation (frame).  Recall that the
  1.2805 +//      interpreter maintains an on-stack list of locks currently held by
  1.2806 +//      a frame.
  1.2807 +// I2:  If a method attempts to unlock an object that is not held by
  1.2808 +//      the frame the interpreter throws IMSX.
  1.2809 +//
  1.2810 +// Let's say A(), which has provably balanced locking, acquires O and then calls B().
  1.2811 +// B() doesn't have provably balanced locking so it runs in the interpreter.
  1.2812 +// Control returns to A() and A() unlocks O.  By I1 and I2, above, we know that O
  1.2813 +// is still locked by A().
  1.2814 +//
  1.2815 +// The only other source of unbalanced locking would be JNI.  The "Java Native Interface:
  1.2816 +// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
  1.2817 +// should not be unlocked by "normal" java-level locking and vice-versa.  The specification
  1.2818 +// doesn't specify what will occur if a program engages in such mixed-mode locking, however.
  1.2819 +
  1.2820 +void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg) {
  1.2821 +
  1.2822 +  guarantee (objReg != boxReg, "") ;
  1.2823 +  guarantee (objReg != tmpReg, "") ;
  1.2824 +  guarantee (boxReg != tmpReg, "") ;
  1.2825 +
  1.2826 +
  1.2827 +
  1.2828 +  block_comment("FastUnlock");
  1.2829 +
  1.2830 +
  1.2831 +  if (EmitSync & 4) {
  1.2832 +    // Disable - inhibit all inlining.  Force control through the slow-path
  1.2833 +    move(AT, 0x0);
  1.2834 +    return;
  1.2835 +  } else
  1.2836 +    if (EmitSync & 8) {
  1.2837 +      Label DONE_LABEL ;
  1.2838 +      if (UseBiasedLocking) {
  1.2839 +        biased_locking_exit(objReg, tmpReg, DONE_LABEL);
  1.2840 +      }
  1.2841 +      // classic stack-locking code ...
  1.2842 +      ld(tmpReg, Address(boxReg, 0)) ;
  1.2843 +      beq(tmpReg, R0, DONE_LABEL) ;
  1.2844 +      move(AT, 0x1);  // delay slot
  1.2845 +
   1.2846 +      cmpxchg(tmpReg, Address(objReg, 0), boxReg);   // if the mark still points at box, swap the displaced header back in
  1.2847 +      bind(DONE_LABEL);
  1.2848 +    } else {
  1.2849 +      Label DONE_LABEL, Stacked, CheckSucc, Inflated ;
  1.2850 +
  1.2851 +      // Critically, the biased locking test must have precedence over
  1.2852 +      // and appear before the (box->dhw == 0) recursive stack-lock test.
  1.2853 +      if (UseBiasedLocking && !UseOptoBiasInlining) {
  1.2854 +        biased_locking_exit(objReg, tmpReg, DONE_LABEL);
  1.2855 +      }
  1.2856 +
  1.2857 +      ld(AT, Address(boxReg, 0)) ;            // Examine the displaced header
  1.2858 +      beq(AT, R0, DONE_LABEL) ;      // 0 indicates recursive stack-lock
  1.2859 +      delayed()->daddiu(AT, R0, 0x1);
  1.2860 +
  1.2861 +      ld(tmpReg, Address(objReg, 0)) ;       // Examine the object's markword
  1.2862 +      andi(AT, tmpReg, markOopDesc::monitor_value) ;                     // Inflated?
   1.2863 +      beq(AT, R0, Stacked) ;                     // zero => not inflated, i.e. stack-locked
  1.2864 +      delayed()->nop();
  1.2865 +
  1.2866 +      bind(Inflated) ;
  1.2867 +      // It's inflated.
  1.2868 +      // Despite our balanced locking property we still check that m->_owner == Self
  1.2869 +      // as java routines or native JNI code called by this thread might
  1.2870 +      // have released the lock.
  1.2871 +      // Refer to the comments in synchronizer.cpp for how we might encode extra
  1.2872 +      // state in _succ so we can avoid fetching EntryList|cxq.
  1.2873 +      //
  1.2874 +      // I'd like to add more cases in fast_lock() and fast_unlock() --
  1.2875 +      // such as recursive enter and exit -- but we have to be wary of
  1.2876 +      // I$ bloat, T$ effects and BP$ effects.
  1.2877 +      //
  1.2878 +      // If there's no contention try a 1-0 exit.  That is, exit without
  1.2879 +      // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
  1.2880 +      // we detect and recover from the race that the 1-0 exit admits.
  1.2881 +      //
  1.2882 +      // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
  1.2883 +      // before it STs null into _owner, releasing the lock.  Updates
  1.2884 +      // to data protected by the critical section must be visible before
  1.2885 +      // we drop the lock (and thus before any other thread could acquire
  1.2886 +      // the lock and observe the fields protected by the lock).
   1.2887 +      // IA32's memory model is TSO, so there STs are ordered with respect
   1.2888 +      // to each other and no explicit barrier (fence) is needed; MIPS is
          +      // weakly ordered, which is why sync() is emitted below before
          +      // clearing _owner.
  1.2889 +      // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
  1.2890 +#ifndef OPT_THREAD
  1.2891 +      get_thread (TREG) ;
  1.2892 +#endif
  1.2893 +
  1.2894 +      // It's inflated
  1.2895 +      ld(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
  1.2896 +      xorr(boxReg, boxReg, TREG);
  1.2897 +
  1.2898 +      ld(AT, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
  1.2899 +      orr(boxReg, boxReg, AT);
  1.2900 +
  1.2901 +      move(AT, R0);
  1.2902 +      bne(boxReg, R0, DONE_LABEL);
  1.2903 +      delayed()->nop();
  1.2904 +
  1.2905 +      ld(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
  1.2906 +      ld(AT, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
  1.2907 +      orr(boxReg, boxReg, AT);
  1.2908 +
  1.2909 +      move(AT, R0);
  1.2910 +      bne(boxReg, R0, DONE_LABEL);
  1.2911 +      delayed()->nop();
  1.2912 +
  1.2913 +      sync();
  1.2914 +      sd(R0, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
  1.2915 +      move(AT, 0x1);
  1.2916 +      b(DONE_LABEL);
  1.2917 +      delayed()->nop();
  1.2918 +
   1.2919 +      bind(Stacked);
  1.2920 +      ld(tmpReg, Address(boxReg, 0)) ;
  1.2921 +      //if (os::is_MP()) { sync(); }
  1.2922 +      cmpxchg(tmpReg, Address(objReg, 0), boxReg);
  1.2923 +
  1.2924 +      if (EmitSync & 65536) {
  1.2925 +        bind (CheckSucc);
  1.2926 +      }
  1.2927 +
  1.2928 +      bind(DONE_LABEL);
  1.2929 +
  1.2930 +      // Avoid branch to branch on AMD processors
  1.2931 +      if (EmitSync & 32768) { nop() ; }
  1.2932 +    }
  1.2933 +}
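          +
          +// Usage sketch (illustrative; the label and register choices are
          +// assumptions, not emitted by this file): compiled code tests AT after
          +// the fast path and falls into the runtime on failure.
          +//
          +//   Label done;
          +//   fast_unlock(obj, box, tmp);      // AT == 1 -> success, AT == 0 -> failure
          +//   bne(AT, R0, done);
          +//   delayed()->nop();
          +//   /* slow path: call into the VM to exit the monitor */
          +//   bind(done);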
  1.2934 +
  1.2935 +void MacroAssembler::align(int modulus) {
  1.2936 +  while (offset() % modulus != 0) nop();
  1.2937 +}
  1.2938 +
  1.2939 +
  1.2940 +void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
  1.2941 +  //Unimplemented();
  1.2942 +}
  1.2943 +
  1.2944 +#ifdef _LP64
  1.2945 +Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
  1.2946 +
   1.2947 +/* FIXME: Jin: In MIPS64, F0~F23 are all caller-saved registers */
  1.2948 +FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13};
  1.2949 +#else
  1.2950 +Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, T4, T5, T6, T7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
  1.2951 +
   1.2952 +FloatRegister caller_saved_fpu_registers[] = {};  // must be FloatRegister so the FPU save/restore loops below type-check
  1.2953 +#endif
  1.2954 +
   1.2955 +// We preserve all caller-saved registers.
   1.2956 +void MacroAssembler::pushad() {
  1.2957 +  int i;
  1.2958 +
  1.2959 +  /* Fixed-point registers */
  1.2960 +  int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
  1.2961 +  daddi(SP, SP, -1 * len * wordSize);
  1.2962 +  for (i = 0; i < len; i++)
  1.2963 +  {
  1.2964 +#ifdef _LP64
  1.2965 +    sd(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
  1.2966 +#else
  1.2967 +    sw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
  1.2968 +#endif
  1.2969 +  }
  1.2970 +
  1.2971 +  /* Floating-point registers */
  1.2972 +  len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
  1.2973 +  daddi(SP, SP, -1 * len * wordSize);
  1.2974 +  for (i = 0; i < len; i++)
  1.2975 +  {
  1.2976 +#ifdef _LP64
  1.2977 +    sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
  1.2978 +#else
  1.2979 +    swc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
  1.2980 +#endif
  1.2981 +  }
   1.2982 +}
  1.2983 +
   1.2984 +void MacroAssembler::popad() {
  1.2985 +  int i;
  1.2986 +
  1.2987 +  /* Floating-point registers */
  1.2988 +  int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
  1.2989 +  for (i = 0; i < len; i++)
  1.2990 +  {
  1.2991 +#ifdef _LP64
  1.2992 +    ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
  1.2993 +#else
  1.2994 +    lwc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
  1.2995 +#endif
  1.2996 +  }
  1.2997 +  daddi(SP, SP, len * wordSize);
  1.2998 +
  1.2999 +  /* Fixed-point registers */
  1.3000 +  len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
  1.3001 +  for (i = 0; i < len; i++)
  1.3002 +  {
  1.3003 +#ifdef _LP64
  1.3004 +    ld(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
  1.3005 +#else
  1.3006 +    lw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
  1.3007 +#endif
  1.3008 +  }
  1.3009 +  daddi(SP, SP, len * wordSize);
   1.3010 +}
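          +
          +// Usage sketch (illustrative; the callee is an assumption): debug helpers
          +// that call into the VM bracket the call so all caller-saved state survives.
          +//
          +//   pushad();                        // spill fixed-point and FPU temps
          +//   /* ... set up arguments, call the runtime helper ... */
          +//   popad();                         // restore everything in reverse order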
  1.3011 +
  1.3012 +void MacroAssembler::push2(Register reg1, Register reg2) {
  1.3013 +#ifdef _LP64
  1.3014 +  daddi(SP, SP, -16);
  1.3015 +  sd(reg2, SP, 0);
  1.3016 +  sd(reg1, SP, 8);
  1.3017 +#else
  1.3018 +  addi(SP, SP, -8);
  1.3019 +  sw(reg2, SP, 0);
  1.3020 +  sw(reg1, SP, 4);
  1.3021 +#endif
  1.3022 +}
  1.3023 +
  1.3024 +void MacroAssembler::pop2(Register reg1, Register reg2) {
  1.3025 +#ifdef _LP64
  1.3026 +  ld(reg1, SP, 0);
  1.3027 +  ld(reg2, SP, 8);
  1.3028 +  daddi(SP, SP, 16);
  1.3029 +#else
  1.3030 +  lw(reg1, SP, 0);
  1.3031 +  lw(reg2, SP, 4);
  1.3032 +  addi(SP, SP, 8);
  1.3033 +#endif
  1.3034 +}
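          +
          +// Note: push2(a, b) leaves b at [SP] and a at [SP + wordSize], so the
          +// in-place restore is pop2(b, a); calling pop2(a, b) swaps the two values.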
  1.3035 +
   1.3036 +// for the UseCompressedOops option
   1.3037 +void MacroAssembler::load_klass(Register dst, Register src) {
   1.3038 +#ifdef _LP64
   1.3039 +  if (UseCompressedClassPointers) {
   1.3040 +    lwu(dst, Address(src, oopDesc::klass_offset_in_bytes()));
   1.3041 +    decode_klass_not_null(dst);
   1.3042 +  } else
   1.3043 +#endif
   1.3044 +    ld(dst, src, oopDesc::klass_offset_in_bytes());
  1.3045 +}
  1.3046 +
  1.3047 +void MacroAssembler::store_klass(Register dst, Register src) {
   1.3048 +#ifdef _LP64
   1.3049 +  if (UseCompressedClassPointers) {
   1.3050 +    encode_klass_not_null(src);
   1.3051 +    sw(src, dst, oopDesc::klass_offset_in_bytes());
   1.3052 +  } else
   1.3053 +#endif
   1.3054 +    sd(src, dst, oopDesc::klass_offset_in_bytes());
  1.3056 +}
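          +
          +// With UseCompressedClassPointers the klass word is a 32-bit narrow class
          +// pointer (hence lwu/sw above); otherwise it is a full 64-bit pointer
          +// (ld/sd).  Typical call site (sketch; registers are illustrative):
          +//
          +//   load_klass(T9, receiver);        // T9 = receiver->klass()
          +//   beq(T9, expected_klass, hit);
          +//   delayed()->nop();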
  1.3057 +
  1.3058 +void MacroAssembler::load_prototype_header(Register dst, Register src) {
  1.3059 +  load_klass(dst, src);
  1.3060 +  ld(dst, Address(dst, Klass::prototype_header_offset()));
  1.3061 +}
  1.3062 +
  1.3063 +#ifdef _LP64
  1.3064 +void MacroAssembler::store_klass_gap(Register dst, Register src) {
  1.3065 +  if (UseCompressedClassPointers) {
  1.3066 +    sw(src, dst, oopDesc::klass_gap_offset_in_bytes());
  1.3067 +  }
  1.3068 +}
  1.3069 +
   1.3070 +void MacroAssembler::load_heap_oop(Register dst, Address src) {
   1.3071 +  if (UseCompressedOops) {
   1.3072 +    lwu(dst, src);
   1.3073 +    decode_heap_oop(dst);
   1.3074 +  } else {
   1.3075 +    ld(dst, src);
   1.3076 +  }
   1.3077 +}
   1.3078 +
   1.3079 +void MacroAssembler::store_heap_oop(Address dst, Register src) {
   1.3080 +  if (UseCompressedOops) {
   1.3081 +    assert(!dst.uses(src), "not enough registers");
   1.3082 +    encode_heap_oop(src);
   1.3083 +    sw(src, dst);
   1.3084 +  } else {
   1.3085 +    sd(src, dst);
   1.3086 +  }
   1.3087 +}
  1.3088 +
  1.3089 +#ifdef ASSERT
  1.3090 +void MacroAssembler::verify_heapbase(const char* msg) {
  1.3091 +  assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed");
  1.3092 +  assert (Universe::heap() != NULL, "java heap should be initialized");
  1.3093 +}
  1.3094 +#endif
  1.3095 +
  1.3096 +
  1.3097 +// Algorithm must match oop.inline.hpp encode_heap_oop.
  1.3098 +void MacroAssembler::encode_heap_oop(Register r) {
  1.3099 +#ifdef ASSERT
  1.3100 +  verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
  1.3101 +#endif
  1.3102 +  verify_oop(r, "broken oop in encode_heap_oop");
  1.3103 +  if (Universe::narrow_oop_base() == NULL) {
  1.3104 +    if (Universe::narrow_oop_shift() != 0) {
  1.3105 +      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  1.3106 +      shr(r, LogMinObjAlignmentInBytes);
  1.3107 +    }
  1.3108 +    return;
  1.3109 +  }
  1.3110 +
          +  // A null oop must encode to zero: movz substitutes the heap base when
          +  // r == 0, so the subtraction below yields 0 for null.
   1.3111 +  movz(r, S5_heapbase, r);
   1.3112 +  dsub(r, r, S5_heapbase);
   1.3113 +  if (Universe::narrow_oop_shift() != 0) {
   1.3114 +    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
   1.3115 +    shr(r, LogMinObjAlignmentInBytes);
   1.3116 +  }
  1.3117 +}
  1.3118 +
  1.3119 +void MacroAssembler::encode_heap_oop(Register dst, Register src) {
  1.3120 +#ifdef ASSERT
  1.3121 +  verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
  1.3122 +#endif
  1.3123 +  verify_oop(src, "broken oop in encode_heap_oop");
  1.3124 +  if (Universe::narrow_oop_base() == NULL) {
  1.3125 +    if (Universe::narrow_oop_shift() != 0) {
  1.3126 +      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  1.3127 +      dsrl(dst, src, LogMinObjAlignmentInBytes);
  1.3128 +    } else {
  1.3129 +      if (dst != src) move(dst, src);
  1.3130 +    }
  1.3131 +  } else {
  1.3132 +    if (dst == src) {
  1.3133 +      movz(dst, S5_heapbase, dst);
  1.3134 +      dsub(dst, dst, S5_heapbase);
  1.3135 +      if (Universe::narrow_oop_shift() != 0) {
  1.3136 +        assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  1.3137 +        shr(dst, LogMinObjAlignmentInBytes);
  1.3138 +      }
  1.3139 +    } else {
  1.3140 +      dsub(dst, src, S5_heapbase);
  1.3141 +      if (Universe::narrow_oop_shift() != 0) {
  1.3142 +        assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  1.3143 +        shr(dst, LogMinObjAlignmentInBytes);
  1.3144 +      }
  1.3145 +      movz(dst, R0, src);
  1.3146 +    }
  1.3147 +  }
  1.3148 +}
  1.3149 +
  1.3150 +void MacroAssembler::encode_heap_oop_not_null(Register r) {
   1.3151 +  assert (UseCompressedOops, "should be compressed");
   1.3152 +#ifdef ASSERT
   1.3153 +  if (CheckCompressedOops) {
   1.3154 +    Label ok;
   1.3155 +    bne(r, R0, ok);
   1.3156 +    delayed()->nop();
   1.3157 +    stop("null oop passed to encode_heap_oop_not_null");
   1.3158 +    bind(ok);
   1.3159 +  }
   1.3160 +#endif
  1.3161 +  verify_oop(r, "broken oop in encode_heap_oop_not_null");
  1.3162 +  if (Universe::narrow_oop_base() != NULL) {
  1.3163 +    dsub(r, r, S5_heapbase);
  1.3164 +  }
  1.3165 +  if (Universe::narrow_oop_shift() != 0) {
  1.3166 +    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  1.3167 +    shr(r, LogMinObjAlignmentInBytes);
  1.3168 +  }
  1.3169 +
  1.3170 +}
  1.3171 +
  1.3172 +void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
   1.3173 +  assert (UseCompressedOops, "should be compressed");
   1.3174 +#ifdef ASSERT
   1.3175 +  if (CheckCompressedOops) {
   1.3176 +    Label ok;
   1.3177 +    bne(src, R0, ok);
   1.3178 +    delayed()->nop();
   1.3179 +    stop("null oop passed to encode_heap_oop_not_null2");
   1.3180 +    bind(ok);
   1.3181 +  }
   1.3182 +#endif
   1.3183 +  verify_oop(src, "broken oop in encode_heap_oop_not_null2");
   1.3184 +
   1.3185 +  if (Universe::narrow_oop_base() != NULL) {
   1.3186 +    dsub(dst, src, S5_heapbase);
   1.3187 +    if (Universe::narrow_oop_shift() != 0) {
   1.3188 +      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
   1.3189 +      shr(dst, LogMinObjAlignmentInBytes);
   1.3190 +    }
   1.3191 +  } else {
   1.3192 +    if (Universe::narrow_oop_shift() != 0) {
   1.3193 +      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
   1.3194 +      dsrl(dst, src, LogMinObjAlignmentInBytes);
   1.3195 +    } else {
   1.3196 +      if (dst != src) move(dst, src);
   1.3197 +    }
   1.3198 +  }
  1.3199 +}
  1.3200 +
  1.3201 +void  MacroAssembler::decode_heap_oop(Register r) {
  1.3202 +#ifdef ASSERT
  1.3203 +  verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
  1.3204 +#endif
  1.3205 +  if (Universe::narrow_oop_base() == NULL) {
  1.3206 +    if (Universe::narrow_oop_shift() != 0) {
  1.3207 +      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  1.3208 +      shl(r, LogMinObjAlignmentInBytes);
  1.3209 +    }
  1.3210 +  } else {
  1.3211 +    move(AT, r);
  1.3212 +    if (Universe::narrow_oop_shift() != 0) {
  1.3213 +      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  1.3214 +      shl(r, LogMinObjAlignmentInBytes);
  1.3215 +    }
  1.3216 +    dadd(r, r, S5_heapbase);
  1.3217 +    movz(r, R0, AT);
  1.3218 +  }
  1.3219 +  verify_oop(r, "broken oop in decode_heap_oop");
  1.3220 +}
  1.3221 +
  1.3222 +void  MacroAssembler::decode_heap_oop(Register dst, Register src) {
  1.3223 +#ifdef ASSERT
  1.3224 +  verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
  1.3225 +#endif
  1.3226 +  if (Universe::narrow_oop_base() == NULL) {
  1.3227 +    if (Universe::narrow_oop_shift() != 0) {
  1.3228 +      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  1.3229 +      if (dst != src) nop(); // DON'T DELETE THIS GUY.
  1.3230 +      dsll(dst, src, LogMinObjAlignmentInBytes);
  1.3231 +    } else {
  1.3232 +      if (dst != src) move(dst, src);
  1.3233 +    }
  1.3234 +  } else {
  1.3235 +    if (dst == src) {
  1.3236 +      move(AT, dst);
  1.3237 +      if (Universe::narrow_oop_shift() != 0) {
  1.3238 +        assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  1.3239 +        shl(dst, LogMinObjAlignmentInBytes);
  1.3240 +      }
  1.3241 +      dadd(dst, dst, S5_heapbase);
  1.3242 +      movz(dst, R0, AT);
  1.3243 +    } else {
  1.3244 +      if (Universe::narrow_oop_shift() != 0) {
  1.3245 +        assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  1.3246 +        dsll(dst, src, LogMinObjAlignmentInBytes);
  1.3247 +        daddu(dst, dst, S5_heapbase);
  1.3248 +      } else {
  1.3249 +        daddu(dst, src, S5_heapbase);
  1.3250 +      }
  1.3251 +      movz(dst, R0, src);
  1.3252 +    }
  1.3253 +  }
  1.3254 +  verify_oop(dst, "broken oop in decode_heap_oop");
  1.3255 +}
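          +
          +// Worked example (hypothetical values): with narrow_oop_base = 0x10000000
          +// and narrow_oop_shift = 3,
          +//   encode(0x10000040) = (0x10000040 - 0x10000000) >> 3 = 0x8
          +//   decode(0x8)        = (0x8 << 3) + 0x10000000      = 0x10000040
          +// while the movz guards above keep null mapped to zero in both directions.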
  1.3256 +
  1.3257 +void  MacroAssembler::decode_heap_oop_not_null(Register r) {
  1.3258 +  // Note: it will change flags
  1.3259 +  assert (UseCompressedOops, "should only be used for compressed headers");
  1.3260 +  assert (Universe::heap() != NULL, "java heap should be initialized");
  1.3261 +  // Cannot assert, unverified entry point counts instructions (see .ad file)
  1.3262 +  // vtableStubs also counts instructions in pd_code_size_limit.
  1.3263 +  // Also do not verify_oop as this is called by verify_oop.
  1.3264 +  if (Universe::narrow_oop_shift() != 0) {
  1.3265 +    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  1.3266 +    shl(r, LogMinObjAlignmentInBytes);
  1.3267 +    if (Universe::narrow_oop_base() != NULL) {
  1.3268 +      daddu(r, r, S5_heapbase);
  1.3269 +    }
  1.3270 +  } else {
  1.3271 +    assert (Universe::narrow_oop_base() == NULL, "sanity");
  1.3272 +  }
  1.3273 +}
  1.3274 +
  1.3275 +void  MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
  1.3276 +  assert (UseCompressedOops, "should only be used for compressed headers");
  1.3277 +  assert (Universe::heap() != NULL, "java heap should be initialized");
  1.3278 +
  1.3279 +  // Cannot assert, unverified entry point counts instructions (see .ad file)
  1.3280 +  // vtableStubs also counts instructions in pd_code_size_limit.
  1.3281 +  // Also do not verify_oop as this is called by verify_oop.
  1.3282 +  //lea(dst, Address(S5_heapbase, src, Address::times_8, 0));
  1.3283 +  if (Universe::narrow_oop_shift() != 0) {
  1.3284 +    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  1.3285 +    if (LogMinObjAlignmentInBytes == Address::times_8) {
  1.3286 +      dsll(dst, src, LogMinObjAlignmentInBytes);
  1.3287 +      daddu(dst, dst, S5_heapbase);
  1.3288 +    } else {
  1.3289 +      dsll(dst, src, LogMinObjAlignmentInBytes);
  1.3290 +      if (Universe::narrow_oop_base() != NULL) {
  1.3291 +        daddu(dst, dst, S5_heapbase);
  1.3292 +      }
  1.3293 +    }
  1.3294 +  } else {
  1.3295 +    assert (Universe::narrow_oop_base() == NULL, "sanity");
  1.3296 +    if (dst != src) {
  1.3297 +      move(dst, src);
  1.3298 +    }
  1.3299 +  }
  1.3300 +}
  1.3301 +
  1.3302 +void MacroAssembler::encode_klass_not_null(Register r) {
  1.3303 +  if (Universe::narrow_klass_base() != NULL) {
  1.3304 +    assert(r != AT, "Encoding a klass in AT");
  1.3305 +    set64(AT, (int64_t)Universe::narrow_klass_base());
  1.3306 +    dsub(r, r, AT);
  1.3307 +  }
  1.3308 +  if (Universe::narrow_klass_shift() != 0) {
  1.3309 +    assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
  1.3310 +    shr(r, LogKlassAlignmentInBytes);
  1.3311 +  }
   1.3312 +  // Not necessary for MIPS at all.
  1.3313 +  //if (Universe::narrow_klass_base() != NULL) {
  1.3314 +  //  reinit_heapbase();
  1.3315 +  //}
  1.3316 +}
  1.3317 +
  1.3318 +void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
  1.3319 +  if (dst == src) {
  1.3320 +    encode_klass_not_null(src);
  1.3321 +  } else {
  1.3322 +    if (Universe::narrow_klass_base() != NULL) {
  1.3323 +      set64(dst, (int64_t)Universe::narrow_klass_base());
  1.3324 +      dsub(dst, src, dst);
  1.3325 +      if (Universe::narrow_klass_shift() != 0) {
  1.3326 +        assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
  1.3327 +        shr(dst, LogKlassAlignmentInBytes);
  1.3328 +      }
  1.3329 +    } else {
  1.3330 +      if (Universe::narrow_klass_shift() != 0) {
  1.3331 +        assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
  1.3332 +        dsrl(dst, src, LogKlassAlignmentInBytes);
  1.3333 +      } else {
  1.3334 +        move(dst, src);
  1.3335 +      }
  1.3336 +    }
  1.3337 +  }
  1.3338 +}
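          +
          +// Worked example (hypothetical values): with narrow_klass_base = 0x800000000
          +// and narrow_klass_shift = 3 (LogKlassAlignmentInBytes), a Klass* at
          +// 0x800001000 encodes to (0x800001000 - 0x800000000) >> 3 = 0x200.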
  1.3339 +
  1.3340 +// Function instr_size_for_decode_klass_not_null() counts the instructions
  1.3341 +// generated by decode_klass_not_null(register r) and reinit_heapbase(),
  1.3342 +// when (Universe::heap() != NULL).  Hence, if the instructions they
  1.3343 +// generate change, then this method needs to be updated.
  1.3344 +int MacroAssembler::instr_size_for_decode_klass_not_null() {
  1.3345 +  assert (UseCompressedClassPointers, "only for compressed klass ptrs");
   1.3346 +  if (Universe::narrow_klass_base() != NULL) {
   1.3347 +    // worst-case set64 of the base + daddu (+ optional shift); 4 bytes per instruction.
   1.3348 +    return (Universe::narrow_klass_shift() == 0 ? 4 * 9 : 4 * 10);
   1.3349 +  } else {
   1.3350 +    // with a zero base only the optional shift remains.
   1.3351 +    return (Universe::narrow_klass_shift() == 0 ? 4 * 0 : 4 * 1);
   1.3352 +  }
  1.3353 +}
  1.3354 +
  1.3355 +void  MacroAssembler::decode_klass_not_null(Register r) {
  1.3356 +  assert (UseCompressedClassPointers, "should only be used for compressed headers");
  1.3357 +  assert(r != AT, "Decoding a klass in AT");
  1.3358 +  // Cannot assert, unverified entry point counts instructions (see .ad file)
  1.3359 +  // vtableStubs also counts instructions in pd_code_size_limit.
  1.3360 +  // Also do not verify_oop as this is called by verify_oop.
  1.3361 +  if (Universe::narrow_klass_shift() != 0) {
  1.3362 +    assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
  1.3363 +    shl(r, LogKlassAlignmentInBytes);
  1.3364 +  }
  1.3365 +  if (Universe::narrow_klass_base() != NULL) {
  1.3366 +    set64(AT, (int64_t)Universe::narrow_klass_base());
  1.3367 +    daddu(r, r, AT);
   1.3368 +    // Not necessary for MIPS at all.
  1.3369 +    //reinit_heapbase();
  1.3370 +  }
  1.3371 +}
  1.3372 +
  1.3373 +void  MacroAssembler::decode_klass_not_null(Register dst, Register src) {
  1.3374 +  assert (UseCompressedClassPointers, "should only be used for compressed headers");
  1.3375 +
  1.3376 +  if (dst == src) {
  1.3377 +    decode_klass_not_null(dst);
  1.3378 +  } else {
  1.3379 +    // Cannot assert, unverified entry point counts instructions (see .ad file)
  1.3380 +    // vtableStubs also counts instructions in pd_code_size_limit.
  1.3381 +    // Also do not verify_oop as this is called by verify_oop.
  1.3382 +    set64(dst, (int64_t)Universe::narrow_klass_base());
  1.3383 +    if (Universe::narrow_klass_shift() != 0) {
  1.3384 +      assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
  1.3385 +      assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
  1.3386 +      dsll(AT, src, Address::times_8);
  1.3387 +      daddu(dst, dst, AT);
  1.3388 +    } else {
  1.3389 +      daddu(dst, src, dst);
  1.3390 +    }
  1.3391 +  }
  1.3392 +}
  1.3393 +
  1.3394 +void MacroAssembler::incrementl(Register reg, int value) {
  1.3395 +  if (value == min_jint) {
  1.3396 +     move(AT, value);
  1.3397 +     LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
  1.3398 +     return;
  1.3399 +  }
  1.3400 +  if (value <  0) { decrementl(reg, -value); return; }
   1.3401 +  if (value == 0) { return; }
   1.3402 +
   1.3403 +  if (Assembler::is_simm16(value)) {
  1.3404 +     NOT_LP64(addiu(reg, reg, value));
  1.3405 +     LP64_ONLY(move(AT, value); addu32(reg, reg, AT));
  1.3406 +  } else {
  1.3407 +     move(AT, value);
  1.3408 +     LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
  1.3409 +  }
  1.3410 +}
  1.3411 +
  1.3412 +void MacroAssembler::decrementl(Register reg, int value) {
  1.3413 +  if (value == min_jint) {
  1.3414 +     move(AT, value);
  1.3415 +     LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
  1.3416 +     return;
  1.3417 +  }
  1.3418 +  if (value <  0) { incrementl(reg, -value); return; }
   1.3419 +  if (value == 0) { return; }
   1.3420 +
   1.3421 +  if (Assembler::is_simm16(value)) {
  1.3422 +     NOT_LP64(addiu(reg, reg, -value));
  1.3423 +     LP64_ONLY(move(AT, value); subu32(reg, reg, AT));
  1.3424 +  } else {
  1.3425 +     move(AT, value);
  1.3426 +     LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
  1.3427 +  }
  1.3428 +}
  1.3429 +
  1.3430 +void MacroAssembler::reinit_heapbase() {
  1.3431 +  if (UseCompressedOops || UseCompressedClassPointers) {
  1.3432 +    if (Universe::heap() != NULL) {
  1.3433 +      if (Universe::narrow_oop_base() == NULL) {
  1.3434 +        move(S5_heapbase, R0);
  1.3435 +      } else {
  1.3436 +        set64(S5_heapbase, (int64_t)Universe::narrow_ptrs_base());
  1.3437 +      }
  1.3438 +    } else {
  1.3439 +      set64(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr());
  1.3440 +      ld(S5_heapbase, S5_heapbase, 0);
  1.3441 +    }
  1.3442 +  }
  1.3443 +}
  1.3444 +#endif // _LP64
  1.3445 +
  1.3446 +void MacroAssembler::check_klass_subtype(Register sub_klass,
  1.3447 +                           Register super_klass,
  1.3448 +                           Register temp_reg,
  1.3449 +                           Label& L_success) {
   1.3450 +// implements the same check as the interpreter's gen_subtype_check
  1.3451 +  Label L_failure;
  1.3452 +  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg,        &L_success, &L_failure, NULL);
  1.3453 +  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
  1.3454 +  bind(L_failure);
  1.3455 +}
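          +
          +// Usage sketch (illustrative): a type check that only needs the success
          +// edge; a decisive failure falls through into the slow/throw path.
          +//
          +//   Label is_subtype;
          +//   check_klass_subtype(obj_klass, target_klass, T9, is_subtype);
          +//   /* here: not a subtype -> jump to the runtime / throw path */
          +//   bind(is_subtype);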
  1.3456 +
   1.3457 +SkipIfEqual::SkipIfEqual(
   1.3458 +    MacroAssembler* masm, const bool* flag_addr, bool value) {
   1.3459 +  _masm = masm;
   1.3460 +  _masm->li(AT, (address)flag_addr);
   1.3461 +  _masm->lb(AT, AT, 0);
   1.3462 +  _masm->addi(AT, AT, -value);
   1.3463 +  _masm->beq(AT, R0, _label);
   1.3464 +  _masm->delayed()->nop();
   1.3465 +}
          +
  1.3466 +void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
  1.3467 +                                                   Register super_klass,
  1.3468 +                                                   Register temp_reg,
  1.3469 +                                                   Label* L_success,
  1.3470 +                                                   Label* L_failure,
  1.3471 +                                                   Label* L_slow_path,
  1.3472 +                                        RegisterOrConstant super_check_offset) {
  1.3473 +  assert_different_registers(sub_klass, super_klass, temp_reg);
  1.3474 +  bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
  1.3475 +  if (super_check_offset.is_register()) {
  1.3476 +    assert_different_registers(sub_klass, super_klass,
  1.3477 +                               super_check_offset.as_register());
  1.3478 +  } else if (must_load_sco) {
  1.3479 +    assert(temp_reg != noreg, "supply either a temp or a register offset");
  1.3480 +  }
  1.3481 +
  1.3482 +  Label L_fallthrough;
  1.3483 +  int label_nulls = 0;
  1.3484 +  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  1.3485 +  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  1.3486 +  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
  1.3487 +  assert(label_nulls <= 1, "at most one NULL in the batch");
  1.3488 +
  1.3489 +  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  1.3490 +  int sco_offset = in_bytes(Klass::super_check_offset_offset());
  1.3491 +  // If the pointers are equal, we are done (e.g., String[] elements).
  1.3492 +  // This self-check enables sharing of secondary supertype arrays among
  1.3493 +  // non-primary types such as array-of-interface.  Otherwise, each such
  1.3494 +  // type would need its own customized SSA.
  1.3495 +  // We move this check to the front of the fast path because many
  1.3496 +  // type checks are in fact trivially successful in this manner,
  1.3497 +  // so we get a nicely predicted branch right at the start of the check.
  1.3498 +  //cmpptr(sub_klass, super_klass);
  1.3499 +  //local_jcc(Assembler::equal, *L_success);
  1.3500 +  beq(sub_klass, super_klass, *L_success);
  1.3501 +  delayed()->nop();
  1.3502 +  // Check the supertype display:
   1.3503 +  if (must_load_sco) {
   1.3504 +    // lwu zero-extends the 32-bit super_check_offset, which is what we want.
   1.3505 +    lwu(temp_reg, super_klass, sco_offset);
   1.3506 +    super_check_offset = RegisterOrConstant(temp_reg);
   1.3507 +  }
  1.3508 +  dsll(AT, super_check_offset.register_or_noreg(), Address::times_1);
  1.3509 +  daddu(AT, sub_klass, AT);
  1.3510 +  ld(AT, AT, super_check_offset.constant_or_zero()*Address::times_1);
  1.3511 +
  1.3512 +  // This check has worked decisively for primary supers.
  1.3513 +  // Secondary supers are sought in the super_cache ('super_cache_addr').
  1.3514 +  // (Secondary supers are interfaces and very deeply nested subtypes.)
  1.3515 +  // This works in the same check above because of a tricky aliasing
  1.3516 +  // between the super_cache and the primary super display elements.
  1.3517 +  // (The 'super_check_addr' can address either, as the case requires.)
  1.3518 +  // Note that the cache is updated below if it does not help us find
  1.3519 +  // what we need immediately.
  1.3520 +  // So if it was a primary super, we can just fail immediately.
  1.3521 +  // Otherwise, it's the slow path for us (no success at this point).
  1.3522 +
   1.3523 +  if (super_check_offset.is_register()) {
   1.3524 +    beq(super_klass, AT, *L_success);
   1.3525 +    delayed()->nop();
   1.3526 +    addi(AT, super_check_offset.as_register(), -sc_offset);
   1.3527 +    if (L_failure == &L_fallthrough) {
   1.3528 +      beq(AT, R0, *L_slow_path);
   1.3529 +      delayed()->nop();
   1.3530 +    } else {
   1.3531 +      bne(AT, R0, *L_failure);
   1.3532 +      delayed()->nop();
   1.3533 +      b(*L_slow_path);
   1.3534 +      delayed()->nop();
   1.3535 +    }
   1.3536 +  } else if (super_check_offset.as_constant() == sc_offset) {
   1.3537 +    // Need a slow path; fast failure is impossible.
   1.3538 +    if (L_slow_path == &L_fallthrough) {
   1.3539 +      beq(super_klass, AT, *L_success);
   1.3540 +      delayed()->nop();
   1.3541 +    } else {
   1.3542 +      bne(super_klass, AT, *L_slow_path);
   1.3543 +      delayed()->nop();
   1.3544 +      b(*L_success);
   1.3545 +      delayed()->nop();
   1.3546 +    }
   1.3547 +  } else {
   1.3548 +    // No slow path; it's a fast decision.
   1.3549 +    if (L_failure == &L_fallthrough) {
   1.3550 +      beq(super_klass, AT, *L_success);
   1.3551 +      delayed()->nop();
   1.3552 +    } else {
   1.3553 +      bne(super_klass, AT, *L_failure);
   1.3554 +      delayed()->nop();
   1.3555 +      b(*L_success);
   1.3556 +      delayed()->nop();
   1.3557 +    }
   1.3558 +  }
  1.3559 +
  1.3560 +  bind(L_fallthrough);
  1.3561 +
  1.3562 +}
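          +
          +// Label convention sketch: at most one of L_success/L_failure/L_slow_path
          +// may be NULL, and that outcome simply falls through, e.g. (illustrative)
          +//
          +//   check_klass_subtype_fast_path(sub, super, tmp, &ok, NULL, &slow);
          +//   // a decisive failure falls through to this point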
  1.3563 +
  1.3564 +
  1.3565 +void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
  1.3566 +                                                   Register super_klass,
  1.3567 +                                                   Register temp_reg,
  1.3568 +                                                   Register temp2_reg,
  1.3569 +                                                   Label* L_success,
  1.3570 +                                                   Label* L_failure,
  1.3571 +                                                   bool set_cond_codes) {
  1.3572 +  assert_different_registers(sub_klass, super_klass, temp_reg);
  1.3573 +  if (temp2_reg != noreg)
  1.3574 +    assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
  1.3575 +  else
  1.3576 +    temp2_reg = T9;
  1.3577 +#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)
  1.3578 +
  1.3579 +  Label L_fallthrough;
  1.3580 +  int label_nulls = 0;
  1.3581 +  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  1.3582 +  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  1.3583 +  assert(label_nulls <= 1, "at most one NULL in the batch");
  1.3584 +
  1.3585 +  // a couple of useful fields in sub_klass:
  1.3586 +  int ss_offset = in_bytes(Klass::secondary_supers_offset());
  1.3587 +  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  1.3588 +  Address secondary_supers_addr(sub_klass, ss_offset);
  1.3589 +  Address super_cache_addr(     sub_klass, sc_offset);
  1.3590 +
  1.3591 +  // Do a linear scan of the secondary super-klass chain.
  1.3592 +  // This code is rarely used, so simplicity is a virtue here.
   1.3593 +  // (The x86 version uses a repne scan with fixed registers; on MIPS we emit an explicit loop.)
  1.3595 +
  1.3596 +#if 0
  1.3597 +  assert(sub_klass != T9, "killed reg"); // killed by mov(rax, super)
  1.3598 +  assert(sub_klass != T1, "killed reg"); // killed by lea(rcx, &pst_counter)
  1.3599 +#endif
  1.3600 +
   1.3601 +  // Bump SharedRuntime::_partial_subtype_ctr in non-product builds.
  1.3602 +#ifndef PRODUCT
  1.3603 +  int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
  1.3604 +  ExternalAddress pst_counter_addr((address) pst_counter);
  1.3605 +  NOT_LP64(  incrementl(pst_counter_addr) );
  1.3606 +  //LP64_ONLY( lea(rcx, pst_counter_addr) );
  1.3607 +  //LP64_ONLY( incrementl(Address(rcx, 0)) );
  1.3608 +#endif //PRODUCT
  1.3609 +
  1.3610 +  // We will consult the secondary-super array.
  1.3611 +  ld(temp_reg, secondary_supers_addr);
  1.3612 +  // Load the array length.  (Positive movl does right thing on LP64.)
  1.3613 +  lw(temp2_reg, Address(temp_reg, Array<Klass*>::length_offset_in_bytes()));
  1.3614 +  // Skip to start of data.
  1.3615 +  daddiu(temp_reg, temp_reg, Array<Klass*>::base_offset_in_bytes());
  1.3616 +
   1.3617 +  // Scan temp2_reg words starting at [temp_reg] for an occurrence of
   1.3618 +  // super_klass; temp2_reg counts down and reaches zero if the class is absent.
  1.3622 +
  1.3623 +  /* 2013/4/3 Jin: OpenJDK8 never compresses klass pointers in secondary-super array. */
  1.3624 +  Label Loop, subtype;
  1.3625 +  bind(Loop);
  1.3626 +  beq(temp2_reg, R0, *L_failure);
  1.3627 +  delayed()->nop();
  1.3628 +  ld(AT, temp_reg, 0);
  1.3629 +  beq(AT, super_klass, subtype);
  1.3630 +  delayed()->daddi(temp_reg, temp_reg, 1 * wordSize);
  1.3631 +  b(Loop);
  1.3632 +  delayed()->daddi(temp2_reg, temp2_reg, -1);
  1.3633 +
   1.3634 +  // Success.  Cache the super we found and proceed in triumph.
   1.3635 +  bind(subtype);
   1.3636 +  sd(super_klass, super_cache_addr);
   1.3637 +  if (L_success != &L_fallthrough) {
   1.3638 +    b(*L_success);
   1.3639 +    delayed()->nop();
   1.3640 +  }
   1.3641 +
   1.3642 +#undef IS_A_TEMP
  1.3643 +
  1.3644 +  bind(L_fallthrough);
   1.3645 +}
          +
  1.3646 +void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
  1.3647 +  ld(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
  1.3648 +  sd(R0, Address(java_thread, JavaThread::vm_result_offset()));
  1.3649 +  verify_oop(oop_result, "broken oop in call_VM_base");
  1.3650 +}
  1.3651 +
  1.3652 +void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
  1.3653 +  ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
  1.3654 +  sd(R0, Address(java_thread, JavaThread::vm_result_2_offset()));
  1.3655 +}
  1.3656 +
  1.3657 +Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
  1.3658 +                                         int extra_slot_offset) {
  1.3659 +  // cf. TemplateTable::prepare_invoke(), if (load_receiver).
  1.3660 +  int stackElementSize = Interpreter::stackElementSize;
  1.3661 +  int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
  1.3662 +#ifdef ASSERT
  1.3663 +  int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
  1.3664 +  assert(offset1 - offset == stackElementSize, "correct arithmetic");
  1.3665 +#endif
  1.3666 +  Register             scale_reg    = NOREG;
  1.3667 +  Address::ScaleFactor scale_factor = Address::no_scale;
  1.3668 +  if (arg_slot.is_constant()) {
  1.3669 +    offset += arg_slot.as_constant() * stackElementSize;
  1.3670 +  } else {
  1.3671 +    scale_reg    = arg_slot.as_register();
  1.3672 +    scale_factor = Address::times_8;
  1.3673 +  }
  1.3674 +  // 2014/07/31 Fu: We don't push RA on stack in prepare_invoke.
  1.3675 +  //  offset += wordSize;           // return PC is on stack
   1.3676 +  if (scale_reg == NOREG) return Address(SP, offset);
   1.3677 +  else {
   1.3678 +    dsll(scale_reg, scale_reg, scale_factor);
   1.3679 +    daddu(scale_reg, SP, scale_reg);
   1.3680 +    return Address(scale_reg, offset);
   1.3681 +  }
  1.3682 +}
  1.3683 +
  1.3684 +SkipIfEqual::~SkipIfEqual() {
  1.3685 +  _masm->bind(_label);
  1.3686 +}
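          +
          +// Usage sketch: the guarded instructions are emitted unconditionally but
          +// executed only when the flag differs from `value` at run time; the flag
          +// named here is illustrative.
          +//
          +//   { SkipIfEqual skip(this, &DTraceAllocProbes, false);
          +//     /* probe code: runs only when DTraceAllocProbes is true */
          +//   } // the destructor binds the skip target here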
  1.3687 +
  1.3688 +void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
  1.3689 +  switch (size_in_bytes) {
  1.3690 +#ifndef _LP64
  1.3691 +  case  8:
  1.3692 +    assert(dst2 != noreg, "second dest register required");
  1.3693 +    lw(dst,  src);
  1.3694 +    lw(dst2, src.plus_disp(BytesPerInt));
  1.3695 +    break;
  1.3696 +#else
  1.3697 +  case  8:  ld(dst, src); break;
  1.3698 +#endif
  1.3699 +  case  4:  lw(dst, src); break;
  1.3700 +  case  2:  is_signed ? lh(dst, src) : lhu(dst, src); break;
   1.3701 +  case  1:  is_signed ? lb(dst, src) : lbu(dst, src); break;
  1.3702 +  default:  ShouldNotReachHere();
  1.3703 +  }
  1.3704 +}
  1.3705 +
  1.3706 +void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
  1.3707 +  switch (size_in_bytes) {
  1.3708 +#ifndef _LP64
  1.3709 +  case  8:
  1.3710 +    assert(src2 != noreg, "second source register required");
  1.3711 +    sw(src, dst);
  1.3712 +    sw(src2, dst.plus_disp(BytesPerInt));
  1.3713 +    break;
  1.3714 +#else
  1.3715 +  case  8:  sd(src, dst); break;
  1.3716 +#endif
  1.3717 +  case  4:  sw(src, dst); break;
  1.3718 +  case  2:  sh(src, dst); break;
  1.3719 +  case  1:  sb(src, dst); break;
  1.3720 +  default:  ShouldNotReachHere();
  1.3721 +  }
  1.3722 +}
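          +
          +// Sketch (LP64 form; field offsets and registers are made up): reading a
          +// Java char field zero-extended and a long field sign-preserving.
          +//
          +//   load_sized_value(V0, Address(obj, char_off), sizeof(jchar), false, noreg);
          +//   load_sized_value(V1, Address(obj, long_off), sizeof(jlong), true,  noreg);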
  1.3723 +
  1.3724 +// Look up the method for a megamorphic invokeinterface call.
  1.3725 +// The target method is determined by <intf_klass, itable_index>.
  1.3726 +// The receiver klass is in recv_klass.
  1.3727 +// On success, the result will be in method_result, and execution falls through.
  1.3728 +// On failure, execution transfers to the given label.
  1.3729 +void MacroAssembler::lookup_interface_method(Register recv_klass,
  1.3730 +                                             Register intf_klass,
  1.3731 +                                             RegisterOrConstant itable_index,
  1.3732 +                                             Register method_result,
  1.3733 +                                             Register scan_temp,
  1.3734 +                                             Label& L_no_such_interface) {
  1.3735 +  assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
  1.3736 +  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
  1.3737 +         "caller must use same register for non-constant itable index as for method");
  1.3738 +
  1.3739 +  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  1.3740 +  int vtable_base = InstanceKlass::vtable_start_offset() * wordSize;
  1.3741 +  int itentry_off = itableMethodEntry::method_offset_in_bytes();
  1.3742 +  int scan_step   = itableOffsetEntry::size() * wordSize;
  1.3743 +  int vte_size    = vtableEntry::size() * wordSize;
  1.3744 +  Address::ScaleFactor times_vte_scale = Address::times_ptr;
  1.3745 +  assert(vte_size == wordSize, "else adjust times_vte_scale");
  1.3746 +
  1.3747 +  lw(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize));
  1.3748 +
  1.3749 +  // %%% Could store the aligned, prescaled offset in the klassoop.
  1.3750 +  dsll(scan_temp, scan_temp, times_vte_scale);
  1.3751 +  daddu(scan_temp, recv_klass, scan_temp);
  1.3752 +  daddiu(scan_temp, scan_temp, vtable_base);
  1.3753 +  if (HeapWordsPerLong > 1) {
  1.3754 +    // Round up to align_object_offset boundary
  1.3755 +    // see code for InstanceKlass::start_of_itable!
  1.3756 +    round_to(scan_temp, BytesPerLong);
  1.3757 +  }
  1.3758 +
  1.3759 +  // Adjust recv_klass by scaled itable_index, so we can free itable_index.
  1.3760 +  assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
  1.3761 +//  lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));
   1.3762 +  if (itable_index.is_constant()) {
   1.3763 +    set64(AT, (int)itable_index.as_constant());
   1.3764 +    dsll(AT, AT, (int)Address::times_ptr);
   1.3765 +  } else {
   1.3766 +    dsll(AT, itable_index.as_register(), (int)Address::times_ptr);
   1.3767 +  }
  1.3768 +  daddu(AT, AT, recv_klass);
  1.3769 +  daddiu(recv_klass, AT, itentry_off);
  1.3770 +
  1.3771 +  // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
  1.3772 +  //   if (scan->interface() == intf) {
  1.3773 +  //     result = (klass + scan->offset() + itable_index);
  1.3774 +  //   }
  1.3775 +  // }
  1.3776 +  Label search, found_method;
  1.3777 +
  1.3778 +  for (int peel = 1; peel >= 0; peel--) {
  1.3779 +    ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
  1.3780 +
  1.3781 +    if (peel) {
  1.3782 +      beq(intf_klass, method_result, found_method);
  1.3783 +      nop();
  1.3784 +    } else {
  1.3785 +      bne(intf_klass, method_result, search);
  1.3786 +      nop();
  1.3787 +      // (invert the test to fall through to found_method...)
  1.3788 +    }
  1.3789 +
  1.3790 +    if (!peel)  break;
  1.3791 +
  1.3792 +    bind(search);
  1.3793 +
  1.3794 +    // Check that the previous entry is non-null.  A null entry means that
  1.3795 +    // the receiver class doesn't implement the interface, and wasn't the
  1.3796 +    // same as when the caller was compiled.
  1.3797 +    beq(method_result, R0, L_no_such_interface);
  1.3798 +    nop();
  1.3799 +    daddiu(scan_temp, scan_temp, scan_step);
  1.3800 +  }
  1.3801 +
  1.3802 +  bind(found_method);
  1.3803 +
  1.3804 +  // Got a hit.
  1.3805 +  lw(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
  1.3806 +  //ld(method_result, Address(recv_klass, scan_temp, Address::times_1));
   1.3807 +  if (UseLoongsonISA) {
  1.3808 +    gsldx(method_result, recv_klass, scan_temp, 0);
  1.3809 +  } else {
  1.3810 +    daddu(AT, recv_klass, scan_temp);
  1.3811 +    ld(method_result, AT);
  1.3812 +  }
  1.3813 +}
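          +
          +// Itable layout assumed by the scan above (sketch):
          +//
          +//   recv_klass + vtable_start + vtable_len * wordSize:
          +//     itableOffsetEntry[0] = { interface0, offset0 }
          +//     itableOffsetEntry[1] = { interface1, offset1 }
          +//     ...
          +//     { NULL, ... }          // terminator caught by the null check above
          +//   method tables follow at the recorded offsets.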
  1.3814 +
  1.3815 +
  1.3816 +// virtual method calling
  1.3817 +void MacroAssembler::lookup_virtual_method(Register recv_klass,
  1.3818 +                                           RegisterOrConstant vtable_index,
  1.3819 +                                           Register method_result) {
  1.3820 +  Register tmp = GP;
  1.3821 +  push(tmp);
  1.3822 +
  1.3823 +  if (vtable_index.is_constant()) {
  1.3824 +    assert_different_registers(recv_klass, method_result, tmp);
  1.3825 +  } else {
  1.3826 +    assert_different_registers(recv_klass, method_result, vtable_index.as_register(), tmp);
  1.3827 +  }
  1.3828 +  const int base = InstanceKlass::vtable_start_offset() * wordSize;
  1.3829 +  assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below");
  1.3830 +/*
  1.3831 +  Address vtable_entry_addr(recv_klass,
  1.3832 +                            vtable_index, Address::times_ptr,
  1.3833 +                            base + vtableEntry::method_offset_in_bytes());
  1.3834 +*/
  1.3835 +  if (vtable_index.is_constant()) {
  1.3836 +    set64(AT, vtable_index.as_constant());
  1.3837 +    dsll(AT, AT, (int)Address::times_ptr);
  1.3838 +  } else {
  1.3839 +    dsll(AT, vtable_index.as_register(), (int)Address::times_ptr);
  1.3840 +  }
  1.3841 +  set64(tmp, base + vtableEntry::method_offset_in_bytes());
  1.3842 +  daddu(tmp, tmp, AT);
  1.3843 +  daddu(tmp, tmp, recv_klass);
  1.3844 +  ld(method_result, tmp, 0);
  1.3845 +
  1.3846 +  pop(tmp);
  1.3847 +}
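          +
          +// Usage sketch (illustrative; Rmethod/T9 and the entry offset are
          +// assumptions): a virtual dispatch once the receiver klass is loaded.
          +//
          +//   load_klass(T9, receiver);
          +//   lookup_virtual_method(T9, vtable_index, Rmethod);
          +//   ld(AT, Rmethod, in_bytes(Method::from_compiled_offset()));
          +//   jr(AT);
          +//   delayed()->nop();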
