src/cpu/x86/vm/assembler_x86.cpp

changeset 4318
cd3d6a6b95d9
parent 4317
6ab62ad83507
child 4360
c4bd2eccea46
     1.1 --- a/src/cpu/x86/vm/assembler_x86.cpp	Fri Nov 30 11:44:05 2012 -0800
     1.2 +++ b/src/cpu/x86/vm/assembler_x86.cpp	Fri Nov 30 15:23:16 2012 -0800
     1.3 @@ -23,7 +23,8 @@
     1.4   */
     1.5  
     1.6  #include "precompiled.hpp"
     1.7 -#include "assembler_x86.inline.hpp"
     1.8 +#include "asm/assembler.hpp"
     1.9 +#include "asm/assembler.inline.hpp"
    1.10  #include "gc_interface/collectedHeap.inline.hpp"
    1.11  #include "interpreter/interpreter.hpp"
    1.12  #include "memory/cardTableModRefBS.hpp"
    1.13 @@ -1167,6 +1168,10 @@
    1.14    emit_byte(0x99);
    1.15  }
    1.16  
    1.17 +void Assembler::cld() {  // Encodes CLD: clears the direction flag (DF = 0) so string instructions step their index registers forward.
    1.18 +  emit_byte(0xfc);  // single-byte opcode FC; no prefix, no operands
    1.19 +}
    1.20 +
    1.21  void Assembler::cmovl(Condition cc, Register dst, Register src) {
    1.22    NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
    1.23    int encode = prefix_and_encode(dst->encoding(), src->encoding());
    1.24 @@ -1260,6 +1265,11 @@
    1.25    emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
    1.26  }
    1.27  
    1.28 +void Assembler::cpuid() {  // Encodes CPUID (0F A2). The instruction reads its leaf from EAX (sub-leaf from ECX) and writes EAX/EBX/ECX/EDX; this routine only emits the opcode, so register setup is the caller's job.
    1.29 +  emit_byte(0x0F);  // two-byte opcode escape
    1.30 +  emit_byte(0xA2);  // CPUID opcode
    1.31 +}
    1.32 +
    1.33  void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
    1.34    NOT_LP64(assert(VM_Version::supports_sse2(), ""));
    1.35    emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3);
    1.36 @@ -1558,6 +1568,12 @@
    1.37    emit_operand(dst, src);
    1.38  }
    1.39  
    1.40 +void Assembler::lfence() {  // Encodes LFENCE (0F AE E8), the load-fence instruction.
    1.41 +  emit_byte(0x0F);  // two-byte opcode escape
    1.42 +  emit_byte(0xAE);  // opcode group 0F AE /r
    1.43 +  emit_byte(0xE8);  // ModRM byte: mod=11, reg=/5 selects LFENCE
    1.44 +}  // NOTE(review): LFENCE arrived with SSE2 and nothing here checks VM_Version::supports_sse2(), unlike the neighbouring SSE routines -- presumably callers guarantee it; confirm for 32-bit builds.
    1.45 +
    1.46  void Assembler::lock() {
    1.47    emit_byte(0xF0);
    1.48  }
    1.49 @@ -2671,6 +2687,10 @@
    1.50    emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
    1.51  }
    1.52  
    1.53 +void Assembler::std() {  // Encodes STD: sets the direction flag (DF = 1) so string instructions step their index registers backward; counterpart of cld().
    1.54 +  emit_byte(0xfd);  // single-byte opcode FD; no prefix, no operands
    1.55 +}
    1.56 +
    1.57  void Assembler::sqrtss(XMMRegister dst, Address src) {
    1.58    NOT_LP64(assert(VM_Version::supports_sse(), ""));
    1.59    emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
    1.60 @@ -2816,6 +2836,12 @@
    1.61    emit_byte(0xc0 | encode);
    1.62  }
    1.63  
    1.64 +void Assembler::xgetbv() {  // Encodes XGETBV (0F 01 D0): reads the extended control register selected by ECX into EDX:EAX. Only the opcode is emitted; loading ECX is the caller's job.
    1.65 +  emit_byte(0x0F);  // two-byte opcode escape
    1.66 +  emit_byte(0x01);  // opcode group 0F 01
    1.67 +  emit_byte(0xD0);  // third byte selects XGETBV within the group
    1.68 +}  // NOTE(review): XGETBV raises #UD unless the OS has enabled XSAVE (CPUID.1:ECX.OSXSAVE) -- presumably callers test that bit first; confirm.
    1.69 +
    1.70  void Assembler::xorl(Register dst, int32_t imm32) {
    1.71    prefix(dst);
    1.72    emit_arith(0x81, 0xF0, dst, imm32);
    1.73 @@ -5417,6043 +5443,3 @@
    1.74  }
    1.75  
    1.76  #endif // !LP64
    1.77 -
    1.78 -static Assembler::Condition reverse[] = {
    1.79 -    Assembler::noOverflow     /* overflow      = 0x0 */ ,
    1.80 -    Assembler::overflow       /* noOverflow    = 0x1 */ ,
    1.81 -    Assembler::aboveEqual     /* carrySet      = 0x2, below         = 0x2 */ ,
    1.82 -    Assembler::below          /* aboveEqual    = 0x3, carryClear    = 0x3 */ ,
    1.83 -    Assembler::notZero        /* zero          = 0x4, equal         = 0x4 */ ,
    1.84 -    Assembler::zero           /* notZero       = 0x5, notEqual      = 0x5 */ ,
    1.85 -    Assembler::above          /* belowEqual    = 0x6 */ ,
    1.86 -    Assembler::belowEqual     /* above         = 0x7 */ ,
    1.87 -    Assembler::positive       /* negative      = 0x8 */ ,
    1.88 -    Assembler::negative       /* positive      = 0x9 */ ,
    1.89 -    Assembler::noParity       /* parity        = 0xa */ ,
    1.90 -    Assembler::parity         /* noParity      = 0xb */ ,
    1.91 -    Assembler::greaterEqual   /* less          = 0xc */ ,
    1.92 -    Assembler::less           /* greaterEqual  = 0xd */ ,
    1.93 -    Assembler::greater        /* lessEqual     = 0xe */ ,
    1.94 -    Assembler::lessEqual      /* greater       = 0xf, */
    1.95 -
    1.96 -};
    1.97 -
    1.98 -
    1.99 -// Implementation of MacroAssembler
   1.100 -
   1.101 -// First all the versions that have distinct versions depending on 32/64 bit
   1.102 -// Unless the difference is trivial (1 line or so).
   1.103 -
   1.104 -#ifndef _LP64
   1.105 -
   1.106 -// 32bit versions
   1.107 -
   1.108 -Address MacroAssembler::as_Address(AddressLiteral adr) {
   1.109 -  return Address(adr.target(), adr.rspec());
   1.110 -}
   1.111 -
   1.112 -Address MacroAssembler::as_Address(ArrayAddress adr) {
   1.113 -  return Address::make_array(adr);
   1.114 -}
   1.115 -
   1.116 -int MacroAssembler::biased_locking_enter(Register lock_reg,
   1.117 -                                         Register obj_reg,
   1.118 -                                         Register swap_reg,
   1.119 -                                         Register tmp_reg,
   1.120 -                                         bool swap_reg_contains_mark,
   1.121 -                                         Label& done,
   1.122 -                                         Label* slow_case,
   1.123 -                                         BiasedLockingCounters* counters) {
   1.124 -  assert(UseBiasedLocking, "why call this otherwise?");
   1.125 -  assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg");
   1.126 -  assert_different_registers(lock_reg, obj_reg, swap_reg);
   1.127 -
   1.128 -  if (PrintBiasedLockingStatistics && counters == NULL)
   1.129 -    counters = BiasedLocking::counters();
   1.130 -
   1.131 -  bool need_tmp_reg = false;
   1.132 -  if (tmp_reg == noreg) {
   1.133 -    need_tmp_reg = true;
   1.134 -    tmp_reg = lock_reg;
   1.135 -  } else {
   1.136 -    assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
   1.137 -  }
   1.138 -  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
   1.139 -  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
   1.140 -  Address klass_addr     (obj_reg, oopDesc::klass_offset_in_bytes());
   1.141 -  Address saved_mark_addr(lock_reg, 0);
   1.142 -
   1.143 -  // Biased locking
   1.144 -  // See whether the lock is currently biased toward our thread and
   1.145 -  // whether the epoch is still valid
   1.146 -  // Note that the runtime guarantees sufficient alignment of JavaThread
   1.147 -  // pointers to allow age to be placed into low bits
   1.148 -  // First check to see whether biasing is even enabled for this object
   1.149 -  Label cas_label;
   1.150 -  int null_check_offset = -1;
   1.151 -  if (!swap_reg_contains_mark) {
   1.152 -    null_check_offset = offset();
   1.153 -    movl(swap_reg, mark_addr);
   1.154 -  }
   1.155 -  if (need_tmp_reg) {
   1.156 -    push(tmp_reg);
   1.157 -  }
   1.158 -  movl(tmp_reg, swap_reg);
   1.159 -  andl(tmp_reg, markOopDesc::biased_lock_mask_in_place);
   1.160 -  cmpl(tmp_reg, markOopDesc::biased_lock_pattern);
   1.161 -  if (need_tmp_reg) {
   1.162 -    pop(tmp_reg);
   1.163 -  }
   1.164 -  jcc(Assembler::notEqual, cas_label);
   1.165 -  // The bias pattern is present in the object's header. Need to check
   1.166 -  // whether the bias owner and the epoch are both still current.
   1.167 -  // Note that because there is no current thread register on x86 we
   1.168 -  // need to store off the mark word we read out of the object to
   1.169 -  // avoid reloading it and needing to recheck invariants below. This
   1.170 -  // store is unfortunate but it makes the overall code shorter and
   1.171 -  // simpler.
   1.172 -  movl(saved_mark_addr, swap_reg);
   1.173 -  if (need_tmp_reg) {
   1.174 -    push(tmp_reg);
   1.175 -  }
   1.176 -  get_thread(tmp_reg);
   1.177 -  xorl(swap_reg, tmp_reg);
   1.178 -  if (swap_reg_contains_mark) {
   1.179 -    null_check_offset = offset();
   1.180 -  }
   1.181 -  movl(tmp_reg, klass_addr);
   1.182 -  xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset()));
   1.183 -  andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));
   1.184 -  if (need_tmp_reg) {
   1.185 -    pop(tmp_reg);
   1.186 -  }
   1.187 -  if (counters != NULL) {
   1.188 -    cond_inc32(Assembler::zero,
   1.189 -               ExternalAddress((address)counters->biased_lock_entry_count_addr()));
   1.190 -  }
   1.191 -  jcc(Assembler::equal, done);
   1.192 -
   1.193 -  Label try_revoke_bias;
   1.194 -  Label try_rebias;
   1.195 -
   1.196 -  // At this point we know that the header has the bias pattern and
   1.197 -  // that we are not the bias owner in the current epoch. We need to
   1.198 -  // figure out more details about the state of the header in order to
   1.199 -  // know what operations can be legally performed on the object's
   1.200 -  // header.
   1.201 -
   1.202 -  // If the low three bits in the xor result aren't clear, that means
   1.203 -  // the prototype header is no longer biased and we have to revoke
   1.204 -  // the bias on this object.
   1.205 -  testl(swap_reg, markOopDesc::biased_lock_mask_in_place);
   1.206 -  jcc(Assembler::notZero, try_revoke_bias);
   1.207 -
   1.208 -  // Biasing is still enabled for this data type. See whether the
   1.209 -  // epoch of the current bias is still valid, meaning that the epoch
   1.210 -  // bits of the mark word are equal to the epoch bits of the
   1.211 -  // prototype header. (Note that the prototype header's epoch bits
   1.212 -  // only change at a safepoint.) If not, attempt to rebias the object
   1.213 -  // toward the current thread. Note that we must be absolutely sure
   1.214 -  // that the current epoch is invalid in order to do this because
   1.215 -  // otherwise the manipulations it performs on the mark word are
   1.216 -  // illegal.
   1.217 -  testl(swap_reg, markOopDesc::epoch_mask_in_place);
   1.218 -  jcc(Assembler::notZero, try_rebias);
   1.219 -
   1.220 -  // The epoch of the current bias is still valid but we know nothing
   1.221 -  // about the owner; it might be set or it might be clear. Try to
   1.222 -  // acquire the bias of the object using an atomic operation. If this
   1.223 -  // fails we will go in to the runtime to revoke the object's bias.
   1.224 -  // Note that we first construct the presumed unbiased header so we
   1.225 -  // don't accidentally blow away another thread's valid bias.
   1.226 -  movl(swap_reg, saved_mark_addr);
   1.227 -  andl(swap_reg,
   1.228 -       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
   1.229 -  if (need_tmp_reg) {
   1.230 -    push(tmp_reg);
   1.231 -  }
   1.232 -  get_thread(tmp_reg);
   1.233 -  orl(tmp_reg, swap_reg);
   1.234 -  if (os::is_MP()) {
   1.235 -    lock();
   1.236 -  }
   1.237 -  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
   1.238 -  if (need_tmp_reg) {
   1.239 -    pop(tmp_reg);
   1.240 -  }
   1.241 -  // If the biasing toward our thread failed, this means that
   1.242 -  // another thread succeeded in biasing it toward itself and we
   1.243 -  // need to revoke that bias. The revocation will occur in the
   1.244 -  // interpreter runtime in the slow case.
   1.245 -  if (counters != NULL) {
   1.246 -    cond_inc32(Assembler::zero,
   1.247 -               ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr()));
   1.248 -  }
   1.249 -  if (slow_case != NULL) {
   1.250 -    jcc(Assembler::notZero, *slow_case);
   1.251 -  }
   1.252 -  jmp(done);
   1.253 -
   1.254 -  bind(try_rebias);
   1.255 -  // At this point we know the epoch has expired, meaning that the
   1.256 -  // current "bias owner", if any, is actually invalid. Under these
   1.257 -  // circumstances _only_, we are allowed to use the current header's
   1.258 -  // value as the comparison value when doing the cas to acquire the
   1.259 -  // bias in the current epoch. In other words, we allow transfer of
   1.260 -  // the bias from one thread to another directly in this situation.
   1.261 -  //
   1.262 -  // FIXME: due to a lack of registers we currently blow away the age
   1.263 -  // bits in this situation. Should attempt to preserve them.
   1.264 -  if (need_tmp_reg) {
   1.265 -    push(tmp_reg);
   1.266 -  }
   1.267 -  get_thread(tmp_reg);
   1.268 -  movl(swap_reg, klass_addr);
   1.269 -  orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset()));
   1.270 -  movl(swap_reg, saved_mark_addr);
   1.271 -  if (os::is_MP()) {
   1.272 -    lock();
   1.273 -  }
   1.274 -  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
   1.275 -  if (need_tmp_reg) {
   1.276 -    pop(tmp_reg);
   1.277 -  }
   1.278 -  // If the biasing toward our thread failed, then another thread
   1.279 -  // succeeded in biasing it toward itself and we need to revoke that
   1.280 -  // bias. The revocation will occur in the runtime in the slow case.
   1.281 -  if (counters != NULL) {
   1.282 -    cond_inc32(Assembler::zero,
   1.283 -               ExternalAddress((address)counters->rebiased_lock_entry_count_addr()));
   1.284 -  }
   1.285 -  if (slow_case != NULL) {
   1.286 -    jcc(Assembler::notZero, *slow_case);
   1.287 -  }
   1.288 -  jmp(done);
   1.289 -
   1.290 -  bind(try_revoke_bias);
   1.291 -  // The prototype mark in the klass doesn't have the bias bit set any
   1.292 -  // more, indicating that objects of this data type are not supposed
   1.293 -  // to be biased any more. We are going to try to reset the mark of
   1.294 -  // this object to the prototype value and fall through to the
   1.295 -  // CAS-based locking scheme. Note that if our CAS fails, it means
   1.296 -  // that another thread raced us for the privilege of revoking the
   1.297 -  // bias of this particular object, so it's okay to continue in the
   1.298 -  // normal locking code.
   1.299 -  //
   1.300 -  // FIXME: due to a lack of registers we currently blow away the age
   1.301 -  // bits in this situation. Should attempt to preserve them.
   1.302 -  movl(swap_reg, saved_mark_addr);
   1.303 -  if (need_tmp_reg) {
   1.304 -    push(tmp_reg);
   1.305 -  }
   1.306 -  movl(tmp_reg, klass_addr);
   1.307 -  movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));
   1.308 -  if (os::is_MP()) {
   1.309 -    lock();
   1.310 -  }
   1.311 -  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
   1.312 -  if (need_tmp_reg) {
   1.313 -    pop(tmp_reg);
   1.314 -  }
   1.315 -  // Fall through to the normal CAS-based lock, because no matter what
   1.316 -  // the result of the above CAS, some thread must have succeeded in
   1.317 -  // removing the bias bit from the object's header.
   1.318 -  if (counters != NULL) {
   1.319 -    cond_inc32(Assembler::zero,
   1.320 -               ExternalAddress((address)counters->revoked_lock_entry_count_addr()));
   1.321 -  }
   1.322 -
   1.323 -  bind(cas_label);
   1.324 -
   1.325 -  return null_check_offset;
   1.326 -}
   1.327 -void MacroAssembler::call_VM_leaf_base(address entry_point,
   1.328 -                                       int number_of_arguments) {
   1.329 -  call(RuntimeAddress(entry_point));
   1.330 -  increment(rsp, number_of_arguments * wordSize);
   1.331 -}
   1.332 -
   1.333 -void MacroAssembler::cmpklass(Address src1, Metadata* obj) {
   1.334 -  cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
   1.335 -}
   1.336 -
   1.337 -void MacroAssembler::cmpklass(Register src1, Metadata* obj) {
   1.338 -  cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
   1.339 -}
   1.340 -
   1.341 -void MacroAssembler::cmpoop(Address src1, jobject obj) {
   1.342 -  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
   1.343 -}
   1.344 -
   1.345 -void MacroAssembler::cmpoop(Register src1, jobject obj) {
   1.346 -  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
   1.347 -}
   1.348 -
   1.349 -void MacroAssembler::extend_sign(Register hi, Register lo) {
   1.350 -  // According to Intel Doc. AP-526, "Integer Divide", p.18.
   1.351 -  if (VM_Version::is_P6() && hi == rdx && lo == rax) {
   1.352 -    cdql();
   1.353 -  } else {
   1.354 -    movl(hi, lo);
   1.355 -    sarl(hi, 31);
   1.356 -  }
   1.357 -}
   1.358 -
   1.359 -void MacroAssembler::jC2(Register tmp, Label& L) {
   1.360 -  // set parity bit if FPU flag C2 is set (via rax)
   1.361 -  save_rax(tmp);
   1.362 -  fwait(); fnstsw_ax();
   1.363 -  sahf();
   1.364 -  restore_rax(tmp);
   1.365 -  // branch
   1.366 -  jcc(Assembler::parity, L);
   1.367 -}
   1.368 -
   1.369 -void MacroAssembler::jnC2(Register tmp, Label& L) {
   1.370 -  // set parity bit if FPU flag C2 is set (via rax)
   1.371 -  save_rax(tmp);
   1.372 -  fwait(); fnstsw_ax();
   1.373 -  sahf();
   1.374 -  restore_rax(tmp);
   1.375 -  // branch
   1.376 -  jcc(Assembler::noParity, L);
   1.377 -}
   1.378 -
   1.379 -// 32bit can do a case table jump in one instruction but we no longer allow the base
   1.380 -// to be installed in the Address class
   1.381 -void MacroAssembler::jump(ArrayAddress entry) {
   1.382 -  jmp(as_Address(entry));
   1.383 -}
   1.384 -
   1.385 -// Note: y_lo will be destroyed
   1.386 -void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
   1.387 -  // Long compare for Java (semantics as described in JVM spec.)
   1.388 -  Label high, low, done;
   1.389 -
   1.390 -  cmpl(x_hi, y_hi);
   1.391 -  jcc(Assembler::less, low);
   1.392 -  jcc(Assembler::greater, high);
   1.393 -  // x_hi is the return register
   1.394 -  xorl(x_hi, x_hi);
   1.395 -  cmpl(x_lo, y_lo);
   1.396 -  jcc(Assembler::below, low);
   1.397 -  jcc(Assembler::equal, done);
   1.398 -
   1.399 -  bind(high);
   1.400 -  xorl(x_hi, x_hi);
   1.401 -  increment(x_hi);
   1.402 -  jmp(done);
   1.403 -
   1.404 -  bind(low);
   1.405 -  xorl(x_hi, x_hi);
   1.406 -  decrementl(x_hi);
   1.407 -
   1.408 -  bind(done);
   1.409 -}
   1.410 -
   1.411 -void MacroAssembler::lea(Register dst, AddressLiteral src) {
   1.412 -    mov_literal32(dst, (int32_t)src.target(), src.rspec());
   1.413 -}
   1.414 -
   1.415 -void MacroAssembler::lea(Address dst, AddressLiteral adr) {
   1.416 -  // leal(dst, as_Address(adr));
   1.417 -  // see note in movl as to why we must use a move
   1.418 -  mov_literal32(dst, (int32_t) adr.target(), adr.rspec());
   1.419 -}
   1.420 -
   1.421 -void MacroAssembler::leave() {
   1.422 -  mov(rsp, rbp);
   1.423 -  pop(rbp);
   1.424 -}
   1.425 -
   1.426 -void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) {
   1.427 -  // Multiplication of two Java long values stored on the stack
   1.428 -  // as illustrated below. Result is in rdx:rax.
   1.429 -  //
   1.430 -  // rsp ---> [  ??  ] \               \
   1.431 -  //            ....    | y_rsp_offset  |
   1.432 -  //          [ y_lo ] /  (in bytes)    | x_rsp_offset
   1.433 -  //          [ y_hi ]                  | (in bytes)
   1.434 -  //            ....                    |
   1.435 -  //          [ x_lo ]                 /
   1.436 -  //          [ x_hi ]
   1.437 -  //            ....
   1.438 -  //
   1.439 -  // Basic idea: lo(result) = lo(x_lo * y_lo)
   1.440 -  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
   1.441 -  Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset);
   1.442 -  Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset);
   1.443 -  Label quick;
   1.444 -  // load x_hi, y_hi and check if quick
   1.445 -  // multiplication is possible
   1.446 -  movl(rbx, x_hi);
   1.447 -  movl(rcx, y_hi);
   1.448 -  movl(rax, rbx);
   1.449 -  orl(rbx, rcx);                                 // rbx, = 0 <=> x_hi = 0 and y_hi = 0
   1.450 -  jcc(Assembler::zero, quick);                   // if rbx, = 0 do quick multiply
   1.451 -  // do full multiplication
   1.452 -  // 1st step
   1.453 -  mull(y_lo);                                    // x_hi * y_lo
   1.454 -  movl(rbx, rax);                                // save lo(x_hi * y_lo) in rbx,
   1.455 -  // 2nd step
   1.456 -  movl(rax, x_lo);
   1.457 -  mull(rcx);                                     // x_lo * y_hi
   1.458 -  addl(rbx, rax);                                // add lo(x_lo * y_hi) to rbx,
   1.459 -  // 3rd step
   1.460 -  bind(quick);                                   // note: rbx, = 0 if quick multiply!
   1.461 -  movl(rax, x_lo);
   1.462 -  mull(y_lo);                                    // x_lo * y_lo
   1.463 -  addl(rdx, rbx);                                // correct hi(x_lo * y_lo)
   1.464 -}
   1.465 -
   1.466 -void MacroAssembler::lneg(Register hi, Register lo) {
   1.467 -  negl(lo);
   1.468 -  adcl(hi, 0);
   1.469 -  negl(hi);
   1.470 -}
   1.471 -
   1.472 -void MacroAssembler::lshl(Register hi, Register lo) {
   1.473 -  // Java shift left long support (semantics as described in JVM spec., p.305)
   1.474 -  // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n))
   1.475 -  // shift value is in rcx !
   1.476 -  assert(hi != rcx, "must not use rcx");
   1.477 -  assert(lo != rcx, "must not use rcx");
   1.478 -  const Register s = rcx;                        // shift count
   1.479 -  const int      n = BitsPerWord;
   1.480 -  Label L;
   1.481 -  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
   1.482 -  cmpl(s, n);                                    // if (s < n)
   1.483 -  jcc(Assembler::less, L);                       // else (s >= n)
   1.484 -  movl(hi, lo);                                  // x := x << n
   1.485 -  xorl(lo, lo);
   1.486 -  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
   1.487 -  bind(L);                                       // s (mod n) < n
   1.488 -  shldl(hi, lo);                                 // x := x << s
   1.489 -  shll(lo);
   1.490 -}
   1.491 -
   1.492 -
   1.493 -void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) {
   1.494 -  // Java shift right long support (semantics as described in JVM spec., p.306 & p.310)
   1.495 -  // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n))
   1.496 -  assert(hi != rcx, "must not use rcx");
   1.497 -  assert(lo != rcx, "must not use rcx");
   1.498 -  const Register s = rcx;                        // shift count
   1.499 -  const int      n = BitsPerWord;
   1.500 -  Label L;
   1.501 -  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
   1.502 -  cmpl(s, n);                                    // if (s < n)
   1.503 -  jcc(Assembler::less, L);                       // else (s >= n)
   1.504 -  movl(lo, hi);                                  // x := x >> n
   1.505 -  if (sign_extension) sarl(hi, 31);
   1.506 -  else                xorl(hi, hi);
   1.507 -  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
   1.508 -  bind(L);                                       // s (mod n) < n
   1.509 -  shrdl(lo, hi);                                 // x := x >> s
   1.510 -  if (sign_extension) sarl(hi);
   1.511 -  else                shrl(hi);
   1.512 -}
   1.513 -
   1.514 -void MacroAssembler::movoop(Register dst, jobject obj) {
   1.515 -  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
   1.516 -}
   1.517 -
   1.518 -void MacroAssembler::movoop(Address dst, jobject obj) {
   1.519 -  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
   1.520 -}
   1.521 -
   1.522 -void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
   1.523 -  mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate());
   1.524 -}
   1.525 -
   1.526 -void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
   1.527 -  mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate());
   1.528 -}
   1.529 -
   1.530 -void MacroAssembler::movptr(Register dst, AddressLiteral src) {
   1.531 -  if (src.is_lval()) {
   1.532 -    mov_literal32(dst, (intptr_t)src.target(), src.rspec());
   1.533 -  } else {
   1.534 -    movl(dst, as_Address(src));
   1.535 -  }
   1.536 -}
   1.537 -
   1.538 -void MacroAssembler::movptr(ArrayAddress dst, Register src) {
   1.539 -  movl(as_Address(dst), src);
   1.540 -}
   1.541 -
   1.542 -void MacroAssembler::movptr(Register dst, ArrayAddress src) {
   1.543 -  movl(dst, as_Address(src));
   1.544 -}
   1.545 -
   1.546 -// src should NEVER be a real pointer. Use AddressLiteral for true pointers
   1.547 -void MacroAssembler::movptr(Address dst, intptr_t src) {
   1.548 -  movl(dst, src);
   1.549 -}
   1.550 -
   1.551 -
   1.552 -void MacroAssembler::pop_callee_saved_registers() {
   1.553 -  pop(rcx);
   1.554 -  pop(rdx);
   1.555 -  pop(rdi);
   1.556 -  pop(rsi);
   1.557 -}
   1.558 -
   1.559 -void MacroAssembler::pop_fTOS() {
   1.560 -  fld_d(Address(rsp, 0));
   1.561 -  addl(rsp, 2 * wordSize);
   1.562 -}
   1.563 -
   1.564 -void MacroAssembler::push_callee_saved_registers() {
   1.565 -  push(rsi);
   1.566 -  push(rdi);
   1.567 -  push(rdx);
   1.568 -  push(rcx);
   1.569 -}
   1.570 -
   1.571 -void MacroAssembler::push_fTOS() {
   1.572 -  subl(rsp, 2 * wordSize);
   1.573 -  fstp_d(Address(rsp, 0));
   1.574 -}
   1.575 -
   1.576 -
   1.577 -void MacroAssembler::pushoop(jobject obj) {
   1.578 -  push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate());
   1.579 -}
   1.580 -
   1.581 -void MacroAssembler::pushklass(Metadata* obj) {
   1.582 -  push_literal32((int32_t)obj, metadata_Relocation::spec_for_immediate());
   1.583 -}
   1.584 -
   1.585 -void MacroAssembler::pushptr(AddressLiteral src) {
   1.586 -  if (src.is_lval()) {
   1.587 -    push_literal32((int32_t)src.target(), src.rspec());
   1.588 -  } else {
   1.589 -    pushl(as_Address(src));
   1.590 -  }
   1.591 -}
   1.592 -
   1.593 -void MacroAssembler::set_word_if_not_zero(Register dst) {
   1.594 -  xorl(dst, dst);
   1.595 -  set_byte_if_not_zero(dst);
   1.596 -}
   1.597 -
   1.598 -static void pass_arg0(MacroAssembler* masm, Register arg) {
   1.599 -  masm->push(arg);
   1.600 -}
   1.601 -
   1.602 -static void pass_arg1(MacroAssembler* masm, Register arg) {
   1.603 -  masm->push(arg);
   1.604 -}
   1.605 -
   1.606 -static void pass_arg2(MacroAssembler* masm, Register arg) {
   1.607 -  masm->push(arg);
   1.608 -}
   1.609 -
   1.610 -static void pass_arg3(MacroAssembler* masm, Register arg) {
   1.611 -  masm->push(arg);
   1.612 -}
   1.613 -
   1.614 -#ifndef PRODUCT
   1.615 -extern "C" void findpc(intptr_t x);
   1.616 -#endif
   1.617 -
   1.618 -void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
   1.619 -  // In order to get locks to work, we need to fake a in_VM state
   1.620 -  JavaThread* thread = JavaThread::current();
   1.621 -  JavaThreadState saved_state = thread->thread_state();
   1.622 -  thread->set_thread_state(_thread_in_vm);
   1.623 -  if (ShowMessageBoxOnError) {
   1.624 -    JavaThread* thread = JavaThread::current();
   1.625 -    JavaThreadState saved_state = thread->thread_state();
   1.626 -    thread->set_thread_state(_thread_in_vm);
   1.627 -    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
   1.628 -      ttyLocker ttyl;
   1.629 -      BytecodeCounter::print();
   1.630 -    }
   1.631 -    // To see where a verify_oop failed, get $ebx+40/X for this frame.
   1.632 -    // This is the value of eip which points to where verify_oop will return.
   1.633 -    if (os::message_box(msg, "Execution stopped, print registers?")) {
   1.634 -      print_state32(rdi, rsi, rbp, rsp, rbx, rdx, rcx, rax, eip);
   1.635 -      BREAKPOINT;
   1.636 -    }
   1.637 -  } else {
   1.638 -    ttyLocker ttyl;
   1.639 -    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
   1.640 -  }
   1.641 -  // Don't assert holding the ttyLock
   1.642 -    assert(false, err_msg("DEBUG MESSAGE: %s", msg));
   1.643 -  ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
   1.644 -}
   1.645 -
   1.646 -void MacroAssembler::print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip) {
   1.647 -  ttyLocker ttyl;
   1.648 -  FlagSetting fs(Debugging, true);
   1.649 -  tty->print_cr("eip = 0x%08x", eip);
   1.650 -#ifndef PRODUCT
   1.651 -  if ((WizardMode || Verbose) && PrintMiscellaneous) {
   1.652 -    tty->cr();
   1.653 -    findpc(eip);
   1.654 -    tty->cr();
   1.655 -  }
   1.656 -#endif
   1.657 -#define PRINT_REG(rax) \
   1.658 -  { tty->print("%s = ", #rax); os::print_location(tty, rax); }
   1.659 -  PRINT_REG(rax);
   1.660 -  PRINT_REG(rbx);
   1.661 -  PRINT_REG(rcx);
   1.662 -  PRINT_REG(rdx);
   1.663 -  PRINT_REG(rdi);
   1.664 -  PRINT_REG(rsi);
   1.665 -  PRINT_REG(rbp);
   1.666 -  PRINT_REG(rsp);
   1.667 -#undef PRINT_REG
   1.668 -  // Print some words near top of staack.
   1.669 -  int* dump_sp = (int*) rsp;
   1.670 -  for (int col1 = 0; col1 < 8; col1++) {
   1.671 -    tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp);
   1.672 -    os::print_location(tty, *dump_sp++);
   1.673 -  }
   1.674 -  for (int row = 0; row < 16; row++) {
   1.675 -    tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp);
   1.676 -    for (int col = 0; col < 8; col++) {
   1.677 -      tty->print(" 0x%08x", *dump_sp++);
   1.678 -    }
   1.679 -    tty->cr();
   1.680 -  }
   1.681 -  // Print some instructions around pc:
   1.682 -  Disassembler::decode((address)eip-64, (address)eip);
   1.683 -  tty->print_cr("--------");
   1.684 -  Disassembler::decode((address)eip, (address)eip+32);
   1.685 -}
   1.686 -
   1.687 -void MacroAssembler::stop(const char* msg) {
   1.688 -  ExternalAddress message((address)msg);
   1.689 -  // push address of message
   1.690 -  pushptr(message.addr());
   1.691 -  { Label L; call(L, relocInfo::none); bind(L); }     // push eip
   1.692 -  pusha();                                            // push registers
   1.693 -  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));
   1.694 -  hlt();
   1.695 -}
   1.696 -
   1.697 -void MacroAssembler::warn(const char* msg) {
   1.698 -  push_CPU_state();
   1.699 -
   1.700 -  ExternalAddress message((address) msg);
   1.701 -  // push address of message
   1.702 -  pushptr(message.addr());
   1.703 -
   1.704 -  call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning)));
   1.705 -  addl(rsp, wordSize);       // discard argument
   1.706 -  pop_CPU_state();
   1.707 -}
   1.708 -
   1.709 -void MacroAssembler::print_state() {
   1.710 -  { Label L; call(L, relocInfo::none); bind(L); }     // push eip
   1.711 -  pusha();                                            // push registers
   1.712 -
   1.713 -  push_CPU_state();
   1.714 -  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::print_state32)));
   1.715 -  pop_CPU_state();
   1.716 -
   1.717 -  popa();
   1.718 -  addl(rsp, wordSize);
   1.719 -}
   1.720 -
   1.721 -#else // _LP64
   1.722 -
   1.723 -// 64 bit versions
   1.724 -
   1.725 -Address MacroAssembler::as_Address(AddressLiteral adr) {
   1.726 -  // amd64 always does this as a pc-rel
   1.727 -  // we can be absolute or disp based on the instruction type
   1.728 -  // jmp/call are displacements others are absolute
   1.729 -  assert(!adr.is_lval(), "must be rval");
   1.730 -  assert(reachable(adr), "must be");
   1.731 -  return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc());
   1.732 -
   1.733 -}
   1.734 -
   1.735 -Address MacroAssembler::as_Address(ArrayAddress adr) {
   1.736 -  AddressLiteral base = adr.base();
   1.737 -  lea(rscratch1, base);
   1.738 -  Address index = adr.index();
   1.739 -  assert(index._disp == 0, "must not have disp"); // maybe it can?
   1.740 -  Address array(rscratch1, index._index, index._scale, index._disp);
   1.741 -  return array;
   1.742 -}
   1.743 -
   1.744 -int MacroAssembler::biased_locking_enter(Register lock_reg,
   1.745 -                                         Register obj_reg,
   1.746 -                                         Register swap_reg,
   1.747 -                                         Register tmp_reg,
   1.748 -                                         bool swap_reg_contains_mark,
   1.749 -                                         Label& done,
   1.750 -                                         Label* slow_case,
   1.751 -                                         BiasedLockingCounters* counters) {
   1.752 -  assert(UseBiasedLocking, "why call this otherwise?");
   1.753 -  assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
   1.754 -  assert(tmp_reg != noreg, "tmp_reg must be supplied");
   1.755 -  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
   1.756 -  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
   1.757 -  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
   1.758 -  Address saved_mark_addr(lock_reg, 0);
   1.759 -
   1.760 -  if (PrintBiasedLockingStatistics && counters == NULL)
   1.761 -    counters = BiasedLocking::counters();
   1.762 -
   1.763 -  // Biased locking
   1.764 -  // See whether the lock is currently biased toward our thread and
   1.765 -  // whether the epoch is still valid
   1.766 -  // Note that the runtime guarantees sufficient alignment of JavaThread
   1.767 -  // pointers to allow age to be placed into low bits
   1.768 -  // First check to see whether biasing is even enabled for this object
   1.769 -  Label cas_label;
   1.770 -  int null_check_offset = -1;
   1.771 -  if (!swap_reg_contains_mark) {
   1.772 -    null_check_offset = offset();
   1.773 -    movq(swap_reg, mark_addr);
   1.774 -  }
   1.775 -  movq(tmp_reg, swap_reg);
   1.776 -  andq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
   1.777 -  cmpq(tmp_reg, markOopDesc::biased_lock_pattern);
   1.778 -  jcc(Assembler::notEqual, cas_label);
   1.779 -  // The bias pattern is present in the object's header. Need to check
   1.780 -  // whether the bias owner and the epoch are both still current.
   1.781 -  load_prototype_header(tmp_reg, obj_reg);
   1.782 -  orq(tmp_reg, r15_thread);
   1.783 -  xorq(tmp_reg, swap_reg);
   1.784 -  andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place));
   1.785 -  if (counters != NULL) {
   1.786 -    cond_inc32(Assembler::zero,
   1.787 -               ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
   1.788 -  }
   1.789 -  jcc(Assembler::equal, done);
   1.790 -
   1.791 -  Label try_revoke_bias;
   1.792 -  Label try_rebias;
   1.793 -
   1.794 -  // At this point we know that the header has the bias pattern and
   1.795 -  // that we are not the bias owner in the current epoch. We need to
   1.796 -  // figure out more details about the state of the header in order to
   1.797 -  // know what operations can be legally performed on the object's
   1.798 -  // header.
   1.799 -
   1.800 -  // If the low three bits in the xor result aren't clear, that means
   1.801 -  // the prototype header is no longer biased and we have to revoke
   1.802 -  // the bias on this object.
   1.803 -  testq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
   1.804 -  jcc(Assembler::notZero, try_revoke_bias);
   1.805 -
   1.806 -  // Biasing is still enabled for this data type. See whether the
   1.807 -  // epoch of the current bias is still valid, meaning that the epoch
   1.808 -  // bits of the mark word are equal to the epoch bits of the
   1.809 -  // prototype header. (Note that the prototype header's epoch bits
   1.810 -  // only change at a safepoint.) If not, attempt to rebias the object
   1.811 -  // toward the current thread. Note that we must be absolutely sure
   1.812 -  // that the current epoch is invalid in order to do this because
   1.813 -  // otherwise the manipulations it performs on the mark word are
   1.814 -  // illegal.
   1.815 -  testq(tmp_reg, markOopDesc::epoch_mask_in_place);
   1.816 -  jcc(Assembler::notZero, try_rebias);
   1.817 -
   1.818 -  // The epoch of the current bias is still valid but we know nothing
   1.819 -  // about the owner; it might be set or it might be clear. Try to
   1.820 -  // acquire the bias of the object using an atomic operation. If this
   1.821 -  // fails we will go in to the runtime to revoke the object's bias.
   1.822 -  // Note that we first construct the presumed unbiased header so we
   1.823 -  // don't accidentally blow away another thread's valid bias.
   1.824 -  andq(swap_reg,
   1.825 -       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
   1.826 -  movq(tmp_reg, swap_reg);
   1.827 -  orq(tmp_reg, r15_thread);
   1.828 -  if (os::is_MP()) {
   1.829 -    lock();
   1.830 -  }
   1.831 -  cmpxchgq(tmp_reg, Address(obj_reg, 0));
   1.832 -  // If the biasing toward our thread failed, this means that
   1.833 -  // another thread succeeded in biasing it toward itself and we
   1.834 -  // need to revoke that bias. The revocation will occur in the
   1.835 -  // interpreter runtime in the slow case.
   1.836 -  if (counters != NULL) {
   1.837 -    cond_inc32(Assembler::zero,
   1.838 -               ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
   1.839 -  }
   1.840 -  if (slow_case != NULL) {
   1.841 -    jcc(Assembler::notZero, *slow_case);
   1.842 -  }
   1.843 -  jmp(done);
   1.844 -
   1.845 -  bind(try_rebias);
   1.846 -  // At this point we know the epoch has expired, meaning that the
   1.847 -  // current "bias owner", if any, is actually invalid. Under these
   1.848 -  // circumstances _only_, we are allowed to use the current header's
   1.849 -  // value as the comparison value when doing the cas to acquire the
   1.850 -  // bias in the current epoch. In other words, we allow transfer of
   1.851 -  // the bias from one thread to another directly in this situation.
   1.852 -  //
   1.853 -  // FIXME: due to a lack of registers we currently blow away the age
   1.854 -  // bits in this situation. Should attempt to preserve them.
   1.855 -  load_prototype_header(tmp_reg, obj_reg);
   1.856 -  orq(tmp_reg, r15_thread);
   1.857 -  if (os::is_MP()) {
   1.858 -    lock();
   1.859 -  }
   1.860 -  cmpxchgq(tmp_reg, Address(obj_reg, 0));
   1.861 -  // If the biasing toward our thread failed, then another thread
   1.862 -  // succeeded in biasing it toward itself and we need to revoke that
   1.863 -  // bias. The revocation will occur in the runtime in the slow case.
   1.864 -  if (counters != NULL) {
   1.865 -    cond_inc32(Assembler::zero,
   1.866 -               ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
   1.867 -  }
   1.868 -  if (slow_case != NULL) {
   1.869 -    jcc(Assembler::notZero, *slow_case);
   1.870 -  }
   1.871 -  jmp(done);
   1.872 -
   1.873 -  bind(try_revoke_bias);
   1.874 -  // The prototype mark in the klass doesn't have the bias bit set any
   1.875 -  // more, indicating that objects of this data type are not supposed
   1.876 -  // to be biased any more. We are going to try to reset the mark of
   1.877 -  // this object to the prototype value and fall through to the
   1.878 -  // CAS-based locking scheme. Note that if our CAS fails, it means
   1.879 -  // that another thread raced us for the privilege of revoking the
   1.880 -  // bias of this particular object, so it's okay to continue in the
   1.881 -  // normal locking code.
   1.882 -  //
   1.883 -  // FIXME: due to a lack of registers we currently blow away the age
   1.884 -  // bits in this situation. Should attempt to preserve them.
   1.885 -  load_prototype_header(tmp_reg, obj_reg);
   1.886 -  if (os::is_MP()) {
   1.887 -    lock();
   1.888 -  }
   1.889 -  cmpxchgq(tmp_reg, Address(obj_reg, 0));
   1.890 -  // Fall through to the normal CAS-based lock, because no matter what
   1.891 -  // the result of the above CAS, some thread must have succeeded in
   1.892 -  // removing the bias bit from the object's header.
   1.893 -  if (counters != NULL) {
   1.894 -    cond_inc32(Assembler::zero,
   1.895 -               ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
   1.896 -  }
   1.897 -
   1.898 -  bind(cas_label);
   1.899 -
   1.900 -  return null_check_offset;
   1.901 -}
   1.902 -
   1.903 -void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) {
   1.904 -  Label L, E;
   1.905 -
   1.906 -#ifdef _WIN64
   1.907 -  // Windows always allocates space for it's register args
   1.908 -  assert(num_args <= 4, "only register arguments supported");
   1.909 -  subq(rsp,  frame::arg_reg_save_area_bytes);
   1.910 -#endif
   1.911 -
   1.912 -  // Align stack if necessary
   1.913 -  testl(rsp, 15);
   1.914 -  jcc(Assembler::zero, L);
   1.915 -
   1.916 -  subq(rsp, 8);
   1.917 -  {
   1.918 -    call(RuntimeAddress(entry_point));
   1.919 -  }
   1.920 -  addq(rsp, 8);
   1.921 -  jmp(E);
   1.922 -
   1.923 -  bind(L);
   1.924 -  {
   1.925 -    call(RuntimeAddress(entry_point));
   1.926 -  }
   1.927 -
   1.928 -  bind(E);
   1.929 -
   1.930 -#ifdef _WIN64
   1.931 -  // restore stack pointer
   1.932 -  addq(rsp, frame::arg_reg_save_area_bytes);
   1.933 -#endif
   1.934 -
   1.935 -}
   1.936 -
   1.937 -void MacroAssembler::cmp64(Register src1, AddressLiteral src2) {
   1.938 -  assert(!src2.is_lval(), "should use cmpptr");
   1.939 -
   1.940 -  if (reachable(src2)) {
   1.941 -    cmpq(src1, as_Address(src2));
   1.942 -  } else {
   1.943 -    lea(rscratch1, src2);
   1.944 -    Assembler::cmpq(src1, Address(rscratch1, 0));
   1.945 -  }
   1.946 -}
   1.947 -
   1.948 -int MacroAssembler::corrected_idivq(Register reg) {
   1.949 -  // Full implementation of Java ldiv and lrem; checks for special
   1.950 -  // case as described in JVM spec., p.243 & p.271.  The function
   1.951 -  // returns the (pc) offset of the idivl instruction - may be needed
   1.952 -  // for implicit exceptions.
   1.953 -  //
   1.954 -  //         normal case                           special case
   1.955 -  //
   1.956 -  // input : rax: dividend                         min_long
   1.957 -  //         reg: divisor   (may not be eax/edx)   -1
   1.958 -  //
   1.959 -  // output: rax: quotient  (= rax idiv reg)       min_long
   1.960 -  //         rdx: remainder (= rax irem reg)       0
   1.961 -  assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register");
   1.962 -  static const int64_t min_long = 0x8000000000000000;
   1.963 -  Label normal_case, special_case;
   1.964 -
   1.965 -  // check for special case
   1.966 -  cmp64(rax, ExternalAddress((address) &min_long));
   1.967 -  jcc(Assembler::notEqual, normal_case);
   1.968 -  xorl(rdx, rdx); // prepare rdx for possible special case (where
   1.969 -                  // remainder = 0)
   1.970 -  cmpq(reg, -1);
   1.971 -  jcc(Assembler::equal, special_case);
   1.972 -
   1.973 -  // handle normal case
   1.974 -  bind(normal_case);
   1.975 -  cdqq();
   1.976 -  int idivq_offset = offset();
   1.977 -  idivq(reg);
   1.978 -
   1.979 -  // normal and special case exit
   1.980 -  bind(special_case);
   1.981 -
   1.982 -  return idivq_offset;
   1.983 -}
   1.984 -
   1.985 -void MacroAssembler::decrementq(Register reg, int value) {
   1.986 -  if (value == min_jint) { subq(reg, value); return; }
   1.987 -  if (value <  0) { incrementq(reg, -value); return; }
   1.988 -  if (value == 0) {                        ; return; }
   1.989 -  if (value == 1 && UseIncDec) { decq(reg) ; return; }
   1.990 -  /* else */      { subq(reg, value)       ; return; }
   1.991 -}
   1.992 -
   1.993 -void MacroAssembler::decrementq(Address dst, int value) {
   1.994 -  if (value == min_jint) { subq(dst, value); return; }
   1.995 -  if (value <  0) { incrementq(dst, -value); return; }
   1.996 -  if (value == 0) {                        ; return; }
   1.997 -  if (value == 1 && UseIncDec) { decq(dst) ; return; }
   1.998 -  /* else */      { subq(dst, value)       ; return; }
   1.999 -}
  1.1000 -
  1.1001 -void MacroAssembler::incrementq(Register reg, int value) {
  1.1002 -  if (value == min_jint) { addq(reg, value); return; }
  1.1003 -  if (value <  0) { decrementq(reg, -value); return; }
  1.1004 -  if (value == 0) {                        ; return; }
  1.1005 -  if (value == 1 && UseIncDec) { incq(reg) ; return; }
  1.1006 -  /* else */      { addq(reg, value)       ; return; }
  1.1007 -}
  1.1008 -
  1.1009 -void MacroAssembler::incrementq(Address dst, int value) {
  1.1010 -  if (value == min_jint) { addq(dst, value); return; }
  1.1011 -  if (value <  0) { decrementq(dst, -value); return; }
  1.1012 -  if (value == 0) {                        ; return; }
  1.1013 -  if (value == 1 && UseIncDec) { incq(dst) ; return; }
  1.1014 -  /* else */      { addq(dst, value)       ; return; }
  1.1015 -}
  1.1016 -
  1.1017 -// 32bit can do a case table jump in one instruction but we no longer allow the base
  1.1018 -// to be installed in the Address class
  1.1019 -void MacroAssembler::jump(ArrayAddress entry) {
  1.1020 -  lea(rscratch1, entry.base());
  1.1021 -  Address dispatch = entry.index();
  1.1022 -  assert(dispatch._base == noreg, "must be");
  1.1023 -  dispatch._base = rscratch1;
  1.1024 -  jmp(dispatch);
  1.1025 -}
  1.1026 -
  1.1027 -void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
  1.1028 -  ShouldNotReachHere(); // 64bit doesn't use two regs
  1.1029 -  cmpq(x_lo, y_lo);
  1.1030 -}
  1.1031 -
  1.1032 -void MacroAssembler::lea(Register dst, AddressLiteral src) {
  1.1033 -    mov_literal64(dst, (intptr_t)src.target(), src.rspec());
  1.1034 -}
  1.1035 -
  1.1036 -void MacroAssembler::lea(Address dst, AddressLiteral adr) {
  1.1037 -  mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec());
  1.1038 -  movptr(dst, rscratch1);
  1.1039 -}
  1.1040 -
  1.1041 -void MacroAssembler::leave() {
  1.1042 -  // %%% is this really better? Why not on 32bit too?
  1.1043 -  emit_byte(0xC9); // LEAVE
  1.1044 -}
  1.1045 -
  1.1046 -void MacroAssembler::lneg(Register hi, Register lo) {
  1.1047 -  ShouldNotReachHere(); // 64bit doesn't use two regs
  1.1048 -  negq(lo);
  1.1049 -}
  1.1050 -
  1.1051 -void MacroAssembler::movoop(Register dst, jobject obj) {
  1.1052 -  mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate());
  1.1053 -}
  1.1054 -
  1.1055 -void MacroAssembler::movoop(Address dst, jobject obj) {
  1.1056 -  mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate());
  1.1057 -  movq(dst, rscratch1);
  1.1058 -}
  1.1059 -
  1.1060 -void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
  1.1061 -  mov_literal64(dst, (intptr_t)obj, metadata_Relocation::spec_for_immediate());
  1.1062 -}
  1.1063 -
  1.1064 -void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
  1.1065 -  mov_literal64(rscratch1, (intptr_t)obj, metadata_Relocation::spec_for_immediate());
  1.1066 -  movq(dst, rscratch1);
  1.1067 -}
  1.1068 -
  1.1069 -void MacroAssembler::movptr(Register dst, AddressLiteral src) {
  1.1070 -  if (src.is_lval()) {
  1.1071 -    mov_literal64(dst, (intptr_t)src.target(), src.rspec());
  1.1072 -  } else {
  1.1073 -    if (reachable(src)) {
  1.1074 -      movq(dst, as_Address(src));
  1.1075 -    } else {
  1.1076 -      lea(rscratch1, src);
  1.1077 -      movq(dst, Address(rscratch1,0));
  1.1078 -    }
  1.1079 -  }
  1.1080 -}
  1.1081 -
  1.1082 -void MacroAssembler::movptr(ArrayAddress dst, Register src) {
  1.1083 -  movq(as_Address(dst), src);
  1.1084 -}
  1.1085 -
  1.1086 -void MacroAssembler::movptr(Register dst, ArrayAddress src) {
  1.1087 -  movq(dst, as_Address(src));
  1.1088 -}
  1.1089 -
  1.1090 -// src should NEVER be a real pointer. Use AddressLiteral for true pointers
  1.1091 -void MacroAssembler::movptr(Address dst, intptr_t src) {
  1.1092 -  mov64(rscratch1, src);
  1.1093 -  movq(dst, rscratch1);
  1.1094 -}
  1.1095 -
  1.1096 -// These are mostly for initializing NULL
  1.1097 -void MacroAssembler::movptr(Address dst, int32_t src) {
  1.1098 -  movslq(dst, src);
  1.1099 -}
  1.1100 -
  1.1101 -void MacroAssembler::movptr(Register dst, int32_t src) {
  1.1102 -  mov64(dst, (intptr_t)src);
  1.1103 -}
  1.1104 -
  1.1105 -void MacroAssembler::pushoop(jobject obj) {
  1.1106 -  movoop(rscratch1, obj);
  1.1107 -  push(rscratch1);
  1.1108 -}
  1.1109 -
  1.1110 -void MacroAssembler::pushklass(Metadata* obj) {
  1.1111 -  mov_metadata(rscratch1, obj);
  1.1112 -  push(rscratch1);
  1.1113 -}
  1.1114 -
  1.1115 -void MacroAssembler::pushptr(AddressLiteral src) {
  1.1116 -  lea(rscratch1, src);
  1.1117 -  if (src.is_lval()) {
  1.1118 -    push(rscratch1);
  1.1119 -  } else {
  1.1120 -    pushq(Address(rscratch1, 0));
  1.1121 -  }
  1.1122 -}
  1.1123 -
  1.1124 -void MacroAssembler::reset_last_Java_frame(bool clear_fp,
  1.1125 -                                           bool clear_pc) {
  1.1126 -  // we must set sp to zero to clear frame
  1.1127 -  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
  1.1128 -  // must clear fp, so that compiled frames are not confused; it is
  1.1129 -  // possible that we need it only for debugging
  1.1130 -  if (clear_fp) {
  1.1131 -    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
  1.1132 -  }
  1.1133 -
  1.1134 -  if (clear_pc) {
  1.1135 -    movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
  1.1136 -  }
  1.1137 -}
  1.1138 -
  1.1139 -void MacroAssembler::set_last_Java_frame(Register last_java_sp,
  1.1140 -                                         Register last_java_fp,
  1.1141 -                                         address  last_java_pc) {
  1.1142 -  // determine last_java_sp register
  1.1143 -  if (!last_java_sp->is_valid()) {
  1.1144 -    last_java_sp = rsp;
  1.1145 -  }
  1.1146 -
  1.1147 -  // last_java_fp is optional
  1.1148 -  if (last_java_fp->is_valid()) {
  1.1149 -    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()),
  1.1150 -           last_java_fp);
  1.1151 -  }
  1.1152 -
  1.1153 -  // last_java_pc is optional
  1.1154 -  if (last_java_pc != NULL) {
  1.1155 -    Address java_pc(r15_thread,
  1.1156 -                    JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
  1.1157 -    lea(rscratch1, InternalAddress(last_java_pc));
  1.1158 -    movptr(java_pc, rscratch1);
  1.1159 -  }
  1.1160 -
  1.1161 -  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
  1.1162 -}
  1.1163 -
  1.1164 -static void pass_arg0(MacroAssembler* masm, Register arg) {
  1.1165 -  if (c_rarg0 != arg ) {
  1.1166 -    masm->mov(c_rarg0, arg);
  1.1167 -  }
  1.1168 -}
  1.1169 -
  1.1170 -static void pass_arg1(MacroAssembler* masm, Register arg) {
  1.1171 -  if (c_rarg1 != arg ) {
  1.1172 -    masm->mov(c_rarg1, arg);
  1.1173 -  }
  1.1174 -}
  1.1175 -
  1.1176 -static void pass_arg2(MacroAssembler* masm, Register arg) {
  1.1177 -  if (c_rarg2 != arg ) {
  1.1178 -    masm->mov(c_rarg2, arg);
  1.1179 -  }
  1.1180 -}
  1.1181 -
  1.1182 -static void pass_arg3(MacroAssembler* masm, Register arg) {
  1.1183 -  if (c_rarg3 != arg ) {
  1.1184 -    masm->mov(c_rarg3, arg);
  1.1185 -  }
  1.1186 -}
  1.1187 -
  1.1188 -void MacroAssembler::stop(const char* msg) {
  1.1189 -  address rip = pc();
  1.1190 -  pusha(); // get regs on stack
  1.1191 -  lea(c_rarg0, ExternalAddress((address) msg));
  1.1192 -  lea(c_rarg1, InternalAddress(rip));
  1.1193 -  movq(c_rarg2, rsp); // pass pointer to regs array
  1.1194 -  andq(rsp, -16); // align stack as required by ABI
  1.1195 -  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64)));
  1.1196 -  hlt();
  1.1197 -}
  1.1198 -
  1.1199 -void MacroAssembler::warn(const char* msg) {
  1.1200 -  push(rbp);
  1.1201 -  movq(rbp, rsp);
  1.1202 -  andq(rsp, -16);     // align stack as required by push_CPU_state and call
  1.1203 -  push_CPU_state();   // keeps alignment at 16 bytes
  1.1204 -  lea(c_rarg0, ExternalAddress((address) msg));
  1.1205 -  call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0);
  1.1206 -  pop_CPU_state();
  1.1207 -  mov(rsp, rbp);
  1.1208 -  pop(rbp);
  1.1209 -}
  1.1210 -
  1.1211 -void MacroAssembler::print_state() {
  1.1212 -  address rip = pc();
  1.1213 -  pusha();            // get regs on stack
  1.1214 -  push(rbp);
  1.1215 -  movq(rbp, rsp);
  1.1216 -  andq(rsp, -16);     // align stack as required by push_CPU_state and call
  1.1217 -  push_CPU_state();   // keeps alignment at 16 bytes
  1.1218 -
  1.1219 -  lea(c_rarg0, InternalAddress(rip));
  1.1220 -  lea(c_rarg1, Address(rbp, wordSize)); // pass pointer to regs array
  1.1221 -  call_VM_leaf(CAST_FROM_FN_PTR(address, MacroAssembler::print_state64), c_rarg0, c_rarg1);
  1.1222 -
  1.1223 -  pop_CPU_state();
  1.1224 -  mov(rsp, rbp);
  1.1225 -  pop(rbp);
  1.1226 -  popa();
  1.1227 -}
  1.1228 -
  1.1229 -#ifndef PRODUCT
  1.1230 -extern "C" void findpc(intptr_t x);
  1.1231 -#endif
  1.1232 -
  1.1233 -void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) {
  1.1234 -  // In order to get locks to work, we need to fake a in_VM state
  1.1235 -  if (ShowMessageBoxOnError) {
  1.1236 -    JavaThread* thread = JavaThread::current();
  1.1237 -    JavaThreadState saved_state = thread->thread_state();
  1.1238 -    thread->set_thread_state(_thread_in_vm);
  1.1239 -#ifndef PRODUCT
  1.1240 -    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
  1.1241 -      ttyLocker ttyl;
  1.1242 -      BytecodeCounter::print();
  1.1243 -    }
  1.1244 -#endif
  1.1245 -    // To see where a verify_oop failed, get $ebx+40/X for this frame.
  1.1246 -    // XXX correct this offset for amd64
  1.1247 -    // This is the value of eip which points to where verify_oop will return.
  1.1248 -    if (os::message_box(msg, "Execution stopped, print registers?")) {
  1.1249 -      print_state64(pc, regs);
  1.1250 -      BREAKPOINT;
  1.1251 -      assert(false, "start up GDB");
  1.1252 -    }
  1.1253 -    ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
  1.1254 -  } else {
  1.1255 -    ttyLocker ttyl;
  1.1256 -    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n",
  1.1257 -                    msg);
  1.1258 -    assert(false, err_msg("DEBUG MESSAGE: %s", msg));
  1.1259 -  }
  1.1260 -}
  1.1261 -
  1.1262 -void MacroAssembler::print_state64(int64_t pc, int64_t regs[]) {
  1.1263 -  ttyLocker ttyl;
  1.1264 -  FlagSetting fs(Debugging, true);
  1.1265 -  tty->print_cr("rip = 0x%016lx", pc);
  1.1266 -#ifndef PRODUCT
  1.1267 -  tty->cr();
  1.1268 -  findpc(pc);
  1.1269 -  tty->cr();
  1.1270 -#endif
  1.1271 -#define PRINT_REG(rax, value) \
  1.1272 -  { tty->print("%s = ", #rax); os::print_location(tty, value); }
  1.1273 -  PRINT_REG(rax, regs[15]);
  1.1274 -  PRINT_REG(rbx, regs[12]);
  1.1275 -  PRINT_REG(rcx, regs[14]);
  1.1276 -  PRINT_REG(rdx, regs[13]);
  1.1277 -  PRINT_REG(rdi, regs[8]);
  1.1278 -  PRINT_REG(rsi, regs[9]);
  1.1279 -  PRINT_REG(rbp, regs[10]);
  1.1280 -  PRINT_REG(rsp, regs[11]);
  1.1281 -  PRINT_REG(r8 , regs[7]);
  1.1282 -  PRINT_REG(r9 , regs[6]);
  1.1283 -  PRINT_REG(r10, regs[5]);
  1.1284 -  PRINT_REG(r11, regs[4]);
  1.1285 -  PRINT_REG(r12, regs[3]);
  1.1286 -  PRINT_REG(r13, regs[2]);
  1.1287 -  PRINT_REG(r14, regs[1]);
  1.1288 -  PRINT_REG(r15, regs[0]);
  1.1289 -#undef PRINT_REG
  1.1290 -  // Print some words near top of staack.
  1.1291 -  int64_t* rsp = (int64_t*) regs[11];
  1.1292 -  int64_t* dump_sp = rsp;
  1.1293 -  for (int col1 = 0; col1 < 8; col1++) {
  1.1294 -    tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (int64_t)dump_sp);
  1.1295 -    os::print_location(tty, *dump_sp++);
  1.1296 -  }
  1.1297 -  for (int row = 0; row < 25; row++) {
  1.1298 -    tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (int64_t)dump_sp);
  1.1299 -    for (int col = 0; col < 4; col++) {
  1.1300 -      tty->print(" 0x%016lx", *dump_sp++);
  1.1301 -    }
  1.1302 -    tty->cr();
  1.1303 -  }
  1.1304 -  // Print some instructions around pc:
  1.1305 -  Disassembler::decode((address)pc-64, (address)pc);
  1.1306 -  tty->print_cr("--------");
  1.1307 -  Disassembler::decode((address)pc, (address)pc+32);
  1.1308 -}
  1.1309 -
  1.1310 -#endif // _LP64
  1.1311 -
  1.1312 -// Now versions that are common to 32/64 bit
  1.1313 -
  1.1314 -void MacroAssembler::addptr(Register dst, int32_t imm32) {
  1.1315 -  LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32));
  1.1316 -}
  1.1317 -
  1.1318 -void MacroAssembler::addptr(Register dst, Register src) {
  1.1319 -  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
  1.1320 -}
  1.1321 -
  1.1322 -void MacroAssembler::addptr(Address dst, Register src) {
  1.1323 -  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
  1.1324 -}
  1.1325 -
  1.1326 -void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src) {
  1.1327 -  if (reachable(src)) {
  1.1328 -    Assembler::addsd(dst, as_Address(src));
  1.1329 -  } else {
  1.1330 -    lea(rscratch1, src);
  1.1331 -    Assembler::addsd(dst, Address(rscratch1, 0));
  1.1332 -  }
  1.1333 -}
  1.1334 -
  1.1335 -void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) {
  1.1336 -  if (reachable(src)) {
  1.1337 -    addss(dst, as_Address(src));
  1.1338 -  } else {
  1.1339 -    lea(rscratch1, src);
  1.1340 -    addss(dst, Address(rscratch1, 0));
  1.1341 -  }
  1.1342 -}
  1.1343 -
  1.1344 -void MacroAssembler::align(int modulus) {
  1.1345 -  if (offset() % modulus != 0) {
  1.1346 -    nop(modulus - (offset() % modulus));
  1.1347 -  }
  1.1348 -}
  1.1349 -
  1.1350 -void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) {
  1.1351 -  // Used in sign-masking with aligned address.
  1.1352 -  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
  1.1353 -  if (reachable(src)) {
  1.1354 -    Assembler::andpd(dst, as_Address(src));
  1.1355 -  } else {
  1.1356 -    lea(rscratch1, src);
  1.1357 -    Assembler::andpd(dst, Address(rscratch1, 0));
  1.1358 -  }
  1.1359 -}
  1.1360 -
  1.1361 -void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) {
  1.1362 -  // Used in sign-masking with aligned address.
  1.1363 -  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
  1.1364 -  if (reachable(src)) {
  1.1365 -    Assembler::andps(dst, as_Address(src));
  1.1366 -  } else {
  1.1367 -    lea(rscratch1, src);
  1.1368 -    Assembler::andps(dst, Address(rscratch1, 0));
  1.1369 -  }
  1.1370 -}
  1.1371 -
  1.1372 -void MacroAssembler::andptr(Register dst, int32_t imm32) {
  1.1373 -  LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
  1.1374 -}
  1.1375 -
  1.1376 -void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
  1.1377 -  pushf();
  1.1378 -  if (os::is_MP())
  1.1379 -    lock();
  1.1380 -  incrementl(counter_addr);
  1.1381 -  popf();
  1.1382 -}
  1.1383 -
  1.1384 -// Writes to stack successive pages until offset reached to check for
  1.1385 -// stack overflow + shadow pages.  This clobbers tmp.
  1.1386 -void MacroAssembler::bang_stack_size(Register size, Register tmp) {
  1.1387 -  movptr(tmp, rsp);
  1.1388 -  // Bang stack for total size given plus shadow page size.
  1.1389 -  // Bang one page at a time because large size can bang beyond yellow and
  1.1390 -  // red zones.
  1.1391 -  Label loop;
  1.1392 -  bind(loop);
  1.1393 -  movl(Address(tmp, (-os::vm_page_size())), size );
  1.1394 -  subptr(tmp, os::vm_page_size());
  1.1395 -  subl(size, os::vm_page_size());
  1.1396 -  jcc(Assembler::greater, loop);
  1.1397 -
  1.1398 -  // Bang down shadow pages too.
  1.1399 -  // The -1 because we already subtracted 1 page.
  1.1400 -  for (int i = 0; i< StackShadowPages-1; i++) {
  1.1401 -    // this could be any sized move but this is can be a debugging crumb
  1.1402 -    // so the bigger the better.
  1.1403 -    movptr(Address(tmp, (-i*os::vm_page_size())), size );
  1.1404 -  }
  1.1405 -}
  1.1406 -
  1.1407 -void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
  1.1408 -  assert(UseBiasedLocking, "why call this otherwise?");
  1.1409 -
  1.1410 -  // Check for biased locking unlock case, which is a no-op
  1.1411 -  // Note: we do not have to check the thread ID for two reasons.
  1.1412 -  // First, the interpreter checks for IllegalMonitorStateException at
  1.1413 -  // a higher level. Second, if the bias was revoked while we held the
  1.1414 -  // lock, the object could not be rebiased toward another thread, so
  1.1415 -  // the bias bit would be clear.
  1.1416 -  movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  1.1417 -  andptr(temp_reg, markOopDesc::biased_lock_mask_in_place);
  1.1418 -  cmpptr(temp_reg, markOopDesc::biased_lock_pattern);
  1.1419 -  jcc(Assembler::equal, done);
  1.1420 -}
  1.1421 -
  1.1422 -void MacroAssembler::c2bool(Register x) {
  1.1423 -  // implements x == 0 ? 0 : 1
  1.1424 -  // note: must only look at least-significant byte of x
  1.1425 -  //       since C-style booleans are stored in one byte
  1.1426 -  //       only! (was bug)
  1.1427 -  andl(x, 0xFF);
  1.1428 -  setb(Assembler::notZero, x);
  1.1429 -}
  1.1430 -
  1.1431 -// Wouldn't need if AddressLiteral version had new name
  1.1432 -void MacroAssembler::call(Label& L, relocInfo::relocType rtype) {
  1.1433 -  Assembler::call(L, rtype);
  1.1434 -}
  1.1435 -
  1.1436 -void MacroAssembler::call(Register entry) {
  1.1437 -  Assembler::call(entry);
  1.1438 -}
  1.1439 -
  1.1440 -void MacroAssembler::call(AddressLiteral entry) {
  1.1441 -  if (reachable(entry)) {
  1.1442 -    Assembler::call_literal(entry.target(), entry.rspec());
  1.1443 -  } else {
  1.1444 -    lea(rscratch1, entry);
  1.1445 -    Assembler::call(rscratch1);
  1.1446 -  }
  1.1447 -}
  1.1448 -
  1.1449 -void MacroAssembler::ic_call(address entry) {
  1.1450 -  RelocationHolder rh = virtual_call_Relocation::spec(pc());
  1.1451 -  movptr(rax, (intptr_t)Universe::non_oop_word());
  1.1452 -  call(AddressLiteral(entry, rh));
  1.1453 -}
  1.1454 -
  1.1455 -// Implementation of call_VM versions
  1.1456 -
  1.1457 -void MacroAssembler::call_VM(Register oop_result,
  1.1458 -                             address entry_point,
  1.1459 -                             bool check_exceptions) {
  1.1460 -  Label C, E;
  1.1461 -  call(C, relocInfo::none);
  1.1462 -  jmp(E);
  1.1463 -
  1.1464 -  bind(C);
  1.1465 -  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
  1.1466 -  ret(0);
  1.1467 -
  1.1468 -  bind(E);
  1.1469 -}
  1.1470 -
  1.1471 -void MacroAssembler::call_VM(Register oop_result,
  1.1472 -                             address entry_point,
  1.1473 -                             Register arg_1,
  1.1474 -                             bool check_exceptions) {
  1.1475 -  Label C, E;
  1.1476 -  call(C, relocInfo::none);
  1.1477 -  jmp(E);
  1.1478 -
  1.1479 -  bind(C);
  1.1480 -  pass_arg1(this, arg_1);
  1.1481 -  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
  1.1482 -  ret(0);
  1.1483 -
  1.1484 -  bind(E);
  1.1485 -}
  1.1486 -
  1.1487 -void MacroAssembler::call_VM(Register oop_result,
  1.1488 -                             address entry_point,
  1.1489 -                             Register arg_1,
  1.1490 -                             Register arg_2,
  1.1491 -                             bool check_exceptions) {
  1.1492 -  Label C, E;
  1.1493 -  call(C, relocInfo::none);
  1.1494 -  jmp(E);
  1.1495 -
  1.1496 -  bind(C);
  1.1497 -
  1.1498 -  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  1.1499 -
  1.1500 -  pass_arg2(this, arg_2);
  1.1501 -  pass_arg1(this, arg_1);
  1.1502 -  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
  1.1503 -  ret(0);
  1.1504 -
  1.1505 -  bind(E);
  1.1506 -}
  1.1507 -
  1.1508 -void MacroAssembler::call_VM(Register oop_result,
  1.1509 -                             address entry_point,
  1.1510 -                             Register arg_1,
  1.1511 -                             Register arg_2,
  1.1512 -                             Register arg_3,
  1.1513 -                             bool check_exceptions) {
  1.1514 -  Label C, E;
  1.1515 -  call(C, relocInfo::none);
  1.1516 -  jmp(E);
  1.1517 -
  1.1518 -  bind(C);
  1.1519 -
  1.1520 -  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  1.1521 -  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  1.1522 -  pass_arg3(this, arg_3);
  1.1523 -
  1.1524 -  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  1.1525 -  pass_arg2(this, arg_2);
  1.1526 -
  1.1527 -  pass_arg1(this, arg_1);
  1.1528 -  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
  1.1529 -  ret(0);
  1.1530 -
  1.1531 -  bind(E);
  1.1532 -}
  1.1533 -
  1.1534 -void MacroAssembler::call_VM(Register oop_result,
  1.1535 -                             Register last_java_sp,
  1.1536 -                             address entry_point,
  1.1537 -                             int number_of_arguments,
  1.1538 -                             bool check_exceptions) {
  1.1539 -  Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
  1.1540 -  call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
  1.1541 -}
  1.1542 -
  1.1543 -void MacroAssembler::call_VM(Register oop_result,
  1.1544 -                             Register last_java_sp,
  1.1545 -                             address entry_point,
  1.1546 -                             Register arg_1,
  1.1547 -                             bool check_exceptions) {
  1.1548 -  pass_arg1(this, arg_1);
  1.1549 -  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
  1.1550 -}
  1.1551 -
  1.1552 -void MacroAssembler::call_VM(Register oop_result,
  1.1553 -                             Register last_java_sp,
  1.1554 -                             address entry_point,
  1.1555 -                             Register arg_1,
  1.1556 -                             Register arg_2,
  1.1557 -                             bool check_exceptions) {
  1.1558 -
  1.1559 -  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  1.1560 -  pass_arg2(this, arg_2);
  1.1561 -  pass_arg1(this, arg_1);
  1.1562 -  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
  1.1563 -}
  1.1564 -
  1.1565 -void MacroAssembler::call_VM(Register oop_result,
  1.1566 -                             Register last_java_sp,
  1.1567 -                             address entry_point,
  1.1568 -                             Register arg_1,
  1.1569 -                             Register arg_2,
  1.1570 -                             Register arg_3,
  1.1571 -                             bool check_exceptions) {
  1.1572 -  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  1.1573 -  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  1.1574 -  pass_arg3(this, arg_3);
  1.1575 -  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  1.1576 -  pass_arg2(this, arg_2);
  1.1577 -  pass_arg1(this, arg_1);
  1.1578 -  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
  1.1579 -}
  1.1580 -
  1.1581 -void MacroAssembler::super_call_VM(Register oop_result,
  1.1582 -                                   Register last_java_sp,
  1.1583 -                                   address entry_point,
  1.1584 -                                   int number_of_arguments,
  1.1585 -                                   bool check_exceptions) {
  1.1586 -  Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
  1.1587 -  MacroAssembler::call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
  1.1588 -}
  1.1589 -
  1.1590 -void MacroAssembler::super_call_VM(Register oop_result,
  1.1591 -                                   Register last_java_sp,
  1.1592 -                                   address entry_point,
  1.1593 -                                   Register arg_1,
  1.1594 -                                   bool check_exceptions) {
  1.1595 -  pass_arg1(this, arg_1);
  1.1596 -  super_call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
  1.1597 -}
  1.1598 -
  1.1599 -void MacroAssembler::super_call_VM(Register oop_result,
  1.1600 -                                   Register last_java_sp,
  1.1601 -                                   address entry_point,
  1.1602 -                                   Register arg_1,
  1.1603 -                                   Register arg_2,
  1.1604 -                                   bool check_exceptions) {
  1.1605 -
  1.1606 -  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  1.1607 -  pass_arg2(this, arg_2);
  1.1608 -  pass_arg1(this, arg_1);
  1.1609 -  super_call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
  1.1610 -}
  1.1611 -
  1.1612 -void MacroAssembler::super_call_VM(Register oop_result,
  1.1613 -                                   Register last_java_sp,
  1.1614 -                                   address entry_point,
  1.1615 -                                   Register arg_1,
  1.1616 -                                   Register arg_2,
  1.1617 -                                   Register arg_3,
  1.1618 -                                   bool check_exceptions) {
  1.1619 -  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  1.1620 -  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  1.1621 -  pass_arg3(this, arg_3);
  1.1622 -  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  1.1623 -  pass_arg2(this, arg_2);
  1.1624 -  pass_arg1(this, arg_1);
  1.1625 -  super_call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
  1.1626 -}
  1.1627 -
  1.1628 -void MacroAssembler::call_VM_base(Register oop_result,
  1.1629 -                                  Register java_thread,
  1.1630 -                                  Register last_java_sp,
  1.1631 -                                  address  entry_point,
  1.1632 -                                  int      number_of_arguments,
  1.1633 -                                  bool     check_exceptions) {
  1.1634 -  // determine java_thread register
  1.1635 -  if (!java_thread->is_valid()) {
  1.1636 -#ifdef _LP64
  1.1637 -    java_thread = r15_thread;
  1.1638 -#else
  1.1639 -    java_thread = rdi;
  1.1640 -    get_thread(java_thread);
  1.1641 -#endif // LP64
  1.1642 -  }
  1.1643 -  // determine last_java_sp register
  1.1644 -  if (!last_java_sp->is_valid()) {
  1.1645 -    last_java_sp = rsp;
  1.1646 -  }
  1.1647 -  // debugging support
  1.1648 -  assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
  1.1649 -  LP64_ONLY(assert(java_thread == r15_thread, "unexpected register"));
  1.1650 -#ifdef ASSERT
  1.1651 -  // TraceBytecodes does not use r12 but saves it over the call, so don't verify
  1.1652 -  // r12 is the heapbase.
  1.1653 -  LP64_ONLY(if ((UseCompressedOops || UseCompressedKlassPointers) && !TraceBytecodes) verify_heapbase("call_VM_base: heap base corrupted?");)
  1.1654 -#endif // ASSERT
  1.1655 -
  1.1656 -  assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
  1.1657 -  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");
  1.1658 -
  1.1659 -  // push java thread (becomes first argument of C function)
  1.1660 -
  1.1661 -  NOT_LP64(push(java_thread); number_of_arguments++);
  1.1662 -  LP64_ONLY(mov(c_rarg0, r15_thread));
  1.1663 -
  1.1664 -  // set last Java frame before call
  1.1665 -  assert(last_java_sp != rbp, "can't use ebp/rbp");
  1.1666 -
  1.1667 -  // Only interpreter should have to set fp
  1.1668 -  set_last_Java_frame(java_thread, last_java_sp, rbp, NULL);
  1.1669 -
  1.1670 -  // do the call, remove parameters
  1.1671 -  MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);
  1.1672 -
  1.1673 -  // restore the thread (cannot use the pushed argument since arguments
  1.1674 -  // may be overwritten by C code generated by an optimizing compiler);
  1.1675 -  // however can use the register value directly if it is callee saved.
  1.1676 -  if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) {
  1.1677 -    // rdi & rsi (also r15) are callee saved -> nothing to do
  1.1678 -#ifdef ASSERT
  1.1679 -    guarantee(java_thread != rax, "change this code");
  1.1680 -    push(rax);
  1.1681 -    { Label L;
  1.1682 -      get_thread(rax);
  1.1683 -      cmpptr(java_thread, rax);
  1.1684 -      jcc(Assembler::equal, L);
  1.1685 -      STOP("MacroAssembler::call_VM_base: rdi not callee saved?");
  1.1686 -      bind(L);
  1.1687 -    }
  1.1688 -    pop(rax);
  1.1689 -#endif
  1.1690 -  } else {
  1.1691 -    get_thread(java_thread);
  1.1692 -  }
  1.1693 -  // reset last Java frame
  1.1694 -  // Only interpreter should have to clear fp
  1.1695 -  reset_last_Java_frame(java_thread, true, false);
  1.1696 -
  1.1697 -#ifndef CC_INTERP
  1.1698 -   // C++ interp handles this in the interpreter
  1.1699 -  check_and_handle_popframe(java_thread);
  1.1700 -  check_and_handle_earlyret(java_thread);
  1.1701 -#endif /* CC_INTERP */
  1.1702 -
  1.1703 -  if (check_exceptions) {
  1.1704 -    // check for pending exceptions (java_thread is set upon return)
  1.1705 -    cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD);
  1.1706 -#ifndef _LP64
  1.1707 -    jump_cc(Assembler::notEqual,
  1.1708 -            RuntimeAddress(StubRoutines::forward_exception_entry()));
  1.1709 -#else
  1.1710 -    // This used to conditionally jump to forward_exception however it is
  1.1711 -    // possible if we relocate that the branch will not reach. So we must jump
  1.1712 -    // around so we can always reach
  1.1713 -
  1.1714 -    Label ok;
  1.1715 -    jcc(Assembler::equal, ok);
  1.1716 -    jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
  1.1717 -    bind(ok);
  1.1718 -#endif // LP64
  1.1719 -  }
  1.1720 -
  1.1721 -  // get oop result if there is one and reset the value in the thread
  1.1722 -  if (oop_result->is_valid()) {
  1.1723 -    get_vm_result(oop_result, java_thread);
  1.1724 -  }
  1.1725 -}
  1.1726 -
  1.1727 -void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
  1.1728 -
  1.1729 -  // Calculate the value for last_Java_sp
  1.1730 -  // somewhat subtle. call_VM does an intermediate call
  1.1731 -  // which places a return address on the stack just under the
  1.1732 -  // stack pointer as the user finished with it. This allows
  1.1733 -  // us to retrieve last_Java_pc from last_Java_sp[-1].
  1.1734 -  // On 32bit we then have to push additional args on the stack to accomplish
  1.1735 -  // the actual requested call. On 64bit call_VM only can use register args
  1.1736 -  // so the only extra space is the return address that call_VM created.
  1.1737 -  // This hopefully explains the calculations here.
  1.1738 -
  1.1739 -#ifdef _LP64
  1.1740 -  // We've pushed one address, correct last_Java_sp
  1.1741 -  lea(rax, Address(rsp, wordSize));
  1.1742 -#else
  1.1743 -  lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize));
  1.1744 -#endif // LP64
  1.1745 -
  1.1746 -  call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions);
  1.1747 -
  1.1748 -}
  1.1749 -
  1.1750 -void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  1.1751 -  call_VM_leaf_base(entry_point, number_of_arguments);
  1.1752 -}
  1.1753 -
  1.1754 -void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  1.1755 -  pass_arg0(this, arg_0);
  1.1756 -  call_VM_leaf(entry_point, 1);
  1.1757 -}
  1.1758 -
  1.1759 -void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
  1.1760 -
  1.1761 -  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  1.1762 -  pass_arg1(this, arg_1);
  1.1763 -  pass_arg0(this, arg_0);
  1.1764 -  call_VM_leaf(entry_point, 2);
  1.1765 -}
  1.1766 -
  1.1767 -void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  1.1768 -  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  1.1769 -  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  1.1770 -  pass_arg2(this, arg_2);
  1.1771 -  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  1.1772 -  pass_arg1(this, arg_1);
  1.1773 -  pass_arg0(this, arg_0);
  1.1774 -  call_VM_leaf(entry_point, 3);
  1.1775 -}
  1.1776 -
  1.1777 -void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
  1.1778 -  pass_arg0(this, arg_0);
  1.1779 -  MacroAssembler::call_VM_leaf_base(entry_point, 1);
  1.1780 -}
  1.1781 -
  1.1782 -void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
  1.1783 -
  1.1784 -  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  1.1785 -  pass_arg1(this, arg_1);
  1.1786 -  pass_arg0(this, arg_0);
  1.1787 -  MacroAssembler::call_VM_leaf_base(entry_point, 2);
  1.1788 -}
  1.1789 -
  1.1790 -void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  1.1791 -  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  1.1792 -  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  1.1793 -  pass_arg2(this, arg_2);
  1.1794 -  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  1.1795 -  pass_arg1(this, arg_1);
  1.1796 -  pass_arg0(this, arg_0);
  1.1797 -  MacroAssembler::call_VM_leaf_base(entry_point, 3);
  1.1798 -}
  1.1799 -
  1.1800 -void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
  1.1801 -  LP64_ONLY(assert(arg_0 != c_rarg3, "smashed arg"));
  1.1802 -  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  1.1803 -  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  1.1804 -  pass_arg3(this, arg_3);
  1.1805 -  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  1.1806 -  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  1.1807 -  pass_arg2(this, arg_2);
  1.1808 -  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  1.1809 -  pass_arg1(this, arg_1);
  1.1810 -  pass_arg0(this, arg_0);
  1.1811 -  MacroAssembler::call_VM_leaf_base(entry_point, 4);
  1.1812 -}
  1.1813 -
  1.1814 -void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
  1.1815 -  movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
  1.1816 -  movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD);
  1.1817 -  verify_oop(oop_result, "broken oop in call_VM_base");
  1.1818 -}
  1.1819 -
  1.1820 -void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
  1.1821 -  movptr(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
  1.1822 -  movptr(Address(java_thread, JavaThread::vm_result_2_offset()), NULL_WORD);
  1.1823 -}
  1.1824 -
  1.1825 -void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
  1.1826 -}
  1.1827 -
  1.1828 -void MacroAssembler::check_and_handle_popframe(Register java_thread) {
  1.1829 -}
  1.1830 -
  1.1831 -void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) {
  1.1832 -  if (reachable(src1)) {
  1.1833 -    cmpl(as_Address(src1), imm);
  1.1834 -  } else {
  1.1835 -    lea(rscratch1, src1);
  1.1836 -    cmpl(Address(rscratch1, 0), imm);
  1.1837 -  }
  1.1838 -}
  1.1839 -
  1.1840 -void MacroAssembler::cmp32(Register src1, AddressLiteral src2) {
  1.1841 -  assert(!src2.is_lval(), "use cmpptr");
  1.1842 -  if (reachable(src2)) {
  1.1843 -    cmpl(src1, as_Address(src2));
  1.1844 -  } else {
  1.1845 -    lea(rscratch1, src2);
  1.1846 -    cmpl(src1, Address(rscratch1, 0));
  1.1847 -  }
  1.1848 -}
  1.1849 -
  1.1850 -void MacroAssembler::cmp32(Register src1, int32_t imm) {
  1.1851 -  Assembler::cmpl(src1, imm);
  1.1852 -}
  1.1853 -
  1.1854 -void MacroAssembler::cmp32(Register src1, Address src2) {
  1.1855 -  Assembler::cmpl(src1, src2);
  1.1856 -}
  1.1857 -
  1.1858 -void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
  1.1859 -  ucomisd(opr1, opr2);
  1.1860 -
  1.1861 -  Label L;
  1.1862 -  if (unordered_is_less) {
  1.1863 -    movl(dst, -1);
  1.1864 -    jcc(Assembler::parity, L);
  1.1865 -    jcc(Assembler::below , L);
  1.1866 -    movl(dst, 0);
  1.1867 -    jcc(Assembler::equal , L);
  1.1868 -    increment(dst);
  1.1869 -  } else { // unordered is greater
  1.1870 -    movl(dst, 1);
  1.1871 -    jcc(Assembler::parity, L);
  1.1872 -    jcc(Assembler::above , L);
  1.1873 -    movl(dst, 0);
  1.1874 -    jcc(Assembler::equal , L);
  1.1875 -    decrementl(dst);
  1.1876 -  }
  1.1877 -  bind(L);
  1.1878 -}
  1.1879 -
  1.1880 -void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
  1.1881 -  ucomiss(opr1, opr2);
  1.1882 -
  1.1883 -  Label L;
  1.1884 -  if (unordered_is_less) {
  1.1885 -    movl(dst, -1);
  1.1886 -    jcc(Assembler::parity, L);
  1.1887 -    jcc(Assembler::below , L);
  1.1888 -    movl(dst, 0);
  1.1889 -    jcc(Assembler::equal , L);
  1.1890 -    increment(dst);
  1.1891 -  } else { // unordered is greater
  1.1892 -    movl(dst, 1);
  1.1893 -    jcc(Assembler::parity, L);
  1.1894 -    jcc(Assembler::above , L);
  1.1895 -    movl(dst, 0);
  1.1896 -    jcc(Assembler::equal , L);
  1.1897 -    decrementl(dst);
  1.1898 -  }
  1.1899 -  bind(L);
  1.1900 -}
  1.1901 -
  1.1902 -
  1.1903 -void MacroAssembler::cmp8(AddressLiteral src1, int imm) {
  1.1904 -  if (reachable(src1)) {
  1.1905 -    cmpb(as_Address(src1), imm);
  1.1906 -  } else {
  1.1907 -    lea(rscratch1, src1);
  1.1908 -    cmpb(Address(rscratch1, 0), imm);
  1.1909 -  }
  1.1910 -}
  1.1911 -
  1.1912 -void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) {
  1.1913 -#ifdef _LP64
  1.1914 -  if (src2.is_lval()) {
  1.1915 -    movptr(rscratch1, src2);
  1.1916 -    Assembler::cmpq(src1, rscratch1);
  1.1917 -  } else if (reachable(src2)) {
  1.1918 -    cmpq(src1, as_Address(src2));
  1.1919 -  } else {
  1.1920 -    lea(rscratch1, src2);
  1.1921 -    Assembler::cmpq(src1, Address(rscratch1, 0));
  1.1922 -  }
  1.1923 -#else
  1.1924 -  if (src2.is_lval()) {
  1.1925 -    cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
  1.1926 -  } else {
  1.1927 -    cmpl(src1, as_Address(src2));
  1.1928 -  }
  1.1929 -#endif // _LP64
  1.1930 -}
  1.1931 -
  1.1932 -void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) {
  1.1933 -  assert(src2.is_lval(), "not a mem-mem compare");
  1.1934 -#ifdef _LP64
  1.1935 -  // moves src2's literal address
  1.1936 -  movptr(rscratch1, src2);
  1.1937 -  Assembler::cmpq(src1, rscratch1);
  1.1938 -#else
  1.1939 -  cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
  1.1940 -#endif // _LP64
  1.1941 -}
  1.1942 -
  1.1943 -void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) {
  1.1944 -  if (reachable(adr)) {
  1.1945 -    if (os::is_MP())
  1.1946 -      lock();
  1.1947 -    cmpxchgptr(reg, as_Address(adr));
  1.1948 -  } else {
  1.1949 -    lea(rscratch1, adr);
  1.1950 -    if (os::is_MP())
  1.1951 -      lock();
  1.1952 -    cmpxchgptr(reg, Address(rscratch1, 0));
  1.1953 -  }
  1.1954 -}
  1.1955 -
  1.1956 -void MacroAssembler::cmpxchgptr(Register reg, Address adr) {
  1.1957 -  LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr));
  1.1958 -}
  1.1959 -
  1.1960 -void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) {
  1.1961 -  if (reachable(src)) {
  1.1962 -    Assembler::comisd(dst, as_Address(src));
  1.1963 -  } else {
  1.1964 -    lea(rscratch1, src);
  1.1965 -    Assembler::comisd(dst, Address(rscratch1, 0));
  1.1966 -  }
  1.1967 -}
  1.1968 -
  1.1969 -void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) {
  1.1970 -  if (reachable(src)) {
  1.1971 -    Assembler::comiss(dst, as_Address(src));
  1.1972 -  } else {
  1.1973 -    lea(rscratch1, src);
  1.1974 -    Assembler::comiss(dst, Address(rscratch1, 0));
  1.1975 -  }
  1.1976 -}
  1.1977 -
  1.1978 -
  1.1979 -void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) {
  1.1980 -  Condition negated_cond = negate_condition(cond);
  1.1981 -  Label L;
  1.1982 -  jcc(negated_cond, L);
  1.1983 -  atomic_incl(counter_addr);
  1.1984 -  bind(L);
  1.1985 -}
  1.1986 -
  1.1987 -int MacroAssembler::corrected_idivl(Register reg) {
  1.1988 -  // Full implementation of Java idiv and irem; checks for
  1.1989 -  // special case as described in JVM spec., p.243 & p.271.
  1.1990 -  // The function returns the (pc) offset of the idivl
  1.1991 -  // instruction - may be needed for implicit exceptions.
  1.1992 -  //
  1.1993 -  //         normal case                           special case
  1.1994 -  //
  1.1995 -  // input : rax: dividend                         min_int
  1.1996 -  //         reg: divisor   (may not be rax/rdx)   -1
  1.1997 -  //
  1.1998 -  // output: rax: quotient  (= rax idiv reg)       min_int
  1.1999 -  //         rdx: remainder (= rax irem reg)       0
  1.2000 -  assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register");
  1.2001 -  const int min_int = 0x80000000;
  1.2002 -  Label normal_case, special_case;
  1.2003 -
  1.2004 -  // check for special case
  1.2005 -  cmpl(rax, min_int);
  1.2006 -  jcc(Assembler::notEqual, normal_case);
  1.2007 -  xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0)
  1.2008 -  cmpl(reg, -1);
  1.2009 -  jcc(Assembler::equal, special_case);
  1.2010 -
  1.2011 -  // handle normal case
  1.2012 -  bind(normal_case);
  1.2013 -  cdql();
  1.2014 -  int idivl_offset = offset();
  1.2015 -  idivl(reg);
  1.2016 -
  1.2017 -  // normal and special case exit
  1.2018 -  bind(special_case);
  1.2019 -
  1.2020 -  return idivl_offset;
  1.2021 -}
  1.2022 -
  1.2023 -
  1.2024 -
  1.2025 -void MacroAssembler::decrementl(Register reg, int value) {
  1.2026 -  if (value == min_jint) {subl(reg, value) ; return; }
  1.2027 -  if (value <  0) { incrementl(reg, -value); return; }
  1.2028 -  if (value == 0) {                        ; return; }
  1.2029 -  if (value == 1 && UseIncDec) { decl(reg) ; return; }
  1.2030 -  /* else */      { subl(reg, value)       ; return; }
  1.2031 -}
  1.2032 -
  1.2033 -void MacroAssembler::decrementl(Address dst, int value) {
  1.2034 -  if (value == min_jint) {subl(dst, value) ; return; }
  1.2035 -  if (value <  0) { incrementl(dst, -value); return; }
  1.2036 -  if (value == 0) {                        ; return; }
  1.2037 -  if (value == 1 && UseIncDec) { decl(dst) ; return; }
  1.2038 -  /* else */      { subl(dst, value)       ; return; }
  1.2039 -}
  1.2040 -
  1.2041 -void MacroAssembler::division_with_shift (Register reg, int shift_value) {
  1.2042 -  assert (shift_value > 0, "illegal shift value");
  1.2043 -  Label _is_positive;
  1.2044 -  testl (reg, reg);
  1.2045 -  jcc (Assembler::positive, _is_positive);
  1.2046 -  int offset = (1 << shift_value) - 1 ;
  1.2047 -
  1.2048 -  if (offset == 1) {
  1.2049 -    incrementl(reg);
  1.2050 -  } else {
  1.2051 -    addl(reg, offset);
  1.2052 -  }
  1.2053 -
  1.2054 -  bind (_is_positive);
  1.2055 -  sarl(reg, shift_value);
  1.2056 -}
  1.2057 -
  1.2058 -void MacroAssembler::divsd(XMMRegister dst, AddressLiteral src) {
  1.2059 -  if (reachable(src)) {
  1.2060 -    Assembler::divsd(dst, as_Address(src));
  1.2061 -  } else {
  1.2062 -    lea(rscratch1, src);
  1.2063 -    Assembler::divsd(dst, Address(rscratch1, 0));
  1.2064 -  }
  1.2065 -}
  1.2066 -
  1.2067 -void MacroAssembler::divss(XMMRegister dst, AddressLiteral src) {
  1.2068 -  if (reachable(src)) {
  1.2069 -    Assembler::divss(dst, as_Address(src));
  1.2070 -  } else {
  1.2071 -    lea(rscratch1, src);
  1.2072 -    Assembler::divss(dst, Address(rscratch1, 0));
  1.2073 -  }
  1.2074 -}
  1.2075 -
  1.2076 -// !defined(COMPILER2) is because of stupid core builds
  1.2077 -#if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2)
  1.2078 -void MacroAssembler::empty_FPU_stack() {
  1.2079 -  if (VM_Version::supports_mmx()) {
  1.2080 -    emms();
  1.2081 -  } else {
  1.2082 -    for (int i = 8; i-- > 0; ) ffree(i);
  1.2083 -  }
  1.2084 -}
  1.2085 -#endif // !LP64 || C1 || !C2
  1.2086 -
  1.2087 -
  1.2088 -// Defines obj, preserves var_size_in_bytes
  1.2089 -void MacroAssembler::eden_allocate(Register obj,
  1.2090 -                                   Register var_size_in_bytes,
  1.2091 -                                   int con_size_in_bytes,
  1.2092 -                                   Register t1,
  1.2093 -                                   Label& slow_case) {
  1.2094 -  assert(obj == rax, "obj must be in rax, for cmpxchg");
  1.2095 -  assert_different_registers(obj, var_size_in_bytes, t1);
  1.2096 -  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
  1.2097 -    jmp(slow_case);
  1.2098 -  } else {
  1.2099 -    Register end = t1;
  1.2100 -    Label retry;
  1.2101 -    bind(retry);
  1.2102 -    ExternalAddress heap_top((address) Universe::heap()->top_addr());
  1.2103 -    movptr(obj, heap_top);
  1.2104 -    if (var_size_in_bytes == noreg) {
  1.2105 -      lea(end, Address(obj, con_size_in_bytes));
  1.2106 -    } else {
  1.2107 -      lea(end, Address(obj, var_size_in_bytes, Address::times_1));
  1.2108 -    }
  1.2109 -    // if end < obj then we wrapped around => object too long => slow case
  1.2110 -    cmpptr(end, obj);
  1.2111 -    jcc(Assembler::below, slow_case);
  1.2112 -    cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr()));
  1.2113 -    jcc(Assembler::above, slow_case);
  1.2114 -    // Compare obj with the top addr, and if still equal, store the new top addr in
  1.2115 -    // end at the address of the top addr pointer. Sets ZF if was equal, and clears
  1.2116 -    // it otherwise. Use lock prefix for atomicity on MPs.
  1.2117 -    locked_cmpxchgptr(end, heap_top);
  1.2118 -    jcc(Assembler::notEqual, retry);
  1.2119 -  }
  1.2120 -}
  1.2121 -
  1.2122 -void MacroAssembler::enter() {
  1.2123 -  push(rbp);
  1.2124 -  mov(rbp, rsp);
  1.2125 -}
  1.2126 -
  1.2127 -// A 5 byte nop that is safe for patching (see patch_verified_entry)
  1.2128 -void MacroAssembler::fat_nop() {
  1.2129 -  if (UseAddressNop) {
  1.2130 -    addr_nop_5();
  1.2131 -  } else {
  1.2132 -    emit_byte(0x26); // es:
  1.2133 -    emit_byte(0x2e); // cs:
  1.2134 -    emit_byte(0x64); // fs:
  1.2135 -    emit_byte(0x65); // gs:
  1.2136 -    emit_byte(0x90);
  1.2137 -  }
  1.2138 -}
  1.2139 -
  1.2140 -void MacroAssembler::fcmp(Register tmp) {
  1.2141 -  fcmp(tmp, 1, true, true);
  1.2142 -}
  1.2143 -
  1.2144 -void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) {
  1.2145 -  assert(!pop_right || pop_left, "usage error");
  1.2146 -  if (VM_Version::supports_cmov()) {
  1.2147 -    assert(tmp == noreg, "unneeded temp");
  1.2148 -    if (pop_left) {
  1.2149 -      fucomip(index);
  1.2150 -    } else {
  1.2151 -      fucomi(index);
  1.2152 -    }
  1.2153 -    if (pop_right) {
  1.2154 -      fpop();
  1.2155 -    }
  1.2156 -  } else {
  1.2157 -    assert(tmp != noreg, "need temp");
  1.2158 -    if (pop_left) {
  1.2159 -      if (pop_right) {
  1.2160 -        fcompp();
  1.2161 -      } else {
  1.2162 -        fcomp(index);
  1.2163 -      }
  1.2164 -    } else {
  1.2165 -      fcom(index);
  1.2166 -    }
  1.2167 -    // convert FPU condition into eflags condition via rax
  1.2168 -    save_rax(tmp);
  1.2169 -    fwait(); fnstsw_ax();
  1.2170 -    sahf();
  1.2171 -    restore_rax(tmp);
  1.2172 -  }
  1.2173 -  // condition codes set as follows:
  1.2174 -  //
  1.2175 -  // CF (corresponds to C0) if x < y
  1.2176 -  // PF (corresponds to C2) if unordered
  1.2177 -  // ZF (corresponds to C3) if x = y
  1.2178 -}
  1.2179 -
  1.2180 -void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) {
  1.2181 -  fcmp2int(dst, unordered_is_less, 1, true, true);
  1.2182 -}
  1.2183 -
  1.2184 -void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) {
  1.2185 -  fcmp(VM_Version::supports_cmov() ? noreg : dst, index, pop_left, pop_right);
  1.2186 -  Label L;
  1.2187 -  if (unordered_is_less) {
  1.2188 -    movl(dst, -1);
  1.2189 -    jcc(Assembler::parity, L);
  1.2190 -    jcc(Assembler::below , L);
  1.2191 -    movl(dst, 0);
  1.2192 -    jcc(Assembler::equal , L);
  1.2193 -    increment(dst);
  1.2194 -  } else { // unordered is greater
  1.2195 -    movl(dst, 1);
  1.2196 -    jcc(Assembler::parity, L);
  1.2197 -    jcc(Assembler::above , L);
  1.2198 -    movl(dst, 0);
  1.2199 -    jcc(Assembler::equal , L);
  1.2200 -    decrementl(dst);
  1.2201 -  }
  1.2202 -  bind(L);
  1.2203 -}
  1.2204 -
  1.2205 -void MacroAssembler::fld_d(AddressLiteral src) {
  1.2206 -  fld_d(as_Address(src));
  1.2207 -}
  1.2208 -
  1.2209 -void MacroAssembler::fld_s(AddressLiteral src) {
  1.2210 -  fld_s(as_Address(src));
  1.2211 -}
  1.2212 -
  1.2213 -void MacroAssembler::fld_x(AddressLiteral src) {
  1.2214 -  Assembler::fld_x(as_Address(src));
  1.2215 -}
  1.2216 -
  1.2217 -void MacroAssembler::fldcw(AddressLiteral src) {
  1.2218 -  Assembler::fldcw(as_Address(src));
  1.2219 -}
  1.2220 -
  1.2221 -void MacroAssembler::pow_exp_core_encoding() {
  1.2222 -  // kills rax, rcx, rdx
  1.2223 -  subptr(rsp,sizeof(jdouble));
  1.2224 -  // computes 2^X. Stack: X ...
  1.2225 -  // f2xm1 computes 2^X-1 but only operates on -1<=X<=1. Get int(X) and
  1.2226 -  // keep it on the thread's stack to compute 2^int(X) later
  1.2227 -  // then compute 2^(X-int(X)) as (2^(X-int(X))-1)+1
  1.2228 -  // final result is obtained with: 2^X = 2^int(X) * 2^(X-int(X))
  1.2229 -  fld_s(0);                 // Stack: X X ...
  1.2230 -  frndint();                // Stack: int(X) X ...
  1.2231 -  fsuba(1);                 // Stack: int(X) X-int(X) ...
  1.2232 -  fistp_s(Address(rsp,0));  // move int(X) as integer to thread's stack. Stack: X-int(X) ...
  1.2233 -  f2xm1();                  // Stack: 2^(X-int(X))-1 ...
  1.2234 -  fld1();                   // Stack: 1 2^(X-int(X))-1 ...
  1.2235 -  faddp(1);                 // Stack: 2^(X-int(X))
  1.2236 -  // computes 2^(int(X)): add exponent bias (1023) to int(X), then
  1.2237 -  // shift int(X)+1023 to exponent position.
  1.2238 -  // Exponent is limited to 11 bits: if int(X)+1023 does not fit in 11
  1.2239 -  // bits, set result to NaN. 0x000 and 0x7FF are reserved exponent
  1.2240 -  // values so detect them and set result to NaN.
  1.2241 -  movl(rax,Address(rsp,0));
  1.2242 -  movl(rcx, -2048); // 11 bit mask and valid NaN binary encoding
  1.2243 -  addl(rax, 1023);
  1.2244 -  movl(rdx,rax);
  1.2245 -  shll(rax,20);
  1.2246 -  // Check that 0 < int(X)+1023 < 2047. Otherwise set rax to NaN.
  1.2247 -  addl(rdx,1);
  1.2248 -  // Check that 1 < int(X)+1023+1 < 2048
  1.2249 -  // in 3 steps:
  1.2250 -  // 1- (int(X)+1023+1)&-2048 == 0 => 0 <= int(X)+1023+1 < 2048
  1.2251 -  // 2- (int(X)+1023+1)&-2048 != 0
  1.2252 -  // 3- (int(X)+1023+1)&-2048 != 1
  1.2253 -  // Do 2- first because addl just updated the flags.
  1.2254 -  cmov32(Assembler::equal,rax,rcx);
  1.2255 -  cmpl(rdx,1);
  1.2256 -  cmov32(Assembler::equal,rax,rcx);
  1.2257 -  testl(rdx,rcx);
  1.2258 -  cmov32(Assembler::notEqual,rax,rcx);
  1.2259 -  movl(Address(rsp,4),rax);
  1.2260 -  movl(Address(rsp,0),0);
  1.2261 -  fmul_d(Address(rsp,0));   // Stack: 2^X ...
  1.2262 -  addptr(rsp,sizeof(jdouble));
  1.2263 -}
  1.2264 -
  1.2265 -void MacroAssembler::increase_precision() {
  1.2266 -  subptr(rsp, BytesPerWord);
  1.2267 -  fnstcw(Address(rsp, 0));
  1.2268 -  movl(rax, Address(rsp, 0));
  1.2269 -  orl(rax, 0x300);
  1.2270 -  push(rax);
  1.2271 -  fldcw(Address(rsp, 0));
  1.2272 -  pop(rax);
  1.2273 -}
  1.2274 -
  1.2275 -void MacroAssembler::restore_precision() {
  1.2276 -  fldcw(Address(rsp, 0));
  1.2277 -  addptr(rsp, BytesPerWord);
  1.2278 -}
  1.2279 -
  1.2280 -void MacroAssembler::fast_pow() {
  1.2281 -  // computes X^Y = 2^(Y * log2(X))
  1.2282 -  // if fast computation is not possible, result is NaN. Requires
  1.2283 -  // fallback from user of this macro.
  1.2284 -  // increase precision for intermediate steps of the computation
  1.2285 -  increase_precision();
  1.2286 -  fyl2x();                 // Stack: (Y*log2(X)) ...
  1.2287 -  pow_exp_core_encoding(); // Stack: exp(X) ...
  1.2288 -  restore_precision();
  1.2289 -}
  1.2290 -
  1.2291 -void MacroAssembler::fast_exp() {
  1.2292 -  // computes exp(X) = 2^(X * log2(e))
  1.2293 -  // if fast computation is not possible, result is NaN. Requires
  1.2294 -  // fallback from user of this macro.
  1.2295 -  // increase precision for intermediate steps of the computation
  1.2296 -  increase_precision();
  1.2297 -  fldl2e();                // Stack: log2(e) X ...
  1.2298 -  fmulp(1);                // Stack: (X*log2(e)) ...
  1.2299 -  pow_exp_core_encoding(); // Stack: exp(X) ...
  1.2300 -  restore_precision();
  1.2301 -}
  1.2302 -
  1.2303 -void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) {
  1.2304 -  // kills rax, rcx, rdx
  1.2305 -  // pow and exp needs 2 extra registers on the fpu stack.
  1.2306 -  Label slow_case, done;
  1.2307 -  Register tmp = noreg;
  1.2308 -  if (!VM_Version::supports_cmov()) {
  1.2309 -    // fcmp needs a temporary so preserve rdx,
  1.2310 -    tmp = rdx;
  1.2311 -  }
  1.2312 -  Register tmp2 = rax;
  1.2313 -  Register tmp3 = rcx;
  1.2314 -
  1.2315 -  if (is_exp) {
  1.2316 -    // Stack: X
  1.2317 -    fld_s(0);                   // duplicate argument for runtime call. Stack: X X
  1.2318 -    fast_exp();                 // Stack: exp(X) X
  1.2319 -    fcmp(tmp, 0, false, false); // Stack: exp(X) X
  1.2320 -    // exp(X) not equal to itself: exp(X) is NaN go to slow case.
  1.2321 -    jcc(Assembler::parity, slow_case);
  1.2322 -    // get rid of duplicate argument. Stack: exp(X)
  1.2323 -    if (num_fpu_regs_in_use > 0) {
  1.2324 -      fxch();
  1.2325 -      fpop();
  1.2326 -    } else {
  1.2327 -      ffree(1);
  1.2328 -    }
  1.2329 -    jmp(done);
  1.2330 -  } else {
  1.2331 -    // Stack: X Y
  1.2332 -    Label x_negative, y_odd;
  1.2333 -
  1.2334 -    fldz();                     // Stack: 0 X Y
  1.2335 -    fcmp(tmp, 1, true, false);  // Stack: X Y
  1.2336 -    jcc(Assembler::above, x_negative);
  1.2337 -
  1.2338 -    // X >= 0
  1.2339 -
  1.2340 -    fld_s(1);                   // duplicate arguments for runtime call. Stack: Y X Y
  1.2341 -    fld_s(1);                   // Stack: X Y X Y
  1.2342 -    fast_pow();                 // Stack: X^Y X Y
  1.2343 -    fcmp(tmp, 0, false, false); // Stack: X^Y X Y
  1.2344 -    // X^Y not equal to itself: X^Y is NaN go to slow case.
  1.2345 -    jcc(Assembler::parity, slow_case);
  1.2346 -    // get rid of duplicate arguments. Stack: X^Y
  1.2347 -    if (num_fpu_regs_in_use > 0) {
  1.2348 -      fxch(); fpop();
  1.2349 -      fxch(); fpop();
  1.2350 -    } else {
  1.2351 -      ffree(2);
  1.2352 -      ffree(1);
  1.2353 -    }
  1.2354 -    jmp(done);
  1.2355 -
  1.2356 -    // X <= 0
  1.2357 -    bind(x_negative);
  1.2358 -
  1.2359 -    fld_s(1);                   // Stack: Y X Y
  1.2360 -    frndint();                  // Stack: int(Y) X Y
  1.2361 -    fcmp(tmp, 2, false, false); // Stack: int(Y) X Y
  1.2362 -    jcc(Assembler::notEqual, slow_case);
  1.2363 -
  1.2364 -    subptr(rsp, 8);
  1.2365 -
  1.2366 -    // For X^Y, when X < 0, Y has to be an integer and the final
  1.2367 -    // result depends on whether it's odd or even. We just checked
  1.2368 -    // that int(Y) == Y.  We move int(Y) to gp registers as a 64 bit
  1.2369 -    // integer to test its parity. If int(Y) is huge and doesn't fit
  1.2370 -    // in the 64 bit integer range, the integer indefinite value will
  1.2371 -    // end up in the gp registers. Huge numbers are all even, the
  1.2372 -    // integer indefinite number is even so it's fine.
  1.2373 -
  1.2374 -#ifdef ASSERT
  1.2375 -    // Let's check we don't end up with an integer indefinite number
  1.2376 -    // when not expected. First test for huge numbers: check whether
  1.2377 -    // int(Y)+1 == int(Y) which is true for very large numbers and
  1.2378 -    // those are all even. A 64 bit integer is guaranteed to not
  1.2379 -    // overflow for numbers where y+1 != y (when precision is set to
  1.2380 -    // double precision).
  1.2381 -    Label y_not_huge;
  1.2382 -
  1.2383 -    fld1();                     // Stack: 1 int(Y) X Y
  1.2384 -    fadd(1);                    // Stack: 1+int(Y) int(Y) X Y
  1.2385 -
  1.2386 -#ifdef _LP64
  1.2387 -    // trip to memory to force the precision down from double extended
  1.2388 -    // precision
  1.2389 -    fstp_d(Address(rsp, 0));
  1.2390 -    fld_d(Address(rsp, 0));
  1.2391 -#endif
  1.2392 -
  1.2393 -    fcmp(tmp, 1, true, false);  // Stack: int(Y) X Y
  1.2394 -#endif
  1.2395 -
  1.2396 -    // move int(Y) as 64 bit integer to thread's stack
  1.2397 -    fistp_d(Address(rsp,0));    // Stack: X Y
  1.2398 -
  1.2399 -#ifdef ASSERT
  1.2400 -    jcc(Assembler::notEqual, y_not_huge);
  1.2401 -
  1.2402 -    // Y is huge so we know it's even. It may not fit in a 64 bit
  1.2403 -    // integer and we don't want the debug code below to see the
  1.2404 -    // integer indefinite value so overwrite int(Y) on the thread's
  1.2405 -    // stack with 0.
  1.2406 -    movl(Address(rsp, 0), 0);
  1.2407 -    movl(Address(rsp, 4), 0);
  1.2408 -
  1.2409 -    bind(y_not_huge);
  1.2410 -#endif
  1.2411 -
  1.2412 -    fld_s(1);                   // duplicate arguments for runtime call. Stack: Y X Y
  1.2413 -    fld_s(1);                   // Stack: X Y X Y
  1.2414 -    fabs();                     // Stack: abs(X) Y X Y
  1.2415 -    fast_pow();                 // Stack: abs(X)^Y X Y
  1.2416 -    fcmp(tmp, 0, false, false); // Stack: abs(X)^Y X Y
  1.2417 -    // abs(X)^Y not equal to itself: abs(X)^Y is NaN go to slow case.
  1.2418 -
  1.2419 -    pop(tmp2);
  1.2420 -    NOT_LP64(pop(tmp3));
  1.2421 -    jcc(Assembler::parity, slow_case);
  1.2422 -
  1.2423 -#ifdef ASSERT
  1.2424 -    // Check that int(Y) is not integer indefinite value (int
  1.2425 -    // overflow). Shouldn't happen because for values that would
  1.2426 -    // overflow, 1+int(Y)==Y which was tested earlier.
  1.2427 -#ifndef _LP64
  1.2428 -    {
  1.2429 -      Label integer;
  1.2430 -      testl(tmp2, tmp2);
  1.2431 -      jcc(Assembler::notZero, integer);
  1.2432 -      cmpl(tmp3, 0x80000000);
  1.2433 -      jcc(Assembler::notZero, integer);
  1.2434 -      STOP("integer indefinite value shouldn't be seen here");
  1.2435 -      bind(integer);
  1.2436 -    }
  1.2437 -#else
  1.2438 -    {
  1.2439 -      Label integer;
  1.2440 -      mov(tmp3, tmp2); // preserve tmp2 for parity check below
  1.2441 -      shlq(tmp3, 1);
  1.2442 -      jcc(Assembler::carryClear, integer);
  1.2443 -      jcc(Assembler::notZero, integer);
  1.2444 -      STOP("integer indefinite value shouldn't be seen here");
  1.2445 -      bind(integer);
  1.2446 -    }
  1.2447 -#endif
  1.2448 -#endif
  1.2449 -
  1.2450 -    // get rid of duplicate arguments. Stack: X^Y
  1.2451 -    if (num_fpu_regs_in_use > 0) {
  1.2452 -      fxch(); fpop();
  1.2453 -      fxch(); fpop();
  1.2454 -    } else {
  1.2455 -      ffree(2);
  1.2456 -      ffree(1);
  1.2457 -    }
  1.2458 -
  1.2459 -    testl(tmp2, 1);
  1.2460 -    jcc(Assembler::zero, done); // X <= 0, Y even: X^Y = abs(X)^Y
  1.2461 -    // X <= 0, Y even: X^Y = -abs(X)^Y
  1.2462 -
  1.2463 -    fchs();                     // Stack: -abs(X)^Y Y
  1.2464 -    jmp(done);
  1.2465 -  }
  1.2466 -
  1.2467 -  // slow case: runtime call
  1.2468 -  bind(slow_case);
  1.2469 -
  1.2470 -  fpop();                       // pop incorrect result or int(Y)
  1.2471 -
  1.2472 -  fp_runtime_fallback(is_exp ? CAST_FROM_FN_PTR(address, SharedRuntime::dexp) : CAST_FROM_FN_PTR(address, SharedRuntime::dpow),
  1.2473 -                      is_exp ? 1 : 2, num_fpu_regs_in_use);
  1.2474 -
  1.2475 -  // Come here with result in F-TOS
  1.2476 -  bind(done);
  1.2477 -}
  1.2478 -
  1.2479 -void MacroAssembler::fpop() {
  1.2480 -  ffree();
  1.2481 -  fincstp();
  1.2482 -}
  1.2483 -
  1.2484 -void MacroAssembler::fremr(Register tmp) {
  1.2485 -  save_rax(tmp);
  1.2486 -  { Label L;
  1.2487 -    bind(L);
  1.2488 -    fprem();
  1.2489 -    fwait(); fnstsw_ax();
  1.2490 -#ifdef _LP64
  1.2491 -    testl(rax, 0x400);
  1.2492 -    jcc(Assembler::notEqual, L);
  1.2493 -#else
  1.2494 -    sahf();
  1.2495 -    jcc(Assembler::parity, L);
  1.2496 -#endif // _LP64
  1.2497 -  }
  1.2498 -  restore_rax(tmp);
  1.2499 -  // Result is in ST0.
  1.2500 -  // Note: fxch & fpop to get rid of ST1
  1.2501 -  // (otherwise FPU stack could overflow eventually)
  1.2502 -  fxch(1);
  1.2503 -  fpop();
  1.2504 -}
  1.2505 -
  1.2506 -
  1.2507 -void MacroAssembler::incrementl(AddressLiteral dst) {
  1.2508 -  if (reachable(dst)) {
  1.2509 -    incrementl(as_Address(dst));
  1.2510 -  } else {
  1.2511 -    lea(rscratch1, dst);
  1.2512 -    incrementl(Address(rscratch1, 0));
  1.2513 -  }
  1.2514 -}
  1.2515 -
  1.2516 -void MacroAssembler::incrementl(ArrayAddress dst) {
  1.2517 -  incrementl(as_Address(dst));
  1.2518 -}
  1.2519 -
  1.2520 -void MacroAssembler::incrementl(Register reg, int value) {
  1.2521 -  if (value == min_jint) {addl(reg, value) ; return; }
  1.2522 -  if (value <  0) { decrementl(reg, -value); return; }
  1.2523 -  if (value == 0) {                        ; return; }
  1.2524 -  if (value == 1 && UseIncDec) { incl(reg) ; return; }
  1.2525 -  /* else */      { addl(reg, value)       ; return; }
  1.2526 -}
  1.2527 -
  1.2528 -void MacroAssembler::incrementl(Address dst, int value) {
  1.2529 -  if (value == min_jint) {addl(dst, value) ; return; }
  1.2530 -  if (value <  0) { decrementl(dst, -value); return; }
  1.2531 -  if (value == 0) {                        ; return; }
  1.2532 -  if (value == 1 && UseIncDec) { incl(dst) ; return; }
  1.2533 -  /* else */      { addl(dst, value)       ; return; }
  1.2534 -}
  1.2535 -
  1.2536 -void MacroAssembler::jump(AddressLiteral dst) {
  1.2537 -  if (reachable(dst)) {
  1.2538 -    jmp_literal(dst.target(), dst.rspec());
  1.2539 -  } else {
  1.2540 -    lea(rscratch1, dst);
  1.2541 -    jmp(rscratch1);
  1.2542 -  }
  1.2543 -}
  1.2544 -
  1.2545 -void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) {
  1.2546 -  if (reachable(dst)) {
  1.2547 -    InstructionMark im(this);
  1.2548 -    relocate(dst.reloc());
  1.2549 -    const int short_size = 2;
  1.2550 -    const int long_size = 6;
  1.2551 -    int offs = (intptr_t)dst.target() - ((intptr_t)pc());
  1.2552 -    if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) {
  1.2553 -      // 0111 tttn #8-bit disp
  1.2554 -      emit_byte(0x70 | cc);
  1.2555 -      emit_byte((offs - short_size) & 0xFF);
  1.2556 -    } else {
  1.2557 -      // 0000 1111 1000 tttn #32-bit disp
  1.2558 -      emit_byte(0x0F);
  1.2559 -      emit_byte(0x80 | cc);
  1.2560 -      emit_long(offs - long_size);
  1.2561 -    }
  1.2562 -  } else {
  1.2563 -#ifdef ASSERT
  1.2564 -    warning("reversing conditional branch");
  1.2565 -#endif /* ASSERT */
  1.2566 -    Label skip;
  1.2567 -    jccb(reverse[cc], skip);
  1.2568 -    lea(rscratch1, dst);
  1.2569 -    Assembler::jmp(rscratch1);
  1.2570 -    bind(skip);
  1.2571 -  }
  1.2572 -}
  1.2573 -
  1.2574 -void MacroAssembler::ldmxcsr(AddressLiteral src) {
  1.2575 -  if (reachable(src)) {
  1.2576 -    Assembler::ldmxcsr(as_Address(src));
  1.2577 -  } else {
  1.2578 -    lea(rscratch1, src);
  1.2579 -    Assembler::ldmxcsr(Address(rscratch1, 0));
  1.2580 -  }
  1.2581 -}
  1.2582 -
  1.2583 -int MacroAssembler::load_signed_byte(Register dst, Address src) {
  1.2584 -  int off;
  1.2585 -  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
  1.2586 -    off = offset();
  1.2587 -    movsbl(dst, src); // movsxb
  1.2588 -  } else {
  1.2589 -    off = load_unsigned_byte(dst, src);
  1.2590 -    shll(dst, 24);
  1.2591 -    sarl(dst, 24);
  1.2592 -  }
  1.2593 -  return off;
  1.2594 -}
  1.2595 -
  1.2596 -// Note: load_signed_short used to be called load_signed_word.
  1.2597 -// Although the 'w' in x86 opcodes refers to the term "word" in the assembler
  1.2598 -// manual, which means 16 bits, that usage is found nowhere in HotSpot code.
  1.2599 -// The term "word" in HotSpot means a 32- or 64-bit machine word.
  1.2600 -int MacroAssembler::load_signed_short(Register dst, Address src) {
  1.2601 -  int off;
  1.2602 -  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
  1.2603 -    // This is dubious to me since it seems safe to do a signed 16 => 64 bit
  1.2604 -    // version but this is what 64bit has always done. This seems to imply
  1.2605 -    // that users are only using 32bits worth.
  1.2606 -    off = offset();
  1.2607 -    movswl(dst, src); // movsxw
  1.2608 -  } else {
  1.2609 -    off = load_unsigned_short(dst, src);
  1.2610 -    shll(dst, 16);
  1.2611 -    sarl(dst, 16);
  1.2612 -  }
  1.2613 -  return off;
  1.2614 -}
  1.2615 -
  1.2616 -int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
  1.2617 -  // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
  1.2618 -  // and "3.9 Partial Register Penalties", p. 22).
  1.2619 -  int off;
  1.2620 -  if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) {
  1.2621 -    off = offset();
  1.2622 -    movzbl(dst, src); // movzxb
  1.2623 -  } else {
  1.2624 -    xorl(dst, dst);
  1.2625 -    off = offset();
  1.2626 -    movb(dst, src);
  1.2627 -  }
  1.2628 -  return off;
  1.2629 -}
  1.2630 -
  1.2631 -// Note: load_unsigned_short used to be called load_unsigned_word.
  1.2632 -int MacroAssembler::load_unsigned_short(Register dst, Address src) {
  1.2633 -  // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
  1.2634 -  // and "3.9 Partial Register Penalties", p. 22).
  1.2635 -  int off;
  1.2636 -  if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) {
  1.2637 -    off = offset();
  1.2638 -    movzwl(dst, src); // movzxw
  1.2639 -  } else {
  1.2640 -    xorl(dst, dst);
  1.2641 -    off = offset();
  1.2642 -    movw(dst, src);
  1.2643 -  }
  1.2644 -  return off;
  1.2645 -}
  1.2646 -
  1.2647 -void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
  1.2648 -  switch (size_in_bytes) {
  1.2649 -#ifndef _LP64
  1.2650 -  case  8:
  1.2651 -    assert(dst2 != noreg, "second dest register required");
  1.2652 -    movl(dst,  src);
  1.2653 -    movl(dst2, src.plus_disp(BytesPerInt));
  1.2654 -    break;
  1.2655 -#else
  1.2656 -  case  8:  movq(dst, src); break;
  1.2657 -#endif
  1.2658 -  case  4:  movl(dst, src); break;
  1.2659 -  case  2:  is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break;
  1.2660 -  case  1:  is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break;
  1.2661 -  default:  ShouldNotReachHere();
  1.2662 -  }
  1.2663 -}
  1.2664 -
  1.2665 -void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
  1.2666 -  switch (size_in_bytes) {
  1.2667 -#ifndef _LP64
  1.2668 -  case  8:
  1.2669 -    assert(src2 != noreg, "second source register required");
  1.2670 -    movl(dst,                        src);
  1.2671 -    movl(dst.plus_disp(BytesPerInt), src2);
  1.2672 -    break;
  1.2673 -#else
  1.2674 -  case  8:  movq(dst, src); break;
  1.2675 -#endif
  1.2676 -  case  4:  movl(dst, src); break;
  1.2677 -  case  2:  movw(dst, src); break;
  1.2678 -  case  1:  movb(dst, src); break;
  1.2679 -  default:  ShouldNotReachHere();
  1.2680 -  }
  1.2681 -}
  1.2682 -
  1.2683 -void MacroAssembler::mov32(AddressLiteral dst, Register src) {
  1.2684 -  if (reachable(dst)) {
  1.2685 -    movl(as_Address(dst), src);
  1.2686 -  } else {
  1.2687 -    lea(rscratch1, dst);
  1.2688 -    movl(Address(rscratch1, 0), src);
  1.2689 -  }
  1.2690 -}
  1.2691 -
  1.2692 -void MacroAssembler::mov32(Register dst, AddressLiteral src) {
  1.2693 -  if (reachable(src)) {
  1.2694 -    movl(dst, as_Address(src));
  1.2695 -  } else {
  1.2696 -    lea(rscratch1, src);
  1.2697 -    movl(dst, Address(rscratch1, 0));
  1.2698 -  }
  1.2699 -}
  1.2700 -
  1.2701 -// C++ bool manipulation
  1.2702 -
  1.2703 -void MacroAssembler::movbool(Register dst, Address src) {
  1.2704 -  if(sizeof(bool) == 1)
  1.2705 -    movb(dst, src);
  1.2706 -  else if(sizeof(bool) == 2)
  1.2707 -    movw(dst, src);
  1.2708 -  else if(sizeof(bool) == 4)
  1.2709 -    movl(dst, src);
  1.2710 -  else
  1.2711 -    // unsupported
  1.2712 -    ShouldNotReachHere();
  1.2713 -}
  1.2714 -
  1.2715 -void MacroAssembler::movbool(Address dst, bool boolconst) {
  1.2716 -  if(sizeof(bool) == 1)
  1.2717 -    movb(dst, (int) boolconst);
  1.2718 -  else if(sizeof(bool) == 2)
  1.2719 -    movw(dst, (int) boolconst);
  1.2720 -  else if(sizeof(bool) == 4)
  1.2721 -    movl(dst, (int) boolconst);
  1.2722 -  else
  1.2723 -    // unsupported
  1.2724 -    ShouldNotReachHere();
  1.2725 -}
  1.2726 -
  1.2727 -void MacroAssembler::movbool(Address dst, Register src) {
  1.2728 -  if(sizeof(bool) == 1)
  1.2729 -    movb(dst, src);
  1.2730 -  else if(sizeof(bool) == 2)
  1.2731 -    movw(dst, src);
  1.2732 -  else if(sizeof(bool) == 4)
  1.2733 -    movl(dst, src);
  1.2734 -  else
  1.2735 -    // unsupported
  1.2736 -    ShouldNotReachHere();
  1.2737 -}
  1.2738 -
  1.2739 -void MacroAssembler::movbyte(ArrayAddress dst, int src) {
  1.2740 -  movb(as_Address(dst), src);
  1.2741 -}
  1.2742 -
  1.2743 -void MacroAssembler::movdl(XMMRegister dst, AddressLiteral src) {
  1.2744 -  if (reachable(src)) {
  1.2745 -    movdl(dst, as_Address(src));
  1.2746 -  } else {
  1.2747 -    lea(rscratch1, src);
  1.2748 -    movdl(dst, Address(rscratch1, 0));
  1.2749 -  }
  1.2750 -}
  1.2751 -
  1.2752 -void MacroAssembler::movq(XMMRegister dst, AddressLiteral src) {
  1.2753 -  if (reachable(src)) {
  1.2754 -    movq(dst, as_Address(src));
  1.2755 -  } else {
  1.2756 -    lea(rscratch1, src);
  1.2757 -    movq(dst, Address(rscratch1, 0));
  1.2758 -  }
  1.2759 -}
  1.2760 -
  1.2761 -void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) {
  1.2762 -  if (reachable(src)) {
  1.2763 -    if (UseXmmLoadAndClearUpper) {
  1.2764 -      movsd (dst, as_Address(src));
  1.2765 -    } else {
  1.2766 -      movlpd(dst, as_Address(src));
  1.2767 -    }
  1.2768 -  } else {
  1.2769 -    lea(rscratch1, src);
  1.2770 -    if (UseXmmLoadAndClearUpper) {
  1.2771 -      movsd (dst, Address(rscratch1, 0));
  1.2772 -    } else {
  1.2773 -      movlpd(dst, Address(rscratch1, 0));
  1.2774 -    }
  1.2775 -  }
  1.2776 -}
  1.2777 -
  1.2778 -void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) {
  1.2779 -  if (reachable(src)) {
  1.2780 -    movss(dst, as_Address(src));
  1.2781 -  } else {
  1.2782 -    lea(rscratch1, src);
  1.2783 -    movss(dst, Address(rscratch1, 0));
  1.2784 -  }
  1.2785 -}
  1.2786 -
  1.2787 -void MacroAssembler::movptr(Register dst, Register src) {
  1.2788 -  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
  1.2789 -}
  1.2790 -
  1.2791 -void MacroAssembler::movptr(Register dst, Address src) {
  1.2792 -  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
  1.2793 -}
  1.2794 -
  1.2795 -// src should NEVER be a real pointer. Use AddressLiteral for true pointers
  1.2796 -void MacroAssembler::movptr(Register dst, intptr_t src) {
  1.2797 -  LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src));
  1.2798 -}
  1.2799 -
  1.2800 -void MacroAssembler::movptr(Address dst, Register src) {
  1.2801 -  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
  1.2802 -}
  1.2803 -
  1.2804 -void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src) {
  1.2805 -  if (reachable(src)) {
  1.2806 -    Assembler::movdqu(dst, as_Address(src));
  1.2807 -  } else {
  1.2808 -    lea(rscratch1, src);
  1.2809 -    Assembler::movdqu(dst, Address(rscratch1, 0));
  1.2810 -  }
  1.2811 -}
  1.2812 -
  1.2813 -void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
  1.2814 -  if (reachable(src)) {
  1.2815 -    Assembler::movsd(dst, as_Address(src));
  1.2816 -  } else {
  1.2817 -    lea(rscratch1, src);
  1.2818 -    Assembler::movsd(dst, Address(rscratch1, 0));
  1.2819 -  }
  1.2820 -}
  1.2821 -
  1.2822 -void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {
  1.2823 -  if (reachable(src)) {
  1.2824 -    Assembler::movss(dst, as_Address(src));
  1.2825 -  } else {
  1.2826 -    lea(rscratch1, src);
  1.2827 -    Assembler::movss(dst, Address(rscratch1, 0));
  1.2828 -  }
  1.2829 -}
  1.2830 -
  1.2831 -void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) {
  1.2832 -  if (reachable(src)) {
  1.2833 -    Assembler::mulsd(dst, as_Address(src));
  1.2834 -  } else {
  1.2835 -    lea(rscratch1, src);
  1.2836 -    Assembler::mulsd(dst, Address(rscratch1, 0));
  1.2837 -  }
  1.2838 -}
  1.2839 -
  1.2840 -void MacroAssembler::mulss(XMMRegister dst, AddressLiteral src) {
  1.2841 -  if (reachable(src)) {
  1.2842 -    Assembler::mulss(dst, as_Address(src));
  1.2843 -  } else {
  1.2844 -    lea(rscratch1, src);
  1.2845 -    Assembler::mulss(dst, Address(rscratch1, 0));
  1.2846 -  }
  1.2847 -}
  1.2848 -
  1.2849 -void MacroAssembler::null_check(Register reg, int offset) {
  1.2850 -  if (needs_explicit_null_check(offset)) {
  1.2851 -    // provoke OS NULL exception if reg = NULL by
  1.2852 -    // accessing M[reg] w/o changing any (non-CC) registers
  1.2853 -    // NOTE: cmpl is plenty here to provoke a segv
  1.2854 -    cmpptr(rax, Address(reg, 0));
  1.2855 -    // Note: should probably use testl(rax, Address(reg, 0));
  1.2856 -    //       may be shorter code (however, this version of
  1.2857 -    //       testl needs to be implemented first)
  1.2858 -  } else {
  1.2859 -    // nothing to do, (later) access of M[reg + offset]
  1.2860 -    // will provoke OS NULL exception if reg = NULL
  1.2861 -  }
  1.2862 -}
  1.2863 -
  1.2864 -void MacroAssembler::os_breakpoint() {
  1.2865 -  // instead of directly emitting a breakpoint, call os:breakpoint for better debugability
  1.2866 -  // (e.g., MSVC can't call ps() otherwise)
  1.2867 -  call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
  1.2868 -}
  1.2869 -
  1.2870 -void MacroAssembler::pop_CPU_state() {
  1.2871 -  pop_FPU_state();
  1.2872 -  pop_IU_state();
  1.2873 -}
  1.2874 -
  1.2875 -void MacroAssembler::pop_FPU_state() {
  1.2876 -  NOT_LP64(frstor(Address(rsp, 0));)
  1.2877 -  LP64_ONLY(fxrstor(Address(rsp, 0));)
  1.2878 -  addptr(rsp, FPUStateSizeInWords * wordSize);
  1.2879 -}
  1.2880 -
  1.2881 -void MacroAssembler::pop_IU_state() {
  1.2882 -  popa();
  1.2883 -  LP64_ONLY(addq(rsp, 8));
  1.2884 -  popf();
  1.2885 -}
  1.2886 -
  1.2887 -// Save Integer and Float state
  1.2888 -// Warning: Stack must be 16 byte aligned (64bit)
  1.2889 -void MacroAssembler::push_CPU_state() {
  1.2890 -  push_IU_state();
  1.2891 -  push_FPU_state();
  1.2892 -}
  1.2893 -
  1.2894 -void MacroAssembler::push_FPU_state() {
  1.2895 -  subptr(rsp, FPUStateSizeInWords * wordSize);
  1.2896 -#ifndef _LP64
  1.2897 -  fnsave(Address(rsp, 0));
  1.2898 -  fwait();
  1.2899 -#else
  1.2900 -  fxsave(Address(rsp, 0));
  1.2901 -#endif // LP64
  1.2902 -}
  1.2903 -
  1.2904 -void MacroAssembler::push_IU_state() {
  1.2905 -  // Push flags first because pusha kills them
  1.2906 -  pushf();
  1.2907 -  // Make sure rsp stays 16-byte aligned
  1.2908 -  LP64_ONLY(subq(rsp, 8));
  1.2909 -  pusha();
  1.2910 -}
  1.2911 -
  1.2912 -void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
  1.2913 -  // determine java_thread register
  1.2914 -  if (!java_thread->is_valid()) {
  1.2915 -    java_thread = rdi;
  1.2916 -    get_thread(java_thread);
  1.2917 -  }
  1.2918 -  // we must set sp to zero to clear frame
  1.2919 -  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
  1.2920 -  if (clear_fp) {
  1.2921 -    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
  1.2922 -  }
  1.2923 -
  1.2924 -  if (clear_pc)
  1.2925 -    movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
  1.2926 -
  1.2927 -}
  1.2928 -
  1.2929 -void MacroAssembler::restore_rax(Register tmp) {
  1.2930 -  if (tmp == noreg) pop(rax);
  1.2931 -  else if (tmp != rax) mov(rax, tmp);
  1.2932 -}
  1.2933 -
  1.2934 -void MacroAssembler::round_to(Register reg, int modulus) {
  1.2935 -  addptr(reg, modulus - 1);
  1.2936 -  andptr(reg, -modulus);
  1.2937 -}
  1.2938 -
  1.2939 -void MacroAssembler::save_rax(Register tmp) {
  1.2940 -  if (tmp == noreg) push(rax);
  1.2941 -  else if (tmp != rax) mov(tmp, rax);
  1.2942 -}
  1.2943 -
  1.2944 -// Write serialization page so VM thread can do a pseudo remote membar.
  1.2945 -// We use the current thread pointer to calculate a thread specific
  1.2946 -// offset to write to within the page. This minimizes bus traffic
  1.2947 -// due to cache line collision.
  1.2948 -void MacroAssembler::serialize_memory(Register thread, Register tmp) {
  1.2949 -  movl(tmp, thread);
  1.2950 -  shrl(tmp, os::get_serialize_page_shift_count());
  1.2951 -  andl(tmp, (os::vm_page_size() - sizeof(int)));
  1.2952 -
  1.2953 -  Address index(noreg, tmp, Address::times_1);
  1.2954 -  ExternalAddress page(os::get_memory_serialize_page());
  1.2955 -
  1.2956 -  // Size of store must match masking code above
  1.2957 -  movl(as_Address(ArrayAddress(page, index)), tmp);
  1.2958 -}
  1.2959 -
  1.2960 -// Calls to C land
  1.2961 -//
  1.2962 -// When entering C land, the rbp, & rsp of the last Java frame have to be recorded
  1.2963 -// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
  1.2964 -// has to be reset to 0. This is required to allow proper stack traversal.
  1.2965 -void MacroAssembler::set_last_Java_frame(Register java_thread,
  1.2966 -                                         Register last_java_sp,
  1.2967 -                                         Register last_java_fp,
  1.2968 -                                         address  last_java_pc) {
  1.2969 -  // determine java_thread register
  1.2970 -  if (!java_thread->is_valid()) {
  1.2971 -    java_thread = rdi;
  1.2972 -    get_thread(java_thread);
  1.2973 -  }
  1.2974 -  // determine last_java_sp register
  1.2975 -  if (!last_java_sp->is_valid()) {
  1.2976 -    last_java_sp = rsp;
  1.2977 -  }
  1.2978 -
  1.2979 -  // last_java_fp is optional
  1.2980 -
  1.2981 -  if (last_java_fp->is_valid()) {
  1.2982 -    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp);
  1.2983 -  }
  1.2984 -
  1.2985 -  // last_java_pc is optional
  1.2986 -
  1.2987 -  if (last_java_pc != NULL) {
  1.2988 -    lea(Address(java_thread,
  1.2989 -                 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()),
  1.2990 -        InternalAddress(last_java_pc));
  1.2991 -
  1.2992 -  }
  1.2993 -  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
  1.2994 -}
  1.2995 -
  1.2996 -void MacroAssembler::shlptr(Register dst, int imm8) {
  1.2997 -  LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8));
  1.2998 -}
  1.2999 -
  1.3000 -void MacroAssembler::shrptr(Register dst, int imm8) {
  1.3001 -  LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8));
  1.3002 -}
  1.3003 -
  1.3004 -void MacroAssembler::sign_extend_byte(Register reg) {
  1.3005 -  if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) {
  1.3006 -    movsbl(reg, reg); // movsxb
  1.3007 -  } else {
  1.3008 -    shll(reg, 24);
  1.3009 -    sarl(reg, 24);
  1.3010 -  }
  1.3011 -}
  1.3012 -
  1.3013 -void MacroAssembler::sign_extend_short(Register reg) {
  1.3014 -  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
  1.3015 -    movswl(reg, reg); // movsxw
  1.3016 -  } else {
  1.3017 -    shll(reg, 16);
  1.3018 -    sarl(reg, 16);
  1.3019 -  }
  1.3020 -}
  1.3021 -
  1.3022 -void MacroAssembler::testl(Register dst, AddressLiteral src) {
  1.3023 -  assert(reachable(src), "Address should be reachable");
  1.3024 -  testl(dst, as_Address(src));
  1.3025 -}
  1.3026 -
  1.3027 -void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) {
  1.3028 -  if (reachable(src)) {
  1.3029 -    Assembler::sqrtsd(dst, as_Address(src));
  1.3030 -  } else {
  1.3031 -    lea(rscratch1, src);
  1.3032 -    Assembler::sqrtsd(dst, Address(rscratch1, 0));
  1.3033 -  }
  1.3034 -}
  1.3035 -
  1.3036 -void MacroAssembler::sqrtss(XMMRegister dst, AddressLiteral src) {
  1.3037 -  if (reachable(src)) {
  1.3038 -    Assembler::sqrtss(dst, as_Address(src));
  1.3039 -  } else {
  1.3040 -    lea(rscratch1, src);
  1.3041 -    Assembler::sqrtss(dst, Address(rscratch1, 0));
  1.3042 -  }
  1.3043 -}
  1.3044 -
  1.3045 -void MacroAssembler::subsd(XMMRegister dst, AddressLiteral src) {
  1.3046 -  if (reachable(src)) {
  1.3047 -    Assembler::subsd(dst, as_Address(src));
  1.3048 -  } else {
  1.3049 -    lea(rscratch1, src);
  1.3050 -    Assembler::subsd(dst, Address(rscratch1, 0));
  1.3051 -  }
  1.3052 -}
  1.3053 -
  1.3054 -void MacroAssembler::subss(XMMRegister dst, AddressLiteral src) {
  1.3055 -  if (reachable(src)) {
  1.3056 -    Assembler::subss(dst, as_Address(src));
  1.3057 -  } else {
  1.3058 -    lea(rscratch1, src);
  1.3059 -    Assembler::subss(dst, Address(rscratch1, 0));
  1.3060 -  }
  1.3061 -}
  1.3062 -
  1.3063 -void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
  1.3064 -  if (reachable(src)) {
  1.3065 -    Assembler::ucomisd(dst, as_Address(src));
  1.3066 -  } else {
  1.3067 -    lea(rscratch1, src);
  1.3068 -    Assembler::ucomisd(dst, Address(rscratch1, 0));
  1.3069 -  }
  1.3070 -}
  1.3071 -
  1.3072 -void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {
  1.3073 -  if (reachable(src)) {
  1.3074 -    Assembler::ucomiss(dst, as_Address(src));
  1.3075 -  } else {
  1.3076 -    lea(rscratch1, src);
  1.3077 -    Assembler::ucomiss(dst, Address(rscratch1, 0));
  1.3078 -  }
  1.3079 -}
  1.3080 -
  1.3081 -void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
  1.3082 -  // Used in sign-bit flipping with aligned address.
  1.3083 -  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
  1.3084 -  if (reachable(src)) {
  1.3085 -    Assembler::xorpd(dst, as_Address(src));
  1.3086 -  } else {
  1.3087 -    lea(rscratch1, src);
  1.3088 -    Assembler::xorpd(dst, Address(rscratch1, 0));
  1.3089 -  }
  1.3090 -}
  1.3091 -
  1.3092 -void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
  1.3093 -  // Used in sign-bit flipping with aligned address.
  1.3094 -  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
  1.3095 -  if (reachable(src)) {
  1.3096 -    Assembler::xorps(dst, as_Address(src));
  1.3097 -  } else {
  1.3098 -    lea(rscratch1, src);
  1.3099 -    Assembler::xorps(dst, Address(rscratch1, 0));
  1.3100 -  }
  1.3101 -}
  1.3102 -
  1.3103 -void MacroAssembler::pshufb(XMMRegister dst, AddressLiteral src) {
  1.3104 -  // Used in sign-bit flipping with aligned address.
  1.3105 -  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
  1.3106 -  if (reachable(src)) {
  1.3107 -    Assembler::pshufb(dst, as_Address(src));
  1.3108 -  } else {
  1.3109 -    lea(rscratch1, src);
  1.3110 -    Assembler::pshufb(dst, Address(rscratch1, 0));
  1.3111 -  }
  1.3112 -}
  1.3113 -
  1.3114 -// AVX 3-operands instructions
  1.3115 -
  1.3116 -void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  1.3117 -  if (reachable(src)) {
  1.3118 -    vaddsd(dst, nds, as_Address(src));
  1.3119 -  } else {
  1.3120 -    lea(rscratch1, src);
  1.3121 -    vaddsd(dst, nds, Address(rscratch1, 0));
  1.3122 -  }
  1.3123 -}
  1.3124 -
  1.3125 -void MacroAssembler::vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  1.3126 -  if (reachable(src)) {
  1.3127 -    vaddss(dst, nds, as_Address(src));
  1.3128 -  } else {
  1.3129 -    lea(rscratch1, src);
  1.3130 -    vaddss(dst, nds, Address(rscratch1, 0));
  1.3131 -  }
  1.3132 -}
  1.3133 -
  1.3134 -void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
  1.3135 -  if (reachable(src)) {
  1.3136 -    vandpd(dst, nds, as_Address(src), vector256);
  1.3137 -  } else {
  1.3138 -    lea(rscratch1, src);
  1.3139 -    vandpd(dst, nds, Address(rscratch1, 0), vector256);
  1.3140 -  }
  1.3141 -}
  1.3142 -
  1.3143 -void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
  1.3144 -  if (reachable(src)) {
  1.3145 -    vandps(dst, nds, as_Address(src), vector256);
  1.3146 -  } else {
  1.3147 -    lea(rscratch1, src);
  1.3148 -    vandps(dst, nds, Address(rscratch1, 0), vector256);
  1.3149 -  }
  1.3150 -}
  1.3151 -
  1.3152 -void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  1.3153 -  if (reachable(src)) {
  1.3154 -    vdivsd(dst, nds, as_Address(src));
  1.3155 -  } else {
  1.3156 -    lea(rscratch1, src);
  1.3157 -    vdivsd(dst, nds, Address(rscratch1, 0));
  1.3158 -  }
  1.3159 -}
  1.3160 -
  1.3161 -void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  1.3162 -  if (reachable(src)) {
  1.3163 -    vdivss(dst, nds, as_Address(src));
  1.3164 -  } else {
  1.3165 -    lea(rscratch1, src);
  1.3166 -    vdivss(dst, nds, Address(rscratch1, 0));
  1.3167 -  }
  1.3168 -}
  1.3169 -
  1.3170 -void MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  1.3171 -  if (reachable(src)) {
  1.3172 -    vmulsd(dst, nds, as_Address(src));
  1.3173 -  } else {
  1.3174 -    lea(rscratch1, src);
  1.3175 -    vmulsd(dst, nds, Address(rscratch1, 0));
  1.3176 -  }
  1.3177 -}
  1.3178 -
  1.3179 -void MacroAssembler::vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  1.3180 -  if (reachable(src)) {
  1.3181 -    vmulss(dst, nds, as_Address(src));
  1.3182 -  } else {
  1.3183 -    lea(rscratch1, src);
  1.3184 -    vmulss(dst, nds, Address(rscratch1, 0));
  1.3185 -  }
  1.3186 -}
  1.3187 -
  1.3188 -void MacroAssembler::vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  1.3189 -  if (reachable(src)) {
  1.3190 -    vsubsd(dst, nds, as_Address(src));
  1.3191 -  } else {
  1.3192 -    lea(rscratch1, src);
  1.3193 -    vsubsd(dst, nds, Address(rscratch1, 0));
  1.3194 -  }
  1.3195 -}
  1.3196 -
  1.3197 -void MacroAssembler::vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  1.3198 -  if (reachable(src)) {
  1.3199 -    vsubss(dst, nds, as_Address(src));
  1.3200 -  } else {
  1.3201 -    lea(rscratch1, src);
  1.3202 -    vsubss(dst, nds, Address(rscratch1, 0));
  1.3203 -  }
  1.3204 -}
  1.3205 -
  1.3206 -void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
  1.3207 -  if (reachable(src)) {
  1.3208 -    vxorpd(dst, nds, as_Address(src), vector256);
  1.3209 -  } else {
  1.3210 -    lea(rscratch1, src);
  1.3211 -    vxorpd(dst, nds, Address(rscratch1, 0), vector256);
  1.3212 -  }
  1.3213 -}
  1.3214 -
  1.3215 -void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
  1.3216 -  if (reachable(src)) {
  1.3217 -    vxorps(dst, nds, as_Address(src), vector256);
  1.3218 -  } else {
  1.3219 -    lea(rscratch1, src);
  1.3220 -    vxorps(dst, nds, Address(rscratch1, 0), vector256);
  1.3221 -  }
  1.3222 -}
  1.3223 -
  1.3224 -
  1.3225 -//////////////////////////////////////////////////////////////////////////////////
  1.3226 -#ifndef SERIALGC
  1.3227 -
  1.3228 -void MacroAssembler::g1_write_barrier_pre(Register obj,
  1.3229 -                                          Register pre_val,
  1.3230 -                                          Register thread,
  1.3231 -                                          Register tmp,
  1.3232 -                                          bool tosca_live,
  1.3233 -                                          bool expand_call) {
  1.3234 -
  1.3235 -  // If expand_call is true then we expand the call_VM_leaf macro
  1.3236 -  // directly to skip generating the check by
  1.3237 -  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
  1.3238 -
  1.3239 -#ifdef _LP64
  1.3240 -  assert(thread == r15_thread, "must be");
  1.3241 -#endif // _LP64
  1.3242 -
  1.3243 -  Label done;
  1.3244 -  Label runtime;
  1.3245 -
  1.3246 -  assert(pre_val != noreg, "check this code");
  1.3247 -
  1.3248 -  if (obj != noreg) {
  1.3249 -    assert_different_registers(obj, pre_val, tmp);
  1.3250 -    assert(pre_val != rax, "check this code");
  1.3251 -  }
  1.3252 -
  1.3253 -  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
  1.3254 -                                       PtrQueue::byte_offset_of_active()));
  1.3255 -  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
  1.3256 -                                       PtrQueue::byte_offset_of_index()));
  1.3257 -  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
  1.3258 -                                       PtrQueue::byte_offset_of_buf()));
  1.3259 -
  1.3260 -
  1.3261 -  // Is marking active?
  1.3262 -  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
  1.3263 -    cmpl(in_progress, 0);
  1.3264 -  } else {
  1.3265 -    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
  1.3266 -    cmpb(in_progress, 0);
  1.3267 -  }
  1.3268 -  jcc(Assembler::equal, done);
  1.3269 -
  1.3270 -  // Do we need to load the previous value?
  1.3271 -  if (obj != noreg) {
  1.3272 -    load_heap_oop(pre_val, Address(obj, 0));
  1.3273 -  }
  1.3274 -
  1.3275 -  // Is the previous value null?
  1.3276 -  cmpptr(pre_val, (int32_t) NULL_WORD);
  1.3277 -  jcc(Assembler::equal, done);
  1.3278 -
  1.3279 -  // Can we store original value in the thread's buffer?
  1.3280 -  // Is index == 0?
  1.3281 -  // (The index field is typed as size_t.)
  1.3282 -
  1.3283 -  movptr(tmp, index);                   // tmp := *index_adr
  1.3284 -  cmpptr(tmp, 0);                       // tmp == 0?
  1.3285 -  jcc(Assembler::equal, runtime);       // If yes, goto runtime
  1.3286 -
  1.3287 -  subptr(tmp, wordSize);                // tmp := tmp - wordSize
  1.3288 -  movptr(index, tmp);                   // *index_adr := tmp
  1.3289 -  addptr(tmp, buffer);                  // tmp := tmp + *buffer_adr
  1.3290 -
  1.3291 -  // Record the previous value
  1.3292 -  movptr(Address(tmp, 0), pre_val);
  1.3293 -  jmp(done);
  1.3294 -
  1.3295 -  bind(runtime);
  1.3296 -  // save the live input values
  1.3297 -  if(tosca_live) push(rax);
  1.3298 -
  1.3299 -  if (obj != noreg && obj != rax)
  1.3300 -    push(obj);
  1.3301 -
  1.3302 -  if (pre_val != rax)
  1.3303 -    push(pre_val);
  1.3304 -
  1.3305 -  // Calling the runtime using the regular call_VM_leaf mechanism generates
  1.3306 -  // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
  1.3307 -  // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.
  1.3308 -  //
  1.3309 -  // If we are generating the pre-barrier without a frame (e.g. in the
  1.3310 -  // intrinsified Reference.get() routine) then ebp might be pointing to
  1.3311 -  // the caller frame and so this check will most likely fail at runtime.
  1.3312 -  //
  1.3313 -  // Expanding the call directly bypasses the generation of the check.
  1.3314 -  // So when we do not have a full interpreter frame on the stack
  1.3315 -  // expand_call should be passed true.
  1.3316 -
  1.3317 -  NOT_LP64( push(thread); )
  1.3318 -
  1.3319 -  if (expand_call) {
  1.3320 -    LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
  1.3321 -    pass_arg1(this, thread);
  1.3322 -    pass_arg0(this, pre_val);
  1.3323 -    MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
  1.3324 -  } else {
  1.3325 -    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
  1.3326 -  }
  1.3327 -
  1.3328 -  NOT_LP64( pop(thread); )
  1.3329 -
  1.3330 -  // restore the live input values
  1.3331 -  if (pre_val != rax)
  1.3332 -    pop(pre_val);
  1.3333 -
  1.3334 -  if (obj != noreg && obj != rax)
  1.3335 -    pop(obj);
  1.3336 -
  1.3337 -  if(tosca_live) pop(rax);
  1.3338 -
  1.3339 -  bind(done);
  1.3340 -}
  1.3341 -
  1.3342 -void MacroAssembler::g1_write_barrier_post(Register store_addr,
  1.3343 -                                           Register new_val,
  1.3344 -                                           Register thread,
  1.3345 -                                           Register tmp,
  1.3346 -                                           Register tmp2) {
  1.3347 -#ifdef _LP64
  1.3348 -  assert(thread == r15_thread, "must be");
  1.3349 -#endif // _LP64
  1.3350 -
  1.3351 -  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
  1.3352 -                                       PtrQueue::byte_offset_of_index()));
  1.3353 -  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
  1.3354 -                                       PtrQueue::byte_offset_of_buf()));
  1.3355 -
  1.3356 -  BarrierSet* bs = Universe::heap()->barrier_set();
  1.3357 -  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  1.3358 -  Label done;
  1.3359 -  Label runtime;
  1.3360 -
  1.3361 -  // Does store cross heap regions?
  1.3362 -
  1.3363 -  movptr(tmp, store_addr);
  1.3364 -  xorptr(tmp, new_val);
  1.3365 -  shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
  1.3366 -  jcc(Assembler::equal, done);
  1.3367 -
  1.3368 -  // crosses regions, storing NULL?
  1.3369 -
  1.3370 -  cmpptr(new_val, (int32_t) NULL_WORD);
  1.3371 -  jcc(Assembler::equal, done);
  1.3372 -
  1.3373 -  // storing region crossing non-NULL, is card already dirty?
  1.3374 -
  1.3375 -  ExternalAddress cardtable((address) ct->byte_map_base);
  1.3376 -  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
  1.3377 -#ifdef _LP64
  1.3378 -  const Register card_addr = tmp;
  1.3379 -
  1.3380 -  movq(card_addr, store_addr);
  1.3381 -  shrq(card_addr, CardTableModRefBS::card_shift);
  1.3382 -
  1.3383 -  lea(tmp2, cardtable);
  1.3384 -
  1.3385 -  // get the address of the card
  1.3386 -  addq(card_addr, tmp2);
  1.3387 -#else
  1.3388 -  const Register card_index = tmp;
  1.3389 -
  1.3390 -  movl(card_index, store_addr);
  1.3391 -  shrl(card_index, CardTableModRefBS::card_shift);
  1.3392 -
  1.3393 -  Address index(noreg, card_index, Address::times_1);
  1.3394 -  const Register card_addr = tmp;
  1.3395 -  lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
  1.3396 -#endif
  1.3397 -  cmpb(Address(card_addr, 0), 0);
  1.3398 -  jcc(Assembler::equal, done);
  1.3399 -
  1.3400 -  // storing a region crossing, non-NULL oop, card is clean.
  1.3401 -  // dirty card and log.
  1.3402 -
  1.3403 -  movb(Address(card_addr, 0), 0);
  1.3404 -
  1.3405 -  cmpl(queue_index, 0);
  1.3406 -  jcc(Assembler::equal, runtime);
  1.3407 -  subl(queue_index, wordSize);
  1.3408 -  movptr(tmp2, buffer);
  1.3409 -#ifdef _LP64
  1.3410 -  movslq(rscratch1, queue_index);
  1.3411 -  addq(tmp2, rscratch1);
  1.3412 -  movq(Address(tmp2, 0), card_addr);
  1.3413 -#else
  1.3414 -  addl(tmp2, queue_index);
  1.3415 -  movl(Address(tmp2, 0), card_index);
  1.3416 -#endif
  1.3417 -  jmp(done);
  1.3418 -
  1.3419 -  bind(runtime);
  1.3420 -  // save the live input values
  1.3421 -  push(store_addr);
  1.3422 -  push(new_val);
  1.3423 -#ifdef _LP64
  1.3424 -  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
  1.3425 -#else
  1.3426 -  push(thread);
  1.3427 -  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
  1.3428 -  pop(thread);
  1.3429 -#endif
  1.3430 -  pop(new_val);
  1.3431 -  pop(store_addr);
  1.3432 -
  1.3433 -  bind(done);
  1.3434 -}
  1.3435 -
  1.3436 -#endif // SERIALGC
  1.3437 -//////////////////////////////////////////////////////////////////////////////////
  1.3438 -
  1.3439 -
  1.3440 -void MacroAssembler::store_check(Register obj) {
  1.3441 -  // Does a store check for the oop in register obj. The content of
  1.3442 -  // register obj is destroyed afterwards.
  1.3443 -  store_check_part_1(obj);
  1.3444 -  store_check_part_2(obj);
  1.3445 -}
  1.3446 -
  1.3447 -void MacroAssembler::store_check(Register obj, Address dst) {
  1.3448 -  store_check(obj);
  1.3449 -}
  1.3450 -
  1.3451 -
  1.3452 -// split the store check operation so that other instructions can be scheduled in between
  1.3453 -void MacroAssembler::store_check_part_1(Register obj) {
  1.3454 -  BarrierSet* bs = Universe::heap()->barrier_set();
  1.3455 -  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  1.3456 -  shrptr(obj, CardTableModRefBS::card_shift);
  1.3457 -}
  1.3458 -
  1.3459 -void MacroAssembler::store_check_part_2(Register obj) {
  1.3460 -  BarrierSet* bs = Universe::heap()->barrier_set();
  1.3461 -  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  1.3462 -  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  1.3463 -  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
  1.3464 -
  1.3465 -  // The calculation for byte_map_base is as follows:
  1.3466 -  // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);
  1.3467 -  // So this essentially converts an address to a displacement and
  1.3468 -  // it will never need to be relocated. On 64bit however the value may be too
  1.3469 -  // large for a 32bit displacement
  1.3470 -
  1.3471 -  intptr_t disp = (intptr_t) ct->byte_map_base;
  1.3472 -  if (is_simm32(disp)) {
  1.3473 -    Address cardtable(noreg, obj, Address::times_1, disp);
  1.3474 -    movb(cardtable, 0);
  1.3475 -  } else {
  1.3476 -    // By doing it as an ExternalAddress disp could be converted to a rip-relative
  1.3477 -    // displacement and done in a single instruction given favorable mapping and
  1.3478 -    // a smarter version of as_Address. Worst case it is two instructions which
  1.3479 -    // is no worse off than loading disp into a register and doing as a simple
  1.3480 -    // Address() as above.
  1.3481 -    // We can't do as ExternalAddress as the only style since if disp == 0 we'll
  1.3482 -    // assert since NULL isn't acceptable in a reloci (see 6644928). In any case
  1.3483 -    // in some cases we'll get a single instruction version.
  1.3484 -
  1.3485 -    ExternalAddress cardtable((address)disp);
  1.3486 -    Address index(noreg, obj, Address::times_1);
  1.3487 -    movb(as_Address(ArrayAddress(cardtable, index)), 0);
  1.3488 -  }
  1.3489 -}
  1.3490 -
  1.3491 -void MacroAssembler::subptr(Register dst, int32_t imm32) {
  1.3492 -  LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32));
  1.3493 -}
  1.3494 -
  1.3495 -// Force generation of a 4 byte immediate value even if it fits into 8bit
  1.3496 -void MacroAssembler::subptr_imm32(Register dst, int32_t imm32) {
  1.3497 -  LP64_ONLY(subq_imm32(dst, imm32)) NOT_LP64(subl_imm32(dst, imm32));
  1.3498 -}
  1.3499 -
  1.3500 -void MacroAssembler::subptr(Register dst, Register src) {
  1.3501 -  LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src));
  1.3502 -}
  1.3503 -
  1.3504 -// C++ bool manipulation
  1.3505 -void MacroAssembler::testbool(Register dst) {
  1.3506 -  if(sizeof(bool) == 1)
  1.3507 -    testb(dst, 0xff);
  1.3508 -  else if(sizeof(bool) == 2) {
  1.3509 -    // testw implementation needed for two byte bools
  1.3510 -    ShouldNotReachHere();
  1.3511 -  } else if(sizeof(bool) == 4)
  1.3512 -    testl(dst, dst);
  1.3513 -  else
  1.3514 -    // unsupported
  1.3515 -    ShouldNotReachHere();
  1.3516 -}
  1.3517 -
  1.3518 -void MacroAssembler::testptr(Register dst, Register src) {
  1.3519 -  LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src));
  1.3520 -}
  1.3521 -
  1.3522 -// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
  1.3523 -void MacroAssembler::tlab_allocate(Register obj,
  1.3524 -                                   Register var_size_in_bytes,
  1.3525 -                                   int con_size_in_bytes,
  1.3526 -                                   Register t1,
  1.3527 -                                   Register t2,
  1.3528 -                                   Label& slow_case) {
  1.3529 -  assert_different_registers(obj, t1, t2);
  1.3530 -  assert_different_registers(obj, var_size_in_bytes, t1);
  1.3531 -  Register end = t2;
  1.3532 -  Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread);
  1.3533 -
  1.3534 -  verify_tlab();
  1.3535 -
  1.3536 -  NOT_LP64(get_thread(thread));
  1.3537 -
  1.3538 -  movptr(obj, Address(thread, JavaThread::tlab_top_offset()));
  1.3539 -  if (var_size_in_bytes == noreg) {
  1.3540 -    lea(end, Address(obj, con_size_in_bytes));
  1.3541 -  } else {
  1.3542 -    lea(end, Address(obj, var_size_in_bytes, Address::times_1));
  1.3543 -  }
  1.3544 -  cmpptr(end, Address(thread, JavaThread::tlab_end_offset()));
  1.3545 -  jcc(Assembler::above, slow_case);
  1.3546 -
  1.3547 -  // update the tlab top pointer
  1.3548 -  movptr(Address(thread, JavaThread::tlab_top_offset()), end);
  1.3549 -
  1.3550 -  // recover var_size_in_bytes if necessary
  1.3551 -  if (var_size_in_bytes == end) {
  1.3552 -    subptr(var_size_in_bytes, obj);
  1.3553 -  }
  1.3554 -  verify_tlab();
  1.3555 -}
  1.3556 -
  1.3557 -// Preserves rbx, and rdx.
  1.3558 -Register MacroAssembler::tlab_refill(Label& retry,
  1.3559 -                                     Label& try_eden,
  1.3560 -                                     Label& slow_case) {
  1.3561 -  Register top = rax;
  1.3562 -  Register t1  = rcx;
  1.3563 -  Register t2  = rsi;
  1.3564 -  Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread);
  1.3565 -  assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx);
  1.3566 -  Label do_refill, discard_tlab;
  1.3567 -
  1.3568 -  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
  1.3569 -    // No allocation in the shared eden.
  1.3570 -    jmp(slow_case);
  1.3571 -  }
  1.3572 -
  1.3573 -  NOT_LP64(get_thread(thread_reg));
  1.3574 -
  1.3575 -  movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
  1.3576 -  movptr(t1,  Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
  1.3577 -
  1.3578 -  // calculate amount of free space
  1.3579 -  subptr(t1, top);
  1.3580 -  shrptr(t1, LogHeapWordSize);
  1.3581 -
  1.3582 -  // Retain tlab and allocate object in shared space if
  1.3583 -  // the amount free in the tlab is too large to discard.
  1.3584 -  cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())));
  1.3585 -  jcc(Assembler::lessEqual, discard_tlab);
  1.3586 -
  1.3587 -  // Retain
  1.3588 -  // %%% yuck as movptr...
  1.3589 -  movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment());
  1.3590 -  addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2);
  1.3591 -  if (TLABStats) {
  1.3592 -    // increment number of slow_allocations
  1.3593 -    addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1);
  1.3594 -  }
  1.3595 -  jmp(try_eden);
  1.3596 -
  1.3597 -  bind(discard_tlab);
  1.3598 -  if (TLABStats) {
  1.3599 -    // increment number of refills
  1.3600 -    addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1);
  1.3601 -    // accumulate wastage -- t1 is amount free in tlab
  1.3602 -    addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1);
  1.3603 -  }
  1.3604 -
  1.3605 -  // if tlab is currently allocated (top or end != null) then
  1.3606 -  // fill [top, end + alignment_reserve) with array object
  1.3607 -  testptr(top, top);
  1.3608 -  jcc(Assembler::zero, do_refill);
  1.3609 -
  1.3610 -  // set up the mark word
  1.3611 -  movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2));
  1.3612 -  // set the length to the remaining space
  1.3613 -  subptr(t1, typeArrayOopDesc::header_size(T_INT));
  1.3614 -  addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve());
  1.3615 -  shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint)));
  1.3616 -  movl(Address(top, arrayOopDesc::length_offset_in_bytes()), t1);
  1.3617 -  // set klass to intArrayKlass
  1.3618 -  // dubious reloc why not an oop reloc?
  1.3619 -  movptr(t1, ExternalAddress((address)Universe::intArrayKlassObj_addr()));
  1.3620 -  // store klass last.  concurrent gcs assumes klass length is valid if
  1.3621 -  // klass field is not null.
  1.3622 -  store_klass(top, t1);
  1.3623 -
  1.3624 -  movptr(t1, top);
  1.3625 -  subptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
  1.3626 -  incr_allocated_bytes(thread_reg, t1, 0);
  1.3627 -
  1.3628 -  // refill the tlab with an eden allocation
  1.3629 -  bind(do_refill);
  1.3630 -  movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
  1.3631 -  shlptr(t1, LogHeapWordSize);
  1.3632 -  // allocate new tlab, address returned in top
  1.3633 -  eden_allocate(top, t1, 0, t2, slow_case);
  1.3634 -
  1.3635 -  // Check that t1 was preserved in eden_allocate.
  1.3636 -#ifdef ASSERT
  1.3637 -  if (UseTLAB) {
  1.3638 -    Label ok;
  1.3639 -    Register tsize = rsi;
  1.3640 -    assert_different_registers(tsize, thread_reg, t1);
  1.3641 -    push(tsize);
  1.3642 -    movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
  1.3643 -    shlptr(tsize, LogHeapWordSize);
  1.3644 -    cmpptr(t1, tsize);
  1.3645 -    jcc(Assembler::equal, ok);
  1.3646 -    STOP("assert(t1 != tlab size)");
  1.3647 -    should_not_reach_here();
  1.3648 -
  1.3649 -    bind(ok);
  1.3650 -    pop(tsize);
  1.3651 -  }
  1.3652 -#endif
  1.3653 -  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top);
  1.3654 -  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top);
  1.3655 -  addptr(top, t1);
  1.3656 -  subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
  1.3657 -  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top);
  1.3658 -  verify_tlab();
  1.3659 -  jmp(retry);
  1.3660 -
  1.3661 -  return thread_reg; // for use by caller
  1.3662 -}
  1.3663 -
  1.3664 -void MacroAssembler::incr_allocated_bytes(Register thread,
  1.3665 -                                          Register var_size_in_bytes,
  1.3666 -                                          int con_size_in_bytes,
  1.3667 -                                          Register t1) {
  1.3668 -  if (!thread->is_valid()) {
  1.3669 -#ifdef _LP64
  1.3670 -    thread = r15_thread;
  1.3671 -#else
  1.3672 -    assert(t1->is_valid(), "need temp reg");
  1.3673 -    thread = t1;
  1.3674 -    get_thread(thread);
  1.3675 -#endif
  1.3676 -  }
  1.3677 -
  1.3678 -#ifdef _LP64
  1.3679 -  if (var_size_in_bytes->is_valid()) {
  1.3680 -    addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
  1.3681 -  } else {
  1.3682 -    addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
  1.3683 -  }
  1.3684 -#else
  1.3685 -  if (var_size_in_bytes->is_valid()) {
  1.3686 -    addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
  1.3687 -  } else {
  1.3688 -    addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
  1.3689 -  }
  1.3690 -  adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0);
  1.3691 -#endif
  1.3692 -}
  1.3693 -
  1.3694 -void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) {
  1.3695 -  pusha();
  1.3696 -
  1.3697 -  // if we are coming from c1, xmm registers may be live
  1.3698 -  int off = 0;
  1.3699 -  if (UseSSE == 1)  {
  1.3700 -    subptr(rsp, sizeof(jdouble)*8);
  1.3701 -    movflt(Address(rsp,off++*sizeof(jdouble)),xmm0);
  1.3702 -    movflt(Address(rsp,off++*sizeof(jdouble)),xmm1);
  1.3703 -    movflt(Address(rsp,off++*sizeof(jdouble)),xmm2);
  1.3704 -    movflt(Address(rsp,off++*sizeof(jdouble)),xmm3);
  1.3705 -    movflt(Address(rsp,off++*sizeof(jdouble)),xmm4);
  1.3706 -    movflt(Address(rsp,off++*sizeof(jdouble)),xmm5);
  1.3707 -    movflt(Address(rsp,off++*sizeof(jdouble)),xmm6);
  1.3708 -    movflt(Address(rsp,off++*sizeof(jdouble)),xmm7);
  1.3709 -  } else if (UseSSE >= 2)  {
  1.3710 -#ifdef COMPILER2
  1.3711 -    if (MaxVectorSize > 16) {
  1.3712 -      assert(UseAVX > 0, "256bit vectors are supported only with AVX");
  1.3713 -      // Save upper half of YMM registers
  1.3714 -      subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
  1.3715 -      vextractf128h(Address(rsp,  0),xmm0);
  1.3716 -      vextractf128h(Address(rsp, 16),xmm1);
  1.3717 -      vextractf128h(Address(rsp, 32),xmm2);
  1.3718 -      vextractf128h(Address(rsp, 48),xmm3);
  1.3719 -      vextractf128h(Address(rsp, 64),xmm4);
  1.3720 -      vextractf128h(Address(rsp, 80),xmm5);
  1.3721 -      vextractf128h(Address(rsp, 96),xmm6);
  1.3722 -      vextractf128h(Address(rsp,112),xmm7);
  1.3723 -#ifdef _LP64
  1.3724 -      vextractf128h(Address(rsp,128),xmm8);
  1.3725 -      vextractf128h(Address(rsp,144),xmm9);
  1.3726 -      vextractf128h(Address(rsp,160),xmm10);
  1.3727 -      vextractf128h(Address(rsp,176),xmm11);
  1.3728 -      vextractf128h(Address(rsp,192),xmm12);
  1.3729 -      vextractf128h(Address(rsp,208),xmm13);
  1.3730 -      vextractf128h(Address(rsp,224),xmm14);
  1.3731 -      vextractf128h(Address(rsp,240),xmm15);
  1.3732 -#endif
  1.3733 -    }
  1.3734 -#endif
  1.3735 -    // Save whole 128bit (16 bytes) XMM registers
  1.3736 -    subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
  1.3737 -    movdqu(Address(rsp,off++*16),xmm0);
  1.3738 -    movdqu(Address(rsp,off++*16),xmm1);
  1.3739 -    movdqu(Address(rsp,off++*16),xmm2);
  1.3740 -    movdqu(Address(rsp,off++*16),xmm3);
  1.3741 -    movdqu(Address(rsp,off++*16),xmm4);
  1.3742 -    movdqu(Address(rsp,off++*16),xmm5);
  1.3743 -    movdqu(Address(rsp,off++*16),xmm6);
  1.3744 -    movdqu(Address(rsp,off++*16),xmm7);
  1.3745 -#ifdef _LP64
  1.3746 -    movdqu(Address(rsp,off++*16),xmm8);
  1.3747 -    movdqu(Address(rsp,off++*16),xmm9);
  1.3748 -    movdqu(Address(rsp,off++*16),xmm10);
  1.3749 -    movdqu(Address(rsp,off++*16),xmm11);
  1.3750 -    movdqu(Address(rsp,off++*16),xmm12);
  1.3751 -    movdqu(Address(rsp,off++*16),xmm13);
  1.3752 -    movdqu(Address(rsp,off++*16),xmm14);
  1.3753 -    movdqu(Address(rsp,off++*16),xmm15);
  1.3754 -#endif
  1.3755 -  }
  1.3756 -
  1.3757 -  // Preserve registers across runtime call
  1.3758 -  int incoming_argument_and_return_value_offset = -1;
  1.3759 -  if (num_fpu_regs_in_use > 1) {
  1.3760 -    // Must preserve all other FPU regs (could alternatively convert
  1.3761 -    // SharedRuntime::dsin, dcos etc. into assembly routines known not to trash
  1.3762 -    // FPU state, but can not trust C compiler)
  1.3763 -    NEEDS_CLEANUP;
  1.3764 -    // NOTE that in this case we also push the incoming argument(s) to
  1.3765 -    // the stack and restore it later; we also use this stack slot to
  1.3766 -    // hold the return value from dsin, dcos etc.
  1.3767 -    for (int i = 0; i < num_fpu_regs_in_use; i++) {
  1.3768 -      subptr(rsp, sizeof(jdouble));
  1.3769 -      fstp_d(Address(rsp, 0));
  1.3770 -    }
  1.3771 -    incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
  1.3772 -    for (int i = nb_args-1; i >= 0; i--) {
  1.3773 -      fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble)));
  1.3774 -    }
  1.3775 -  }
  1.3776 -
  1.3777 -  subptr(rsp, nb_args*sizeof(jdouble));
  1.3778 -  for (int i = 0; i < nb_args; i++) {
  1.3779 -    fstp_d(Address(rsp, i*sizeof(jdouble)));
  1.3780 -  }
  1.3781 -
  1.3782 -#ifdef _LP64
  1.3783 -  if (nb_args > 0) {
  1.3784 -    movdbl(xmm0, Address(rsp, 0));
  1.3785 -  }
  1.3786 -  if (nb_args > 1) {
  1.3787 -    movdbl(xmm1, Address(rsp, sizeof(jdouble)));
  1.3788 -  }
  1.3789 -  assert(nb_args <= 2, "unsupported number of args");
  1.3790 -#endif // _LP64
  1.3791 -
  1.3792 -  // NOTE: we must not use call_VM_leaf here because that requires a
  1.3793 -  // complete interpreter frame in debug mode -- same bug as 4387334
  1.3794 -  // MacroAssembler::call_VM_leaf_base is perfectly safe and will
  1.3795 -  // do proper 64bit abi
  1.3796 -
  1.3797 -  NEEDS_CLEANUP;
  1.3798 -  // Need to add stack banging before this runtime call if it needs to
  1.3799 -  // be taken; however, there is no generic stack banging routine at
  1.3800 -  // the MacroAssembler level
  1.3801 -
  1.3802 -  MacroAssembler::call_VM_leaf_base(runtime_entry, 0);
  1.3803 -
  1.3804 -#ifdef _LP64
  1.3805 -  movsd(Address(rsp, 0), xmm0);
  1.3806 -  fld_d(Address(rsp, 0));
  1.3807 -#endif // _LP64
  1.3808 -  addptr(rsp, sizeof(jdouble) * nb_args);
  1.3809 -  if (num_fpu_regs_in_use > 1) {
  1.3810 -    // Must save return value to stack and then restore entire FPU
  1.3811 -    // stack except incoming arguments
  1.3812 -    fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
  1.3813 -    for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) {
  1.3814 -      fld_d(Address(rsp, 0));
  1.3815 -      addptr(rsp, sizeof(jdouble));
  1.3816 -    }
  1.3817 -    fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble)));
  1.3818 -    addptr(rsp, sizeof(jdouble) * nb_args);
  1.3819 -  }
  1.3820 -
  1.3821 -  off = 0;
  1.3822 -  if (UseSSE == 1)  {
  1.3823 -    movflt(xmm0, Address(rsp,off++*sizeof(jdouble)));
  1.3824 -    movflt(xmm1, Address(rsp,off++*sizeof(jdouble)));
  1.3825 -    movflt(xmm2, Address(rsp,off++*sizeof(jdouble)));
  1.3826 -    movflt(xmm3, Address(rsp,off++*sizeof(jdouble)));
  1.3827 -    movflt(xmm4, Address(rsp,off++*sizeof(jdouble)));
  1.3828 -    movflt(xmm5, Address(rsp,off++*sizeof(jdouble)));
  1.3829 -    movflt(xmm6, Address(rsp,off++*sizeof(jdouble)));
  1.3830 -    movflt(xmm7, Address(rsp,off++*sizeof(jdouble)));
  1.3831 -    addptr(rsp, sizeof(jdouble)*8);
  1.3832 -  } else if (UseSSE >= 2)  {
  1.3833 -    // Restore whole 128bit (16 bytes) XMM regiters
  1.3834 -    movdqu(xmm0, Address(rsp,off++*16));
  1.3835 -    movdqu(xmm1, Address(rsp,off++*16));
  1.3836 -    movdqu(xmm2, Address(rsp,off++*16));
  1.3837 -    movdqu(xmm3, Address(rsp,off++*16));
  1.3838 -    movdqu(xmm4, Address(rsp,off++*16));
  1.3839 -    movdqu(xmm5, Address(rsp,off++*16));
  1.3840 -    movdqu(xmm6, Address(rsp,off++*16));
  1.3841 -    movdqu(xmm7, Address(rsp,off++*16));
  1.3842 -#ifdef _LP64
  1.3843 -    movdqu(xmm8, Address(rsp,off++*16));
  1.3844 -    movdqu(xmm9, Address(rsp,off++*16));
  1.3845 -    movdqu(xmm10, Address(rsp,off++*16));
  1.3846 -    movdqu(xmm11, Address(rsp,off++*16));
  1.3847 -    movdqu(xmm12, Address(rsp,off++*16));
  1.3848 -    movdqu(xmm13, Address(rsp,off++*16));
  1.3849 -    movdqu(xmm14, Address(rsp,off++*16));
  1.3850 -    movdqu(xmm15, Address(rsp,off++*16));
  1.3851 -#endif
  1.3852 -    addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
  1.3853 -#ifdef COMPILER2
  1.3854 -    if (MaxVectorSize > 16) {
  1.3855 -      // Restore upper half of YMM registes.
  1.3856 -      vinsertf128h(xmm0, Address(rsp,  0));
  1.3857 -      vinsertf128h(xmm1, Address(rsp, 16));
  1.3858 -      vinsertf128h(xmm2, Address(rsp, 32));
  1.3859 -      vinsertf128h(xmm3, Address(rsp, 48));
  1.3860 -      vinsertf128h(xmm4, Address(rsp, 64));
  1.3861 -      vinsertf128h(xmm5, Address(rsp, 80));
  1.3862 -      vinsertf128h(xmm6, Address(rsp, 96));
  1.3863 -      vinsertf128h(xmm7, Address(rsp,112));
  1.3864 -#ifdef _LP64
  1.3865 -      vinsertf128h(xmm8, Address(rsp,128));
  1.3866 -      vinsertf128h(xmm9, Address(rsp,144));
  1.3867 -      vinsertf128h(xmm10, Address(rsp,160));
  1.3868 -      vinsertf128h(xmm11, Address(rsp,176));
  1.3869 -      vinsertf128h(xmm12, Address(rsp,192));
  1.3870 -      vinsertf128h(xmm13, Address(rsp,208));
  1.3871 -      vinsertf128h(xmm14, Address(rsp,224));
  1.3872 -      vinsertf128h(xmm15, Address(rsp,240));
  1.3873 -#endif
  1.3874 -      addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
  1.3875 -    }
  1.3876 -#endif
  1.3877 -  }
  1.3878 -  popa();
  1.3879 -}
  1.3880 -
  1.3881 -static const double     pi_4 =  0.7853981633974483;
  1.3882 -
  1.3883 -void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
  1.3884 -  // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
  1.3885 -  // was attempted in this code; unfortunately it appears that the
  1.3886 -  // switch to 80-bit precision and back causes this to be
  1.3887 -  // unprofitable compared with simply performing a runtime call if
  1.3888 -  // the argument is out of the (-pi/4, pi/4) range.
  1.3889 -
  1.3890 -  Register tmp = noreg;
  1.3891 -  if (!VM_Version::supports_cmov()) {
  1.3892 -    // fcmp needs a temporary so preserve rbx,
  1.3893 -    tmp = rbx;
  1.3894 -    push(tmp);
  1.3895 -  }
  1.3896 -
  1.3897 -  Label slow_case, done;
  1.3898 -
  1.3899 -  ExternalAddress pi4_adr = (address)&pi_4;
  1.3900 -  if (reachable(pi4_adr)) {
  1.3901 -    // x ?<= pi/4
  1.3902 -    fld_d(pi4_adr);
  1.3903 -    fld_s(1);                // Stack:  X  PI/4  X
  1.3904 -    fabs();                  // Stack: |X| PI/4  X
  1.3905 -    fcmp(tmp);
  1.3906 -    jcc(Assembler::above, slow_case);
  1.3907 -
  1.3908 -    // fastest case: -pi/4 <= x <= pi/4
  1.3909 -    switch(trig) {
  1.3910 -    case 's':
  1.3911 -      fsin();
  1.3912 -      break;
  1.3913 -    case 'c':
  1.3914 -      fcos();
  1.3915 -      break;
  1.3916 -    case 't':
  1.3917 -      ftan();
  1.3918 -      break;
  1.3919 -    default:
  1.3920 -      assert(false, "bad intrinsic");
  1.3921 -      break;
  1.3922 -    }
  1.3923 -    jmp(done);
  1.3924 -  }
  1.3925 -
  1.3926 -  // slow case: runtime call
  1.3927 -  bind(slow_case);
  1.3928 -
  1.3929 -  switch(trig) {
  1.3930 -  case 's':
  1.3931 -    {
  1.3932 -      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use);
  1.3933 -    }
  1.3934 -    break;
  1.3935 -  case 'c':
  1.3936 -    {
  1.3937 -      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use);
  1.3938 -    }
  1.3939 -    break;
  1.3940 -  case 't':
  1.3941 -    {
  1.3942 -      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use);
  1.3943 -    }
  1.3944 -    break;
  1.3945 -  default:
  1.3946 -    assert(false, "bad intrinsic");
  1.3947 -    break;
  1.3948 -  }
  1.3949 -
  1.3950 -  // Come here with result in F-TOS
  1.3951 -  bind(done);
  1.3952 -
  1.3953 -  if (tmp != noreg) {
  1.3954 -    pop(tmp);
  1.3955 -  }
  1.3956 -}
  1.3957 -
  1.3958 -
// Look up the method for a megamorphic invokeinterface call.
// The target method is determined by <intf_klass, itable_index>.
// The receiver klass is in recv_klass.
// On success, the result will be in method_result, and execution falls through.
// On failure, execution transfers to the given label.
//
// Register effects visible in the emitted sequence:
//   - scan_temp is overwritten (itable scan pointer, then the matching
//     entry's offset value).
//   - recv_klass is overwritten with recv_klass + scaled itable_index +
//     itentry_off.
//   - method_result is used as scratch for each scanned interface entry
//     before receiving the final result.
// A non-constant itable_index must live in method_result (asserted below).
void MacroAssembler::lookup_interface_method(Register recv_klass,
                                             Register intf_klass,
                                             RegisterOrConstant itable_index,
                                             Register method_result,
                                             Register scan_temp,
                                             Label& L_no_such_interface) {
  assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
         "caller must use same register for non-constant itable index as for method");

  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  int vtable_base = InstanceKlass::vtable_start_offset() * wordSize;
  int itentry_off = itableMethodEntry::method_offset_in_bytes();
  int scan_step   = itableOffsetEntry::size() * wordSize;
  int vte_size    = vtableEntry::size() * wordSize;
  Address::ScaleFactor times_vte_scale = Address::times_ptr;
  assert(vte_size == wordSize, "else adjust times_vte_scale");

  // scan_temp = vtable length (32-bit load).
  movl(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize));

  // %%% Could store the aligned, prescaled offset in the klassoop.
  // scan_temp = recv_klass + vtable_length * vte_size + vtable_base
  lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
  if (HeapWordsPerLong > 1) {
    // Round up to align_object_offset boundary
    // see code for InstanceKlass::start_of_itable!
    round_to(scan_temp, BytesPerLong);
  }

  // Adjust recv_klass by scaled itable_index, so we can free itable_index.
  assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
  lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));

  // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
  //   if (scan->interface() == intf) {
  //     result = (klass + scan->offset() + itable_index);
  //   }
  // }
  Label search, found_method;

  // The compare is emitted twice.  The first copy (peel == 1) jumps straight
  // to found_method on a match and otherwise falls into the code bound at
  // 'search'.  The second copy (peel == 0) is emitted after that code and
  // branches back to 'search' on a mismatch, falling through to found_method
  // on a match.
  for (int peel = 1; peel >= 0; peel--) {
    movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
    cmpptr(intf_klass, method_result);

    if (peel) {
      jccb(Assembler::equal, found_method);
    } else {
      jccb(Assembler::notEqual, search);
      // (invert the test to fall through to found_method...)
    }

    // Second pass emits only the compare-and-branch above.
    if (!peel)  break;

    bind(search);

    // Check that the previous entry is non-null.  A null entry means that
    // the receiver class doesn't implement the interface, and wasn't the
    // same as when the caller was compiled.
    testptr(method_result, method_result);
    jcc(Assembler::zero, L_no_such_interface);
    addptr(scan_temp, scan_step);
  }

  bind(found_method);

  // Got a hit.
  // Load the matching entry's offset (32-bit), then fetch the method from
  // the already-adjusted recv_klass plus that offset.
  movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
  movptr(method_result, Address(recv_klass, scan_temp, Address::times_1));
}
  1.4032 -
  1.4033 -
  1.4034 -// virtual method calling
  1.4035 -void MacroAssembler::lookup_virtual_method(Register recv_klass,
  1.4036 -                                           RegisterOrConstant vtable_index,
  1.4037 -                                           Register method_result) {
  1.4038 -  const int base = InstanceKlass::vtable_start_offset() * wordSize;
  1.4039 -  assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below");
  1.4040 -  Address vtable_entry_addr(recv_klass,
  1.4041 -                            vtable_index, Address::times_ptr,
  1.4042 -                            base + vtableEntry::method_offset_in_bytes());
  1.4043 -  movptr(method_result, vtable_entry_addr);
  1.4044 -}
  1.4045 -
  1.4046 -
  1.4047 -void MacroAssembler::check_klass_subtype(Register sub_klass,
  1.4048 -                           Register super_klass,
  1.4049 -                           Register temp_reg,
  1.4050 -                           Label& L_success) {
  1.4051 -  Label L_failure;
  1.4052 -  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg,        &L_success, &L_failure, NULL);
  1.4053 -  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
  1.4054 -  bind(L_failure);
  1.4055 -}
  1.4056 -
  1.4057 -
// Emits the fast part of a subtype check of sub_klass against super_klass.
//
// The three outcomes (success, failure, needs-slow-path) each go to the
// corresponding label.  At most one of L_success / L_failure / L_slow_path
// may be NULL; a NULL label means "fall through to the end of the emitted
// sequence" for that outcome.
//
// super_check_offset may be a register or a constant.  A constant of -1
// means the offset is not known at code-generation time; it is then loaded
// from super_klass into temp_reg (which must not be noreg in that case).
void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   Label* L_slow_path,
                                        RegisterOrConstant super_check_offset) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
  if (super_check_offset.is_register()) {
    assert_different_registers(sub_klass, super_klass,
                               super_check_offset.as_register());
  } else if (must_load_sco) {
    assert(temp_reg != noreg, "supply either a temp or a register offset");
  }

  // Replace any NULL label with the local fall-through label, counting how
  // many were NULL so the "at most one" contract can be asserted.
  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  int sco_offset = in_bytes(Klass::super_check_offset_offset());
  Address super_check_offset_addr(super_klass, sco_offset);

  // Hacked jcc, which "knows" that L_fallthrough, at least, is in
  // range of a jccb.  If this routine grows larger, reconsider at
  // least some of these.
#define local_jcc(assembler_cond, label)                                \
  if (&(label) == &L_fallthrough)  jccb(assembler_cond, label);         \
  else                             jcc( assembler_cond, label) /*omit semi*/

  // Hacked jmp, which may only be used just before L_fallthrough.
#define final_jmp(label)                                                \
  if (&(label) == &L_fallthrough) { /*do nothing*/ }                    \
  else                            jmp(label)                /*omit semi*/

  // If the pointers are equal, we are done (e.g., String[] elements).
  // This self-check enables sharing of secondary supertype arrays among
  // non-primary types such as array-of-interface.  Otherwise, each such
  // type would need its own customized SSA.
  // We move this check to the front of the fast path because many
  // type checks are in fact trivially successful in this manner,
  // so we get a nicely predicted branch right at the start of the check.
  cmpptr(sub_klass, super_klass);
  local_jcc(Assembler::equal, *L_success);

  // Check the supertype display:
  if (must_load_sco) {
    // Positive movl does right thing on LP64.
    movl(temp_reg, super_check_offset_addr);
    // From here on the offset is treated as a register operand.
    super_check_offset = RegisterOrConstant(temp_reg);
  }
  Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
  cmpptr(super_klass, super_check_addr); // load displayed supertype

  // This check has worked decisively for primary supers.
  // Secondary supers are sought in the super_cache ('super_cache_addr').
  // (Secondary supers are interfaces and very deeply nested subtypes.)
  // This works in the same check above because of a tricky aliasing
  // between the super_cache and the primary super display elements.
  // (The 'super_check_addr' can address either, as the case requires.)
  // Note that the cache is updated below if it does not help us find
  // what we need immediately.
  // So if it was a primary super, we can just fail immediately.
  // Otherwise, it's the slow path for us (no success at this point).

  if (super_check_offset.is_register()) {
    // Offset only known at run time: on a miss, compare the offset with
    // sc_offset to decide between failure and the slow path.
    local_jcc(Assembler::equal, *L_success);
    cmpl(super_check_offset.as_register(), sc_offset);
    if (L_failure == &L_fallthrough) {
      local_jcc(Assembler::equal, *L_slow_path);
    } else {
      local_jcc(Assembler::notEqual, *L_failure);
      final_jmp(*L_slow_path);
    }
  } else if (super_check_offset.as_constant() == sc_offset) {
    // Need a slow path; fast failure is impossible.
    if (L_slow_path == &L_fallthrough) {
      local_jcc(Assembler::equal, *L_success);
    } else {
      local_jcc(Assembler::notEqual, *L_slow_path);
      final_jmp(*L_success);
    }
  } else {
    // No slow path; it's a fast decision.
    if (L_failure == &L_fallthrough) {
      local_jcc(Assembler::equal, *L_success);
    } else {
      local_jcc(Assembler::notEqual, *L_failure);
      final_jmp(*L_success);
    }
  }

  bind(L_fallthrough);

#undef local_jcc
#undef final_jmp
}
  1.4159 -
  1.4160 -
// Emits the slow part of a subtype check: a linear scan of sub_klass's
// secondary-supers array for super_klass, using repne_scan().
//
// On a hit the emitted code stores super_klass into sub_klass's secondary
// super cache and goes to L_success; on a miss it goes to L_failure.  At
// most one of the two labels may be NULL, meaning "fall through".
//
// rax, rcx and rdi are used by the scan.  Each is pushed and popped around
// the scan unless it was passed as temp_reg or temp2_reg (rax is also left
// unpushed when super_klass is already rax and UseCompressedOops is off).
// sub_klass must not be rax or rcx.
//
// With set_cond_codes, rdi must be one of the temps (it is asserted not to
// have been pushed), so the scan's value of rdi and the Z/NZ flags remain
// visible to the caller.
void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Register temp2_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   bool set_cond_codes) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  if (temp2_reg != noreg)
    assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)

  // A NULL label is redirected to the local fall-through label.
  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  // a couple of useful fields in sub_klass:
  int ss_offset = in_bytes(Klass::secondary_supers_offset());
  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  Address secondary_supers_addr(sub_klass, ss_offset);
  Address super_cache_addr(     sub_klass, sc_offset);

  // Do a linear scan of the secondary super-klass chain.
  // This code is rarely used, so simplicity is a virtue here.
  // The repne_scan instruction uses fixed registers, which we must spill.
  // Don't worry too much about pre-existing connections with the input regs.

  assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super)
  assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter)

  // Get super_klass value into rax (even if it was in rdi or rcx).
  bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false;
  if (super_klass != rax || UseCompressedOops) {
    if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; }
    mov(rax, super_klass);
  }
  if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; }
  if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; }

#ifndef PRODUCT
  // Non-product builds: bump the partial-subtype-check counter.
  int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
  ExternalAddress pst_counter_addr((address) pst_counter);
  NOT_LP64(  incrementl(pst_counter_addr) );
  LP64_ONLY( lea(rcx, pst_counter_addr) );
  LP64_ONLY( incrementl(Address(rcx, 0)) );
#endif //PRODUCT

  // We will consult the secondary-super array.
  movptr(rdi, secondary_supers_addr);
  // Load the array length.  (Positive movl does right thing on LP64.)
  movl(rcx, Address(rdi, Array<Klass*>::length_offset_in_bytes()));
  // Skip to start of data.
  addptr(rdi, Array<Klass*>::base_offset_in_bytes());

  // Scan RCX words at [RDI] for an occurrence of RAX.
  // Set NZ/Z based on last compare.
  // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does
  // not change flags (only scas instruction which is repeated sets flags).
  // Set Z = 0 (not equal) before 'repne' to indicate that class was not found.

    testptr(rax,rax); // Set Z = 0
    repne_scan();

  // Unspill the temp. registers:
  // (reverse of the push order above)
  if (pushed_rdi)  pop(rdi);
  if (pushed_rcx)  pop(rcx);
  if (pushed_rax)  pop(rax);

  if (set_cond_codes) {
    // Special hack for the AD files:  rdi is guaranteed non-zero.
    assert(!pushed_rdi, "rdi must be left non-NULL");
    // Also, the condition codes are properly set Z/NZ on succeed/failure.
  }

  // A short branch is used only when the failure target is the local
  // fall-through label bound at the end of this sequence.
  if (L_failure == &L_fallthrough)
        jccb(Assembler::notEqual, *L_failure);
  else  jcc(Assembler::notEqual, *L_failure);

  // Success.  Cache the super we found and proceed in triumph.
  movptr(super_cache_addr, super_klass);

  if (L_success != &L_fallthrough) {
    jmp(*L_success);
  }

#undef IS_A_TEMP

  bind(L_fallthrough);
}
  1.4252 -
  1.4253 -
  1.4254 -void MacroAssembler::cmov32(Condition cc, Register dst, Address src) {
  1.4255 -  if (VM_Version::supports_cmov()) {
  1.4256 -    cmovl(cc, dst, src);
  1.4257 -  } else {
  1.4258 -    Label L;
  1.4259 -    jccb(negate_condition(cc), L);
  1.4260 -    movl(dst, src);
  1.4261 -    bind(L);
  1.4262 -  }
  1.4263 -}
  1.4264 -
  1.4265 -void MacroAssembler::cmov32(Condition cc, Register dst, Register src) {
  1.4266 -  if (VM_Version::supports_cmov()) {
  1.4267 -    cmovl(cc, dst, src);
  1.4268 -  } else {
  1.4269 -    Label L;
  1.4270 -    jccb(negate_condition(cc), L);
  1.4271 -    movl(dst, src);
  1.4272 -    bind(L);
  1.4273 -  }
  1.4274 -}
  1.4275 -
  1.4276 -void MacroAssembler::verify_oop(Register reg, const char* s) {
  1.4277 -  if (!VerifyOops) return;
  1.4278 -
  1.4279 -  // Pass register number to verify_oop_subroutine
  1.4280 -  char* b = new char[strlen(s) + 50];
  1.4281 -  sprintf(b, "verify_oop: %s: %s", reg->name(), s);
  1.4282 -  BLOCK_COMMENT("verify_oop {");
  1.4283 -#ifdef _LP64
  1.4284 -  push(rscratch1);                    // save r10, trashed by movptr()
  1.4285 -#endif
  1.4286 -  push(rax);                          // save rax,
  1.4287 -  push(reg);                          // pass register argument
  1.4288 -  ExternalAddress buffer((address) b);
  1.4289 -  // avoid using pushptr, as it modifies scratch registers
  1.4290 -  // and our contract is not to modify anything
  1.4291 -  movptr(rax, buffer.addr());
  1.4292 -  push(rax);
  1.4293 -  // call indirectly to solve generation ordering problem
  1.4294 -  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  1.4295 -  call(rax);
  1.4296 -  // Caller pops the arguments (oop, message) and restores rax, r10
  1.4297 -  BLOCK_COMMENT("} verify_oop");
  1.4298 -}
  1.4299 -
  1.4300 -
  1.4301 -RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
  1.4302 -                                                      Register tmp,
  1.4303 -                                                      int offset) {
  1.4304 -  intptr_t value = *delayed_value_addr;
  1.4305 -  if (value != 0)
  1.4306 -    return RegisterOrConstant(value + offset);
  1.4307 -
  1.4308 -  // load indirectly to solve generation ordering problem
  1.4309 -  movptr(tmp, ExternalAddress((address) delayed_value_addr));
  1.4310 -
  1.4311 -#ifdef ASSERT
  1.4312 -  { Label L;
  1.4313 -    testptr(tmp, tmp);
  1.4314 -    if (WizardMode) {
  1.4315 -      jcc(Assembler::notZero, L);
  1.4316 -      char* buf = new char[40];
  1.4317 -      sprintf(buf, "DelayedValue="INTPTR_FORMAT, delayed_value_addr[1]);
  1.4318 -      STOP(buf);
  1.4319 -    } else {
  1.4320 -      jccb(Assembler::notZero, L);
  1.4321 -      hlt();
  1.4322 -    }
  1.4323 -    bind(L);
  1.4324 -  }
  1.4325 -#endif
  1.4326 -
  1.4327 -  if (offset != 0)
  1.4328 -    addptr(tmp, offset);
  1.4329 -
  1.4330 -  return RegisterOrConstant(tmp);
  1.4331 -}
  1.4332 -
  1.4333 -
  1.4334 -Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
  1.4335 -                                         int extra_slot_offset) {
  1.4336 -  // cf. TemplateTable::prepare_invoke(), if (load_receiver).
  1.4337 -  int stackElementSize = Interpreter::stackElementSize;
  1.4338 -  int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
  1.4339 -#ifdef ASSERT
  1.4340 -  int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
  1.4341 -  assert(offset1 - offset == stackElementSize, "correct arithmetic");
  1.4342 -#endif
  1.4343 -  Register             scale_reg    = noreg;
  1.4344 -  Address::ScaleFactor scale_factor = Address::no_scale;
  1.4345 -  if (arg_slot.is_constant()) {
  1.4346 -    offset += arg_slot.as_constant() * stackElementSize;
  1.4347 -  } else {
  1.4348 -    scale_reg    = arg_slot.as_register();
  1.4349 -    scale_factor = Address::times(stackElementSize);
  1.4350 -  }
  1.4351 -  offset += wordSize;           // return PC is on stack
  1.4352 -  return Address(rsp, scale_reg, scale_factor, offset);
  1.4353 -}
  1.4354 -
  1.4355 -
  1.4356 -void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
  1.4357 -  if (!VerifyOops) return;
  1.4358 -
  1.4359 -  // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord);
  1.4360 -  // Pass register number to verify_oop_subroutine
  1.4361 -  char* b = new char[strlen(s) + 50];
  1.4362 -  sprintf(b, "verify_oop_addr: %s", s);
  1.4363 -
  1.4364 -#ifdef _LP64
  1.4365 -  push(rscratch1);                    // save r10, trashed by movptr()
  1.4366 -#endif
  1.4367 -  push(rax);                          // save rax,
  1.4368 -  // addr may contain rsp so we will have to adjust it based on the push
  1.4369 -  // we just did (and on 64 bit we do two pushes)
  1.4370 -  // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which
  1.4371 -  // stores rax into addr which is backwards of what was intended.
  1.4372 -  if (addr.uses(rsp)) {
  1.4373 -    lea(rax, addr);
  1.4374 -    pushptr(Address(rax, LP64_ONLY(2 *) BytesPerWord));
  1.4375 -  } else {
  1.4376 -    pushptr(addr);
  1.4377 -  }
  1.4378 -
  1.4379 -  ExternalAddress buffer((address) b);
  1.4380 -  // pass msg argument
  1.4381 -  // avoid using pushptr, as it modifies scratch registers
  1.4382 -  // and our contract is not to modify anything
  1.4383 -  movptr(rax, buffer.addr());
  1.4384 -  push(rax);
  1.4385 -
  1.4386 -  // call indirectly to solve generation ordering problem
  1.4387 -  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  1.4388 -  call(rax);
  1.4389 -  // Caller pops the arguments (addr, message) and restores rax, r10.
  1.4390 -}
  1.4391 -
  1.4392 -void MacroAssembler::verify_tlab() {
  1.4393 -#ifdef ASSERT
  1.4394 -  if (UseTLAB && VerifyOops) {
  1.4395 -    Label next, ok;
  1.4396 -    Register t1 = rsi;
  1.4397 -    Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread);
  1.4398 -
  1.4399 -    push(t1);
  1.4400 -    NOT_LP64(push(thread_reg));
  1.4401 -    NOT_LP64(get_thread(thread_reg));
  1.4402 -
  1.4403 -    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
  1.4404 -    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
  1.4405 -    jcc(Assembler::aboveEqual, next);
  1.4406 -    STOP("assert(top >= start)");
  1.4407 -    should_not_reach_here();
  1.4408 -
  1.4409 -    bind(next);
  1.4410 -    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
  1.4411 -    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
  1.4412 -    jcc(Assembler::aboveEqual, ok);
  1.4413 -    STOP("assert(top <= end)");
  1.4414 -    should_not_reach_here();
  1.4415 -
  1.4416 -    bind(ok);
  1.4417 -    NOT_LP64(pop(thread_reg));
  1.4418 -    pop(t1);
  1.4419 -  }
  1.4420 -#endif
  1.4421 -}
  1.4422 -
  1.4423 -class ControlWord {
  1.4424 - public:
  1.4425 -  int32_t _value;
  1.4426 -
  1.4427 -  int  rounding_control() const        { return  (_value >> 10) & 3      ; }
  1.4428 -  int  precision_control() const       { return  (_value >>  8) & 3      ; }
  1.4429 -  bool precision() const               { return ((_value >>  5) & 1) != 0; }
  1.4430 -  bool underflow() const               { return ((_value >>  4) & 1) != 0; }
  1.4431 -  bool overflow() const                { return ((_value >>  3) & 1) != 0; }
  1.4432 -  bool zero_divide() const             { return ((_value >>  2) & 1) != 0; }
  1.4433 -  bool denormalized() const            { return ((_value >>  1) & 1) != 0; }
  1.4434 -  bool invalid() const                 { return ((_value >>  0) & 1) != 0; }
  1.4435 -
  1.4436 -  void print() const {
  1.4437 -    // rounding control
  1.4438 -    const char* rc;
  1.4439 -    switch (rounding_control()) {
  1.4440 -      case 0: rc = "round near"; break;
  1.4441 -      case 1: rc = "round down"; break;
  1.4442 -      case 2: rc = "round up  "; break;
  1.4443 -      case 3: rc = "chop      "; break;
  1.4444 -    };
  1.4445 -    // precision control
  1.4446 -    const char* pc;
  1.4447 -    switch (precision_control()) {
  1.4448 -      case 0: pc = "24 bits "; break;
  1.4449 -      case 1: pc = "reserved"; break;
  1.4450 -      case 2: pc = "53 bits "; break;
  1.4451 -      case 3: pc = "64 bits "; break;
  1.4452 -    };
  1.4453 -    // flags
  1.4454 -    char f[9];
  1.4455 -    f[0] = ' ';
  1.4456 -    f[1] = ' ';
  1.4457 -    f[2] = (precision   ()) ? 'P' : 'p';
  1.4458 -    f[3] = (underflow   ()) ? 'U' : 'u';
  1.4459 -    f[4] = (overflow    ()) ? 'O' : 'o';
  1.4460 -    f[5] = (zero_divide ()) ? 'Z' : 'z';
  1.4461 -    f[6] = (denormalized()) ? 'D' : 'd';
  1.4462 -    f[7] = (invalid     ()) ? 'I' : 'i';
  1.4463 -    f[8] = '\x0';
  1.4464 -    // output
  1.4465 -    printf("%04x  masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc);
  1.4466 -  }
  1.4467 -
  1.4468 -};
  1.4469 -
  1.4470 -class StatusWord {
  1.4471 - public:
  1.4472 -  int32_t _value;
  1.4473 -
  1.4474 -  bool busy() const                    { return ((_value >> 15) & 1) != 0; }
  1.4475 -  bool C3() const                      { return ((_value >> 14) & 1) != 0; }
  1.4476 -  bool C2() const                      { return ((_value >> 10) & 1) != 0; }
  1.4477 -  bool C1() const                      { return ((_value >>  9) & 1) != 0; }
  1.4478 -  bool C0() const                      { return ((_value >>  8) & 1) != 0; }
  1.4479 -  int  top() const                     { return  (_value >> 11) & 7      ; }
  1.4480 -  bool error_status() const            { return ((_value >>  7) & 1) != 0; }
  1.4481 -  bool stack_fault() const             { return ((_value >>  6) & 1) != 0; }
  1.4482 -  bool precision() const               { return ((_value >>  5) & 1) != 0; }
  1.4483 -  bool underflow() const               { return ((_value >>  4) & 1) != 0; }
  1.4484 -  bool overflow() const                { return ((_value >>  3) & 1) != 0; }
  1.4485 -  bool zero_divide() const             { return ((_value >>  2) & 1) != 0; }
  1.4486 -  bool denormalized() const            { return ((_value >>  1) & 1) != 0; }
  1.4487 -  bool invalid() const                 { return ((_value >>  0) & 1) != 0; }
  1.4488 -
  1.4489 -  void print() const {
  1.4490 -    // condition codes
  1.4491 -    char c[5];
  1.4492 -    c[0] = (C3()) ? '3' : '-';
  1.4493 -    c[1] = (C2()) ? '2' : '-';
  1.4494 -    c[2] = (C1()) ? '1' : '-';
  1.4495 -    c[3] = (C0()) ? '0' : '-';
  1.4496 -    c[4] = '\x0';
  1.4497 -    // flags
  1.4498 -    char f[9];
  1.4499 -    f[0] = (error_status()) ? 'E' : '-';
  1.4500 -    f[1] = (stack_fault ()) ? 'S' : '-';
  1.4501 -    f[2] = (precision   ()) ? 'P' : '-';
  1.4502 -    f[3] = (underflow   ()) ? 'U' : '-';
  1.4503 -    f[4] = (overflow    ()) ? 'O' : '-';
  1.4504 -    f[5] = (zero_divide ()) ? 'Z' : '-';
  1.4505 -    f[6] = (denormalized()) ? 'D' : '-';
  1.4506 -    f[7] = (invalid     ()) ? 'I' : '-';
  1.4507 -    f[8] = '\x0';
  1.4508 -    // output
  1.4509 -    printf("%04x  flags = %s, cc =  %s, top = %d", _value & 0xFFFF, f, c, top());
  1.4510 -  }
  1.4511 -
  1.4512 -};
  1.4513 -
  1.4514 -class TagWord {
  1.4515 - public:
  1.4516 -  int32_t _value;
  1.4517 -
  1.4518 -  int tag_at(int i) const              { return (_value >> (i*2)) & 3; }
  1.4519 -
  1.4520 -  void print() const {
  1.4521 -    printf("%04x", _value & 0xFFFF);
  1.4522 -  }
  1.4523 -
  1.4524 -};
  1.4525 -
  1.4526 -class FPU_Register {
  1.4527 - public:
  1.4528 -  int32_t _m0;
  1.4529 -  int32_t _m1;
  1.4530 -  int16_t _ex;
  1.4531 -
  1.4532 -  bool is_indefinite() const           {
  1.4533 -    return _ex == -1 && _m1 == (int32_t)0xC0000000 && _m0 == 0;
  1.4534 -  }
  1.4535 -
  1.4536 -  void print() const {
  1.4537 -    char  sign = (_ex < 0) ? '-' : '+';
  1.4538 -    const char* kind = (_ex == 0x7FFF || _ex == (int16_t)-1) ? "NaN" : "   ";
  1.4539 -    printf("%c%04hx.%08x%08x  %s", sign, _ex, _m1, _m0, kind);
  1.4540 -  };
  1.4541 -
  1.4542 -};
  1.4543 -
  1.4544 -class FPU_State {
  1.4545 - public:
  1.4546 -  enum {
  1.4547 -    register_size       = 10,
  1.4548 -    number_of_registers =  8,
  1.4549 -    register_mask       =  7
  1.4550 -  };
  1.4551 -
  1.4552 -  ControlWord  _control_word;
  1.4553 -  StatusWord   _status_word;
  1.4554 -  TagWord      _tag_word;
  1.4555 -  int32_t      _error_offset;
  1.4556 -  int32_t      _error_selector;
  1.4557 -  int32_t      _data_offset;
  1.4558 -  int32_t      _data_selector;
  1.4559 -  int8_t       _register[register_size * number_of_registers];
  1.4560 -
  1.4561 -  int tag_for_st(int i) const          { return _tag_word.tag_at((_status_word.top() + i) & register_mask); }
  1.4562 -  FPU_Register* st(int i) const        { return (FPU_Register*)&_register[register_size * i]; }
  1.4563 -
  1.4564 -  const char* tag_as_string(int tag) const {
  1.4565 -    switch (tag) {
  1.4566 -      case 0: return "valid";
  1.4567 -      case 1: return "zero";
  1.4568 -      case 2: return "special";
  1.4569 -      case 3: return "empty";
  1.4570 -    }
  1.4571 -    ShouldNotReachHere();
  1.4572 -    return NULL;
  1.4573 -  }
  1.4574 -
  1.4575 -  void print() const {
  1.4576 -    // print computation registers
  1.4577 -    { int t = _status_word.top();
  1.4578 -      for (int i = 0; i < number_of_registers; i++) {
  1.4579 -        int j = (i - t) & register_mask;
  1.4580 -        printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j);
  1.4581 -        st(j)->print();
  1.4582 -        printf(" %s\n", tag_as_string(_tag_word.tag_at(i)));
  1.4583 -      }
  1.4584 -    }
  1.4585 -    printf("\n");
  1.4586 -    // print control registers
  1.4587 -    printf("ctrl = "); _control_word.print(); printf("\n");
  1.4588 -    printf("stat = "); _status_word .print(); printf("\n");
  1.4589 -    printf("tags = "); _tag_word    .print(); printf("\n");
  1.4590 -  }
  1.4591 -
  1.4592 -};
  1.4593 -
  1.4594 -class Flag_Register {
  1.4595 - public:
  1.4596 -  int32_t _value;
  1.4597 -
  1.4598 -  bool overflow() const                { return ((_value >> 11) & 1) != 0; }
  1.4599 -  bool direction() const               { return ((_value >> 10) & 1) != 0; }
  1.4600 -  bool sign() const                    { return ((_value >>  7) & 1) != 0; }
  1.4601 -  bool zero() const                    { return ((_value >>  6) & 1) != 0; }
  1.4602 -  bool auxiliary_carry() const         { return ((_value >>  4) & 1) != 0; }
  1.4603 -  bool parity() const                  { return ((_value >>  2) & 1) != 0; }
  1.4604 -  bool carry() const                   { return ((_value >>  0) & 1) != 0; }
  1.4605 -
  1.4606 -  void print() const {
  1.4607 -    // flags
  1.4608 -    char f[8];
  1.4609 -    f[0] = (overflow       ()) ? 'O' : '-';
  1.4610 -    f[1] = (direction      ()) ? 'D' : '-';
  1.4611 -    f[2] = (sign           ()) ? 'S' : '-';
  1.4612 -    f[3] = (zero           ()) ? 'Z' : '-';
  1.4613 -    f[4] = (auxiliary_carry()) ? 'A' : '-';
  1.4614 -    f[5] = (parity         ()) ? 'P' : '-';
  1.4615 -    f[6] = (carry          ()) ? 'C' : '-';
  1.4616 -    f[7] = '\x0';
  1.4617 -    // output
  1.4618 -    printf("%08x  flags = %s", _value, f);
  1.4619 -  }
  1.4620 -
  1.4621 -};
  1.4622 -
  1.4623 -class IU_Register {
  1.4624 - public:
  1.4625 -  int32_t _value;
  1.4626 -
  1.4627 -  void print() const {
  1.4628 -    printf("%08x  %11d", _value, _value);
  1.4629 -  }
  1.4630 -
  1.4631 -};
  1.4632 -
  1.4633 -class IU_State {
  1.4634 - public:
  1.4635 -  Flag_Register _eflags;
  1.4636 -  IU_Register   _rdi;
  1.4637 -  IU_Register   _rsi;
  1.4638 -  IU_Register   _rbp;
  1.4639 -  IU_Register   _rsp;
  1.4640 -  IU_Register   _rbx;
  1.4641 -  IU_Register   _rdx;
  1.4642 -  IU_Register   _rcx;
  1.4643 -  IU_Register   _rax;
  1.4644 -
  1.4645 -  void print() const {
  1.4646 -    // computation registers
  1.4647 -    printf("rax,  = "); _rax.print(); printf("\n");
  1.4648 -    printf("rbx,  = "); _rbx.print(); printf("\n");
  1.4649 -    printf("rcx  = "); _rcx.print(); printf("\n");
  1.4650 -    printf("rdx  = "); _rdx.print(); printf("\n");
  1.4651 -    printf("rdi  = "); _rdi.print(); printf("\n");
  1.4652 -    printf("rsi  = "); _rsi.print(); printf("\n");
  1.4653 -    printf("rbp,  = "); _rbp.print(); printf("\n");
  1.4654 -    printf("rsp  = "); _rsp.print(); printf("\n");
  1.4655 -    printf("\n");
  1.4656 -    // control registers
  1.4657 -    printf("flgs = "); _eflags.print(); printf("\n");
  1.4658 -  }
  1.4659 -};
  1.4660 -
  1.4661 -
  1.4662 -class CPU_State {
  1.4663 - public:
  1.4664 -  FPU_State _fpu_state;
  1.4665 -  IU_State  _iu_state;
  1.4666 -
  1.4667 -  void print() const {
  1.4668 -    printf("--------------------------------------------------\n");
  1.4669 -    _iu_state .print();
  1.4670 -    printf("\n");
  1.4671 -    _fpu_state.print();
  1.4672 -    printf("--------------------------------------------------\n");
  1.4673 -  }
  1.4674 -
  1.4675 -};
  1.4676 -
  1.4677 -
  1.4678 -static void _print_CPU_state(CPU_State* state) {
  1.4679 -  state->print();
  1.4680 -};
  1.4681 -
  1.4682 -
  1.4683 -void MacroAssembler::print_CPU_state() {
  1.4684 -  push_CPU_state();
  1.4685 -  push(rsp);                // pass CPU state
  1.4686 -  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state)));
  1.4687 -  addptr(rsp, wordSize);       // discard argument
  1.4688 -  pop_CPU_state();
  1.4689 -}
  1.4690 -
  1.4691 -
  1.4692 -static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) {
  1.4693 -  static int counter = 0;
  1.4694 -  FPU_State* fs = &state->_fpu_state;
  1.4695 -  counter++;
  1.4696 -  // For leaf calls, only verify that the top few elements remain empty.
  1.4697 -  // We only need 1 empty at the top for C2 code.
  1.4698 -  if( stack_depth < 0 ) {
  1.4699 -    if( fs->tag_for_st(7) != 3 ) {
  1.4700 -      printf("FPR7 not empty\n");
  1.4701 -      state->print();
  1.4702 -      assert(false, "error");
  1.4703 -      return false;
  1.4704 -    }
  1.4705 -    return true;                // All other stack states do not matter
  1.4706 -  }
  1.4707 -
  1.4708 -  assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std,
  1.4709 -         "bad FPU control word");
  1.4710 -
  1.4711 -  // compute stack depth
  1.4712 -  int i = 0;
  1.4713 -  while (i < FPU_State::number_of_registers && fs->tag_for_st(i)  < 3) i++;
  1.4714 -  int d = i;
  1.4715 -  while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++;
  1.4716 -  // verify findings
  1.4717 -  if (i != FPU_State::number_of_registers) {
  1.4718 -    // stack not contiguous
  1.4719 -    printf("%s: stack not contiguous at ST%d\n", s, i);
  1.4720 -    state->print();
  1.4721 -    assert(false, "error");
  1.4722 -    return false;
  1.4723 -  }
  1.4724 -  // check if computed stack depth corresponds to expected stack depth
  1.4725 -  if (stack_depth < 0) {
  1.4726 -    // expected stack depth is -stack_depth or less
  1.4727 -    if (d > -stack_depth) {
  1.4728 -      // too many elements on the stack
  1.4729 -      printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d);
  1.4730 -      state->print();
  1.4731 -      assert(false, "error");
  1.4732 -      return false;
  1.4733 -    }
  1.4734 -  } else {
  1.4735 -    // expected stack depth is stack_depth
  1.4736 -    if (d != stack_depth) {
  1.4737 -      // wrong stack depth
  1.4738 -      printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d);
  1.4739 -      state->print();
  1.4740 -      assert(false, "error");
  1.4741 -      return false;
  1.4742 -    }
  1.4743 -  }
  1.4744 -  // everything is cool
  1.4745 -  return true;
  1.4746 -}
  1.4747 -
  1.4748 -
  1.4749 -void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
  1.4750 -  if (!VerifyFPU) return;
  1.4751 -  push_CPU_state();
  1.4752 -  push(rsp);                // pass CPU state
  1.4753 -  ExternalAddress msg((address) s);
  1.4754 -  // pass message string s
  1.4755 -  pushptr(msg.addr());
  1.4756 -  push(stack_depth);        // pass stack depth
  1.4757 -  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU)));
  1.4758 -  addptr(rsp, 3 * wordSize);   // discard arguments
  1.4759 -  // check for error
  1.4760 -  { Label L;
  1.4761 -    testl(rax, rax);
  1.4762 -    jcc(Assembler::notZero, L);
  1.4763 -    int3();                  // break if error condition
  1.4764 -    bind(L);
  1.4765 -  }
  1.4766 -  pop_CPU_state();
  1.4767 -}
  1.4768 -
  1.4769 -void MacroAssembler::load_klass(Register dst, Register src) {
  1.4770 -#ifdef _LP64
  1.4771 -  if (UseCompressedKlassPointers) {
  1.4772 -    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
  1.4773 -    decode_klass_not_null(dst);
  1.4774 -  } else
  1.4775 -#endif
  1.4776 -    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
  1.4777 -}
  1.4778 -
  1.4779 -void MacroAssembler::load_prototype_header(Register dst, Register src) {
  1.4780 -#ifdef _LP64
  1.4781 -  if (UseCompressedKlassPointers) {
  1.4782 -    assert (Universe::heap() != NULL, "java heap should be initialized");
  1.4783 -    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
  1.4784 -    if (Universe::narrow_klass_shift() != 0) {
  1.4785 -      assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
  1.4786 -      assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
  1.4787 -      movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset()));
  1.4788 -    } else {
  1.4789 -      movq(dst, Address(dst, Klass::prototype_header_offset()));
  1.4790 -    }
  1.4791 -  } else
  1.4792 -#endif
  1.4793 -  {
  1.4794 -    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
  1.4795 -    movptr(dst, Address(dst, Klass::prototype_header_offset()));
  1.4796 -  }
  1.4797 -}
  1.4798 -
  1.4799 -void MacroAssembler::store_klass(Register dst, Register src) {
  1.4800 -#ifdef _LP64
  1.4801 -  if (UseCompressedKlassPointers) {
  1.4802 -    encode_klass_not_null(src);
  1.4803 -    movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
  1.4804 -  } else
  1.4805 -#endif
  1.4806 -    movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
  1.4807 -}
  1.4808 -
  1.4809 -void MacroAssembler::load_heap_oop(Register dst, Address src) {
  1.4810 -#ifdef _LP64
  1.4811 -  // FIXME: Must change all places where we try to load the klass.
  1.4812 -  if (UseCompressedOops) {
  1.4813 -    movl(dst, src);
  1.4814 -    decode_heap_oop(dst);
  1.4815 -  } else
  1.4816 -#endif
  1.4817 -    movptr(dst, src);
  1.4818 -}
  1.4819 -
  1.4820 -// Doesn't do verfication, generates fixed size code
  1.4821 -void MacroAssembler::load_heap_oop_not_null(Register dst, Address src) {
  1.4822 -#ifdef _LP64
  1.4823 -  if (UseCompressedOops) {
  1.4824 -    movl(dst, src);
  1.4825 -    decode_heap_oop_not_null(dst);
  1.4826 -  } else
  1.4827 -#endif
  1.4828 -    movptr(dst, src);
  1.4829 -}
  1.4830 -
  1.4831 -void MacroAssembler::store_heap_oop(Address dst, Register src) {
  1.4832 -#ifdef _LP64
  1.4833 -  if (UseCompressedOops) {
  1.4834 -    assert(!dst.uses(src), "not enough registers");
  1.4835 -    encode_heap_oop(src);
  1.4836 -    movl(dst, src);
  1.4837 -  } else
  1.4838 -#endif
  1.4839 -    movptr(dst, src);
  1.4840 -}
  1.4841 -
  1.4842 -void MacroAssembler::cmp_heap_oop(Register src1, Address src2, Register tmp) {
  1.4843 -  assert_different_registers(src1, tmp);
  1.4844 -#ifdef _LP64
  1.4845 -  if (UseCompressedOops) {
  1.4846 -    bool did_push = false;
  1.4847 -    if (tmp == noreg) {
  1.4848 -      tmp = rax;
  1.4849 -      push(tmp);
  1.4850 -      did_push = true;
  1.4851 -      assert(!src2.uses(rsp), "can't push");
  1.4852 -    }
  1.4853 -    load_heap_oop(tmp, src2);
  1.4854 -    cmpptr(src1, tmp);
  1.4855 -    if (did_push)  pop(tmp);
  1.4856 -  } else
  1.4857 -#endif
  1.4858 -    cmpptr(src1, src2);
  1.4859 -}
  1.4860 -
  1.4861 -// Used for storing NULLs.
  1.4862 -void MacroAssembler::store_heap_oop_null(Address dst) {
  1.4863 -#ifdef _LP64
  1.4864 -  if (UseCompressedOops) {
  1.4865 -    movl(dst, (int32_t)NULL_WORD);
  1.4866 -  } else {
  1.4867 -    movslq(dst, (int32_t)NULL_WORD);
  1.4868 -  }
  1.4869 -#else
  1.4870 -  movl(dst, (int32_t)NULL_WORD);
  1.4871 -#endif
  1.4872 -}
  1.4873 -
  1.4874 -#ifdef _LP64
  1.4875 -void MacroAssembler::store_klass_gap(Register dst, Register src) {
  1.4876 -  if (UseCompressedKlassPointers) {
  1.4877 -    // Store to klass gap in destination
  1.4878 -    movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src);
  1.4879 -  }
  1.4880 -}
  1.4881 -
  1.4882 -#ifdef ASSERT
  1.4883 -void MacroAssembler::verify_heapbase(const char* msg) {
  1.4884 -  assert (UseCompressedOops || UseCompressedKlassPointers, "should be compressed");
  1.4885 -  assert (Universe::heap() != NULL, "java heap should be initialized");
  1.4886 -  if (CheckCompressedOops) {
  1.4887 -    Label ok;
  1.4888 -    push(rscratch1); // cmpptr trashes rscratch1
  1.4889 -    cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()));
  1.4890 -    jcc(Assembler::equal, ok);
  1.4891 -    STOP(msg);
  1.4892 -    bind(ok);
  1.4893 -    pop(rscratch1);
  1.4894 -  }
  1.4895 -}
  1.4896 -#endif
  1.4897 -
  1.4898 -// Algorithm must match oop.inline.hpp encode_heap_oop.
  1.4899 -void MacroAssembler::encode_heap_oop(Register r) {
  1.4900 -#ifdef ASSERT
  1.4901 -  verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
  1.4902 -#endif
  1.4903 -  verify_oop(r, "broken oop in encode_heap_oop");
  1.4904 -  if (Universe::narrow_oop_base() == NULL) {
  1.4905 -    if (Universe::narrow_oop_shift() != 0) {
  1.4906 -      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  1.4907 -      shrq(r, LogMinObjAlignmentInBytes);
  1.4908 -    }
  1.4909 -    return;
  1.4910 -  }
  1.4911 -  testq(r, r);
  1.4912 -  cmovq(Assembler::equal, r, r12_heapbase);
  1.4913 -  subq(r, r12_heapbase);
  1.4914 -  shrq(r, LogMinObjAlignmentInBytes);
  1.4915 -}
  1.4916 -
  1.4917 -void MacroAssembler::encode_heap_oop_not_null(Register r) {
  1.4918 -#ifdef ASSERT
  1.4919 -  verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?");
  1.4920 -  if (CheckCompressedOops) {
  1.4921 -    Label ok;
  1.4922 -    testq(r, r);
  1.4923 -    jcc(Assembler::notEqual, ok);
  1.4924 -    STOP("null oop passed to encode_heap_oop_not_null");
  1.4925 -    bind(ok);
  1.4926 -  }
  1.4927 -#endif
  1.4928 -  verify_oop(r, "broken oop in encode_heap_oop_not_null");
  1.4929 -  if (Universe::narrow_oop_base() != NULL) {
  1.4930 -    subq(r, r12_heapbase);
  1.4931 -  }
  1.4932 -  if (Universe::narrow_oop_shift() != 0) {
  1.4933 -    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  1.4934 -    shrq(r, LogMinObjAlignmentInBytes);
  1.4935 -  }
  1.4936 -}
  1.4937 -
  1.4938 -void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
  1.4939 -#ifdef ASSERT
  1.4940 -  verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?");
  1.4941 -  if (CheckCompressedOops) {
  1.4942 -    Label ok;
  1.4943 -    testq(src, src);
  1.4944 -    jcc(Assembler::notEqual, ok);
  1.4945 -    STOP("null oop passed to encode_heap_oop_not_null2");
  1.4946 -    bind(ok);
  1.4947 -  }
  1.4948 -#endif
  1.4949 -  verify_oop(src, "broken oop in encode_heap_oop_not_null2");
  1.4950 -  if (dst != src) {
  1.4951 -    movq(dst, src);
  1.4952 -  }
  1.4953 -  if (Universe::narrow_oop_base() != NULL) {
  1.4954 -    subq(dst, r12_heapbase);
  1.4955 -  }
  1.4956 -  if (Universe::narrow_oop_shift() != 0) {
  1.4957 -    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  1.4958 -    shrq(dst, LogMinObjAlignmentInBytes);
  1.4959 -  }
  1.4960 -}
  1.4961 -
  1.4962 -void  MacroAssembler::decode_heap_oop(Register r) {
  1.4963 -#ifdef ASSERT
  1.4964 -  verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
  1.4965 -#endif
  1.4966 -  if (Universe::narrow_oop_base() == NULL) {
  1.4967 -    if (Universe::narrow_oop_shift() != 0) {
  1.4968 -      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  1.4969 -      shlq(r, LogMinObjAlignmentInBytes);
  1.4970 -    }
  1.4971 -  } else {
  1.4972 -    Label done;
  1.4973 -    shlq(r, LogMinObjAlignmentInBytes);
  1.4974 -    jccb(Assembler::equal, done);
  1.4975 -    addq(r, r12_heapbase);
  1.4976 -    bind(done);
  1.4977 -  }
  1.4978 -  verify_oop(r, "broken oop in decode_heap_oop");
  1.4979 -}
  1.4980 -
  1.4981 -void  MacroAssembler::decode_heap_oop_not_null(Register r) {
  1.4982 -  // Note: it will change flags
  1.4983 -  assert (UseCompressedOops, "should only be used for compressed headers");
  1.4984 -  assert (Universe::heap() != NULL, "java heap should be initialized");
  1.4985 -  // Cannot assert, unverified entry point counts instructions (see .ad file)
  1.4986 -  // vtableStubs also counts instructions in pd_code_size_limit.
  1.4987 -  // Also do not verify_oop as this is called by verify_oop.
  1.4988 -  if (Universe::narrow_oop_shift() != 0) {
  1.4989 -    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  1.4990 -    shlq(r, LogMinObjAlignmentInBytes);
  1.4991 -    if (Universe::narrow_oop_base() != NULL) {
  1.4992 -      addq(r, r12_heapbase);
  1.4993 -    }
  1.4994 -  } else {
  1.4995 -    assert (Universe::narrow_oop_base() == NULL, "sanity");
  1.4996 -  }
  1.4997 -}
  1.4998 -
  1.4999 -void  MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
  1.5000 -  // Note: it will change flags
  1.5001 -  assert (UseCompressedOops, "should only be used for compressed headers");
  1.5002 -  assert (Universe::heap() != NULL, "java heap should be initialized");
  1.5003 -  // Cannot assert, unverified entry point counts instructions (see .ad file)
  1.5004 -  // vtableStubs also counts instructions in pd_code_size_limit.
  1.5005 -  // Also do not verify_oop as this is called by verify_oop.
  1.5006 -  if (Universe::narrow_oop_shift() != 0) {
  1.5007 -    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  1.5008 -    if (LogMinObjAlignmentInBytes == Address::times_8) {
  1.5009 -      leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
  1.5010 -    } else {
  1.5011 -      if (dst != src) {
  1.5012 -        movq(dst, src);
  1.5013 -      }
  1.5014 -      shlq(dst, LogMinObjAlignmentInBytes);
  1.5015 -      if (Universe::narrow_oop_base() != NULL) {
  1.5016 -        addq(dst, r12_heapbase);
  1.5017 -      }
  1.5018 -    }
  1.5019 -  } else {
  1.5020 -    assert (Universe::narrow_oop_base() == NULL, "sanity");
  1.5021 -    if (dst != src) {
  1.5022 -      movq(dst, src);
  1.5023 -    }
  1.5024 -  }
  1.5025 -}
  1.5026 -
  1.5027 -void MacroAssembler::encode_klass_not_null(Register r) {
  1.5028 -  assert(Metaspace::is_initialized(), "metaspace should be initialized");
  1.5029 -#ifdef ASSERT
  1.5030 -  verify_heapbase("MacroAssembler::encode_klass_not_null: heap base corrupted?");
  1.5031 -#endif
  1.5032 -  if (Universe::narrow_klass_base() != NULL) {
  1.5033 -    subq(r, r12_heapbase);
  1.5034 -  }
  1.5035 -  if (Universe::narrow_klass_shift() != 0) {
  1.5036 -    assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
  1.5037 -    shrq(r, LogKlassAlignmentInBytes);
  1.5038 -  }
  1.5039 -}
  1.5040 -
  1.5041 -void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
  1.5042 -  assert(Metaspace::is_initialized(), "metaspace should be initialized");
  1.5043 -#ifdef ASSERT
  1.5044 -  verify_heapbase("MacroAssembler::encode_klass_not_null2: heap base corrupted?");
  1.5045 -#endif
  1.5046 -  if (dst != src) {
  1.5047 -    movq(dst, src);
  1.5048 -  }
  1.5049 -  if (Universe::narrow_klass_base() != NULL) {
  1.5050 -    subq(dst, r12_heapbase);
  1.5051 -  }
  1.5052 -  if (Universe::narrow_klass_shift() != 0) {
  1.5053 -    assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
  1.5054 -    shrq(dst, LogKlassAlignmentInBytes);
  1.5055 -  }
  1.5056 -}
  1.5057 -
  1.5058 -void  MacroAssembler::decode_klass_not_null(Register r) {
  1.5059 -  assert(Metaspace::is_initialized(), "metaspace should be initialized");
  1.5060 -  // Note: it will change flags
  1.5061 -  assert (UseCompressedKlassPointers, "should only be used for compressed headers");
  1.5062 -  // Cannot assert, unverified entry point counts instructions (see .ad file)
  1.5063 -  // vtableStubs also counts instructions in pd_code_size_limit.
  1.5064 -  // Also do not verify_oop as this is called by verify_oop.
  1.5065 -  if (Universe::narrow_klass_shift() != 0) {
  1.5066 -    assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
  1.5067 -    shlq(r, LogKlassAlignmentInBytes);
  1.5068 -    if (Universe::narrow_klass_base() != NULL) {
  1.5069 -      addq(r, r12_heapbase);
  1.5070 -    }
  1.5071 -  } else {
  1.5072 -    assert (Universe::narrow_klass_base() == NULL, "sanity");
  1.5073 -  }
  1.5074 -}
  1.5075 -
  1.5076 -void  MacroAssembler::decode_klass_not_null(Register dst, Register src) {
  1.5077 -  assert(Metaspace::is_initialized(), "metaspace should be initialized");
  1.5078 -  // Note: it will change flags
  1.5079 -  assert (UseCompressedKlassPointers, "should only be used for compressed headers");
  1.5080 -  // Cannot assert, unverified entry point counts instructions (see .ad file)
  1.5081 -  // vtableStubs also counts instructions in pd_code_size_limit.
  1.5082 -  // Also do not verify_oop as this is called by verify_oop.
  1.5083 -  if (Universe::narrow_klass_shift() != 0) {
  1.5084 -    assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
  1.5085 -    assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
  1.5086 -    leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
  1.5087 -  } else {
  1.5088 -    assert (Universe::narrow_klass_base() == NULL, "sanity");
  1.5089 -    if (dst != src) {
  1.5090 -      movq(dst, src);
  1.5091 -    }
  1.5092 -  }
  1.5093 -}
  1.5094 -
  1.5095 -void  MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
  1.5096 -  assert (UseCompressedOops, "should only be used for compressed headers");
  1.5097 -  assert (Universe::heap() != NULL, "java heap should be initialized");
  1.5098 -  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  1.5099 -  int oop_index = oop_recorder()->find_index(obj);
  1.5100 -  RelocationHolder rspec = oop_Relocation::spec(oop_index);
  1.5101 -  mov_narrow_oop(dst, oop_index, rspec);
  1.5102 -}
  1.5103 -
  1.5104 -void  MacroAssembler::set_narrow_oop(Address dst, jobject obj) {
  1.5105 -  assert (UseCompressedOops, "should only be used for compressed headers");
  1.5106 -  assert (Universe::heap() != NULL, "java heap should be initialized");
  1.5107 -  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  1.5108 -  int oop_index = oop_recorder()->find_index(obj);
  1.5109 -  RelocationHolder rspec = oop_Relocation::spec(oop_index);
  1.5110 -  mov_narrow_oop(dst, oop_index, rspec);
  1.5111 -}
  1.5112 -
  1.5113 -void  MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
  1.5114 -  assert (UseCompressedKlassPointers, "should only be used for compressed headers");
  1.5115 -  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  1.5116 -  int klass_index = oop_recorder()->find_index(k);
  1.5117 -  RelocationHolder rspec = metadata_Relocation::spec(klass_index);
  1.5118 -  mov_narrow_oop(dst, oopDesc::encode_klass(k), rspec);
  1.5119 -}
  1.5120 -
  1.5121 -void  MacroAssembler::set_narrow_klass(Address dst, Klass* k) {
  1.5122 -  assert (UseCompressedKlassPointers, "should only be used for compressed headers");
  1.5123 -  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  1.5124 -  int klass_index = oop_recorder()->find_index(k);
  1.5125 -  RelocationHolder rspec = metadata_Relocation::spec(klass_index);
  1.5126 -  mov_narrow_oop(dst, oopDesc::encode_klass(k), rspec);
  1.5127 -}
  1.5128 -
  1.5129 -void  MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) {
  1.5130 -  assert (UseCompressedOops, "should only be used for compressed headers");
  1.5131 -  assert (Universe::heap() != NULL, "java heap should be initialized");
  1.5132 -  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  1.5133 -  int oop_index = oop_recorder()->find_index(obj);
  1.5134 -  RelocationHolder rspec = oop_Relocation::spec(oop_index);
  1.5135 -  Assembler::cmp_narrow_oop(dst, oop_index, rspec);
  1.5136 -}
  1.5137 -
  1.5138 -void  MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) {
  1.5139 -  assert (UseCompressedOops, "should only be used for compressed headers");
  1.5140 -  assert (Universe::heap() != NULL, "java heap should be initialized");
  1.5141 -  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  1.5142 -  int oop_index = oop_recorder()->find_index(obj);
  1.5143 -  RelocationHolder rspec = oop_Relocation::spec(oop_index);
  1.5144 -  Assembler::cmp_narrow_oop(dst, oop_index, rspec);
  1.5145 -}
  1.5146 -
  1.5147 -void  MacroAssembler::cmp_narrow_klass(Register dst, Klass* k) {
  1.5148 -  assert (UseCompressedKlassPointers, "should only be used for compressed headers");
  1.5149 -  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  1.5150 -  int klass_index = oop_recorder()->find_index(k);
  1.5151 -  RelocationHolder rspec = metadata_Relocation::spec(klass_index);
  1.5152 -  Assembler::cmp_narrow_oop(dst, oopDesc::encode_klass(k), rspec);
  1.5153 -}
  1.5154 -
  1.5155 -void  MacroAssembler::cmp_narrow_klass(Address dst, Klass* k) {
  1.5156 -  assert (UseCompressedKlassPointers, "should only be used for compressed headers");
  1.5157 -  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  1.5158 -  int klass_index = oop_recorder()->find_index(k);
  1.5159 -  RelocationHolder rspec = metadata_Relocation::spec(klass_index);
  1.5160 -  Assembler::cmp_narrow_oop(dst, oopDesc::encode_klass(k), rspec);
  1.5161 -}
  1.5162 -
  1.5163 -void MacroAssembler::reinit_heapbase() {
  1.5164 -  if (UseCompressedOops || UseCompressedKlassPointers) {
  1.5165 -    movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()));
  1.5166 -  }
  1.5167 -}
  1.5168 -#endif // _LP64
  1.5169 -
  1.5170 -
  1.5171 -// C2 compiled method's prolog code.
  1.5172 -void MacroAssembler::verified_entry(int framesize, bool stack_bang, bool fp_mode_24b) {
  1.5173 -
  1.5174 -  // WARNING: Initial instruction MUST be 5 bytes or longer so that
  1.5175 -  // NativeJump::patch_verified_entry will be able to patch out the entry
  1.5176 -  // code safely. The push to verify stack depth is ok at 5 bytes,
  1.5177 -  // the frame allocation can be either 3 or 6 bytes. So if we don't do
  1.5178 -  // stack bang then we must use the 6 byte frame allocation even if
  1.5179 -  // we have no frame. :-(
  1.5180 -
  1.5181 -  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  1.5182 -  // Remove word for return addr
  1.5183 -  framesize -= wordSize;
  1.5184 -
  1.5185 -  // Calls to C2R adapters often do not accept exceptional returns.
  1.5186 -  // We require that their callers must bang for them.  But be careful, because
  1.5187 -  // some VM calls (such as call site linkage) can use several kilobytes of
  1.5188 -  // stack.  But the stack safety zone should account for that.
  1.5189 -  // See bugs 4446381, 4468289, 4497237.
  1.5190 -  if (stack_bang) {
  1.5191 -    generate_stack_overflow_check(framesize);
  1.5192 -
  1.5193 -    // We always push rbp, so that on return to interpreter rbp, will be
  1.5194 -    // restored correctly and we can correct the stack.
  1.5195 -    push(rbp);
  1.5196 -    // Remove word for ebp
  1.5197 -    framesize -= wordSize;
  1.5198 -
  1.5199 -    // Create frame
  1.5200 -    if (framesize) {
  1.5201 -      subptr(rsp, framesize);
  1.5202 -    }
  1.5203 -  } else {
  1.5204 -    // Create frame (force generation of a 4 byte immediate value)
  1.5205 -    subptr_imm32(rsp, framesize);
  1.5206 -
  1.5207 -    // Save RBP register now.
  1.5208 -    framesize -= wordSize;
  1.5209 -    movptr(Address(rsp, framesize), rbp);
  1.5210 -  }
  1.5211 -
  1.5212 -  if (VerifyStackAtCalls) { // Majik cookie to verify stack depth
  1.5213 -    framesize -= wordSize;
  1.5214 -    movptr(Address(rsp, framesize), (int32_t)0xbadb100d);
  1.5215 -  }
  1.5216 -
  1.5217 -#ifndef _LP64
  1.5218 -  // If method sets FPU control word do it now
  1.5219 -  if (fp_mode_24b) {
  1.5220 -    fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
  1.5221 -  }
  1.5222 -  if (UseSSE >= 2 && VerifyFPU) {
  1.5223 -    verify_FPU(0, "FPU stack must be clean on entry");
  1.5224 -  }
  1.5225 -#endif
  1.5226 -
  1.5227 -#ifdef ASSERT
  1.5228 -  if (VerifyStackAtCalls) {
  1.5229 -    Label L;
  1.5230 -    push(rax);
  1.5231 -    mov(rax, rsp);
  1.5232 -    andptr(rax, StackAlignmentInBytes-1);
  1.5233 -    cmpptr(rax, StackAlignmentInBytes-wordSize);
  1.5234 -    pop(rax);
  1.5235 -    jcc(Assembler::equal, L);
  1.5236 -    STOP("Stack is not properly aligned!");
  1.5237 -    bind(L);
  1.5238 -  }
  1.5239 -#endif
  1.5240 -
  1.5241 -}
  1.5242 -
  1.5243 -
  1.5244 -// IndexOf for constant substrings with size >= 8 chars
  1.5245 -// which don't need to be loaded through stack. On exit 'result' holds the match index (in chars) or -1.
  1.5246 -void MacroAssembler::string_indexofC8(Register str1, Register str2,
  1.5247 -                                      Register cnt1, Register cnt2,
  1.5248 -                                      int int_cnt2,  Register result,
  1.5249 -                                      XMMRegister vec, Register tmp) {
  1.5250 -  ShortBranchVerifier sbv(this);
  1.5251 -  assert(UseSSE42Intrinsics, "SSE4.2 is required");
  1.5252 -
  1.5253 -  // This method uses pcmpestri instruction with bound registers
  1.5254 -  //   inputs:
  1.5255 -  //     xmm - substring
  1.5256 -  //     rax - substring length (elements count)
  1.5257 -  //     mem - scanned string
  1.5258 -  //     rdx - string length (elements count)
  1.5259 -  //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
  1.5260 -  //   outputs:
  1.5261 -  //     rcx - matched index in string
  1.5262 -  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
  1.5263 -
  1.5264 -  Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR,
  1.5265 -        RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR,
  1.5266 -        MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE;
  1.5267 -
  1.5268 -  // Note, inline_string_indexOf() generates checks:
  1.5269 -  // if (substr.count > string.count) return -1;
  1.5270 -  // if (substr.count == 0) return 0;
  1.5271 -  assert(int_cnt2 >= 8, "this code isused only for cnt2 >= 8 chars");
  1.5272 -
  1.5273 -  // Load substring.
  1.5274 -  movdqu(vec, Address(str2, 0));
  1.5275 -  movl(cnt2, int_cnt2); // constant substring length
  1.5276 -  movptr(result, str1); // string addr
  1.5277 -
  1.5278 -  if (int_cnt2 > 8) {
  1.5279 -    jmpb(SCAN_TO_SUBSTR);
  1.5280 -
  1.5281 -    // Reload substr for rescan, this code
  1.5282 -    // is executed only for large substrings (> 8 chars)
  1.5283 -    bind(RELOAD_SUBSTR);
  1.5284 -    movdqu(vec, Address(str2, 0));
  1.5285 -    negptr(cnt2); // Jumped here with negative cnt2, convert to positive
  1.5286 -
  1.5287 -    bind(RELOAD_STR);
  1.5288 -    // We came here after the beginning of the substring was
  1.5289 -    // matched but the rest of it was not so we need to search
  1.5290 -    // again. Start from the next element after the previous match.
  1.5291 -
  1.5292 -    // cnt2 is number of substring remaining elements and
  1.5293 -    // cnt1 is number of string remaining elements when cmp failed.
  1.5294 -    // Restored cnt1 = cnt1 - cnt2 + int_cnt2
  1.5295 -    subl(cnt1, cnt2);
  1.5296 -    addl(cnt1, int_cnt2);
  1.5297 -    movl(cnt2, int_cnt2); // Now restore cnt2
  1.5298 -
  1.5299 -    decrementl(cnt1);     // Shift to next element
  1.5300 -    cmpl(cnt1, cnt2);
  1.5301 -    jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
  1.5302 -
  1.5303 -    addptr(result, 2); // advance the scan position by one char (2 bytes)
  1.5304 -
  1.5305 -  } // (int_cnt2 > 8)
  1.5306 -
  1.5307 -  // Scan string for start of substr in 16-byte vectors
  1.5308 -  bind(SCAN_TO_SUBSTR);
  1.5309 -  pcmpestri(vec, Address(result, 0), 0x0d);
  1.5310 -  jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
  1.5311 -  subl(cnt1, 8); // 8 chars (one 16-byte vector) scanned without a candidate
  1.5312 -  jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
  1.5313 -  cmpl(cnt1, cnt2);
  1.5314 -  jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
  1.5315 -  addptr(result, 16);
  1.5316 -  jmpb(SCAN_TO_SUBSTR);
  1.5317 -
  1.5318 -  // Found a potential substr
  1.5319 -  bind(FOUND_CANDIDATE);
  1.5320 -  // Matched whole vector if first element matched (tmp(rcx) == 0).
  1.5321 -  if (int_cnt2 == 8) {
  1.5322 -    jccb(Assembler::overflow, RET_FOUND);    // OF == 1
  1.5323 -  } else { // int_cnt2 > 8
  1.5324 -    jccb(Assembler::overflow, FOUND_SUBSTR);
  1.5325 -  }
  1.5326 -  // After pcmpestri tmp(rcx) contains matched element index
  1.5327 -  // Compute start addr of substr
  1.5328 -  lea(result, Address(result, tmp, Address::times_2));
  1.5329 -
  1.5330 -  // Make sure string is still long enough
  1.5331 -  subl(cnt1, tmp);
  1.5332 -  cmpl(cnt1, cnt2);
  1.5333 -  if (int_cnt2 == 8) {
  1.5334 -    jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
  1.5335 -  } else { // int_cnt2 > 8
  1.5336 -    jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD);
  1.5337 -  }
  1.5338 -  // Left less than substring.
  1.5339 -
  1.5340 -  bind(RET_NOT_FOUND);
  1.5341 -  movl(result, -1);
  1.5342 -  jmpb(EXIT);
  1.5343 -
  1.5344 -  if (int_cnt2 > 8) {
  1.5345 -    // This code is optimized for the case when whole substring
  1.5346 -    // is matched if its head is matched.
  1.5347 -    bind(MATCH_SUBSTR_HEAD);
  1.5348 -    pcmpestri(vec, Address(result, 0), 0x0d);
  1.5349 -    // Reload only string if does not match
  1.5350 -    jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0
  1.5351 -
  1.5352 -    Label CONT_SCAN_SUBSTR;
  1.5353 -    // Compare the rest of substring (> 8 chars).
  1.5354 -    bind(FOUND_SUBSTR);
  1.5355 -    // First 8 chars are already matched.
  1.5356 -    negptr(cnt2);
  1.5357 -    addptr(cnt2, 8); // together with negptr above: cnt2 = 8 - cnt2
  1.5358 -
  1.5359 -    bind(SCAN_SUBSTR);
  1.5360 -    subl(cnt1, 8);
  1.5361 -    cmpl(cnt2, -8); // Do not read beyond substring
  1.5362 -    jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR);
  1.5363 -    // Back-up strings to avoid reading beyond substring:
  1.5364 -    // cnt1 = cnt1 - cnt2 + 8
  1.5365 -    addl(cnt1, cnt2); // cnt2 is negative
  1.5366 -    addl(cnt1, 8);
  1.5367 -    movl(cnt2, 8); negptr(cnt2); // cnt2 = -8
  1.5368 -    bind(CONT_SCAN_SUBSTR);
  1.5369 -    if (int_cnt2 < (int)G) {
  1.5370 -      movdqu(vec, Address(str2, cnt2, Address::times_2, int_cnt2*2));
  1.5371 -      pcmpestri(vec, Address(result, cnt2, Address::times_2, int_cnt2*2), 0x0d);
  1.5372 -    } else {
  1.5373 -      // calculate index in register to avoid integer overflow (int_cnt2*2)
  1.5374 -      movl(tmp, int_cnt2);
  1.5375 -      addptr(tmp, cnt2);
  1.5376 -      movdqu(vec, Address(str2, tmp, Address::times_2, 0));
  1.5377 -      pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d);
  1.5378 -    }
  1.5379 -    // Need to reload strings pointers if not matched whole vector
  1.5380 -    jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
  1.5381 -    addptr(cnt2, 8);
  1.5382 -    jcc(Assembler::negative, SCAN_SUBSTR);
  1.5383 -    // Fall through if found full substring
  1.5384 -
  1.5385 -  } // (int_cnt2 > 8)
  1.5386 -
  1.5387 -  bind(RET_FOUND);
  1.5388 -  // Found result if we matched full small substring.
  1.5389 -  // Compute substr offset
  1.5390 -  subptr(result, str1);
  1.5391 -  shrl(result, 1); // index
  1.5392 -  bind(EXIT);
  1.5393 -
  1.5394 -} // string_indexofC8
  1.5395 -
  1.5396 -// Small strings are loaded through stack if they cross page boundary.
  1.5397 -void MacroAssembler::string_indexof(Register str1, Register str2,
  1.5398 -                                    Register cnt1, Register cnt2,
  1.5399 -                                    int int_cnt2,  Register result,
  1.5400 -                                    XMMRegister vec, Register tmp) {
  1.5401 -  ShortBranchVerifier sbv(this);
  1.5402 -  assert(UseSSE42Intrinsics, "SSE4.2 is required");
  1.5403 -  //
  1.5404 -  // int_cnt2 is length of small (< 8 chars) constant substring
  1.5405 -  // or (-1) for non constant substring in which case its length
  1.5406 -  // is in cnt2 register.
  1.5407 -  //
  1.5408 -  // Note, inline_string_indexOf() generates checks:
  1.5409 -  // if (substr.count > string.count) return -1;
  1.5410 -  // if (substr.count == 0) return 0;
  1.5411 -  //
  1.5412 -  assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < 8), "should be != 0");
  1.5413 -
  1.5414 -  // This method uses pcmpestri instruction with bound registers
  1.5415 -  //   inputs:
  1.5416 -  //     xmm - substring
  1.5417 -  //     rax - substring length (elements count)
  1.5418 -  //     mem - scanned string
  1.5419 -  //     rdx - string length (elements count)
  1.5420 -  //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
  1.5421 -  //   outputs:
  1.5422 -  //     rcx - matched index in string
  1.5423 -  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
  1.5424 -
  1.5425 -  Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR,
  1.5426 -        RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR,
  1.5427 -        FOUND_CANDIDATE;
  1.5428 -
  1.5429 -  { //========================================================
  1.5430 -    // We don't know where these strings are located
  1.5431 -    // and we can't read beyond them. Load them through stack.
  1.5432 -    Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR;
  1.5433 -
  1.5434 -    movptr(tmp, rsp); // save old SP
  1.5435 -
  1.5436 -    if (int_cnt2 > 0) {     // small (< 8 chars) constant substring
  1.5437 -      if (int_cnt2 == 1) {  // One char
  1.5438 -        load_unsigned_short(result, Address(str2, 0));
  1.5439 -        movdl(vec, result); // move 32 bits
  1.5440 -      } else if (int_cnt2 == 2) { // Two chars
  1.5441 -        movdl(vec, Address(str2, 0)); // move 32 bits
  1.5442 -      } else if (int_cnt2 == 4) { // Four chars
  1.5443 -        movq(vec, Address(str2, 0));  // move 64 bits
  1.5444 -      } else { // cnt2 = { 3, 5, 6, 7 }
  1.5445 -        // Array header size is 12 bytes in 32-bit VM
  1.5446 -        // + 6 bytes for 3 chars == 18 bytes,
  1.5447 -        // enough space to load vec and shift.
  1.5448 -        assert(HeapWordSize*TypeArrayKlass::header_size() >= 12,"sanity");
  1.5449 -        movdqu(vec, Address(str2, (int_cnt2*2)-16)); // 16-byte load that ends at the end of the substring
  1.5450 -        psrldq(vec, 16-(int_cnt2*2)); // shift out the bytes loaded from before the substring
  1.5451 -      }
  1.5452 -    } else { // not constant substring
  1.5453 -      cmpl(cnt2, 8);
  1.5454 -      jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough
  1.5455 -
  1.5456 -      // We can read beyond string if str+16 does not cross page boundary
  1.5457 -      // since heaps are aligned and mapped by pages.
  1.5458 -      assert(os::vm_page_size() < (int)G, "default page should be small");
  1.5459 -      movl(result, str2); // We need only low 32 bits
  1.5460 -      andl(result, (os::vm_page_size()-1));
  1.5461 -      cmpl(result, (os::vm_page_size()-16));
  1.5462 -      jccb(Assembler::belowEqual, CHECK_STR);
  1.5463 -
  1.5464 -      // Move small strings to stack to allow load 16 bytes into vec.
  1.5465 -      subptr(rsp, 16);
  1.5466 -      int stk_offset = wordSize-2; // wordSize skips the cnt2 pushed below; -2 since the copy index runs cnt2..1
  1.5467 -      push(cnt2);
  1.5468 -
  1.5469 -      bind(COPY_SUBSTR);
  1.5470 -      load_unsigned_short(result, Address(str2, cnt2, Address::times_2, -2));
  1.5471 -      movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
  1.5472 -      decrement(cnt2);
  1.5473 -      jccb(Assembler::notZero, COPY_SUBSTR);
  1.5474 -
  1.5475 -      pop(cnt2);
  1.5476 -      movptr(str2, rsp);  // New substring address
  1.5477 -    } // non constant
  1.5478 -
  1.5479 -    bind(CHECK_STR);
  1.5480 -    cmpl(cnt1, 8);
  1.5481 -    jccb(Assembler::aboveEqual, BIG_STRINGS);
  1.5482 -
  1.5483 -    // Check cross page boundary.
  1.5484 -    movl(result, str1); // We need only low 32 bits
  1.5485 -    andl(result, (os::vm_page_size()-1));
  1.5486 -    cmpl(result, (os::vm_page_size()-16));
  1.5487 -    jccb(Assembler::belowEqual, BIG_STRINGS);
  1.5488 -
  1.5489 -    subptr(rsp, 16);
  1.5490 -    int stk_offset = -2; // -2 since the copy index runs cnt1..1; wordSize is added below when cnt2 is pushed
  1.5491 -    if (int_cnt2 < 0) { // not constant
  1.5492 -      push(cnt2);
  1.5493 -      stk_offset += wordSize;
  1.5494 -    }
  1.5495 -    movl(cnt2, cnt1); // cnt2 is used as the copy loop counter
  1.5496 -
  1.5497 -    bind(COPY_STR);
  1.5498 -    load_unsigned_short(result, Address(str1, cnt2, Address::times_2, -2));
  1.5499 -    movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
  1.5500 -    decrement(cnt2);
  1.5501 -    jccb(Assembler::notZero, COPY_STR);
  1.5502 -
  1.5503 -    if (int_cnt2 < 0) { // not constant
  1.5504 -      pop(cnt2);
  1.5505 -    }
  1.5506 -    movptr(str1, rsp);  // New string address
  1.5507 -
  1.5508 -    bind(BIG_STRINGS);
  1.5509 -    // Load substring.
  1.5510 -    if (int_cnt2 < 0) { // -1
  1.5511 -      movdqu(vec, Address(str2, 0));
  1.5512 -      push(cnt2);       // substr count
  1.5513 -      push(str2);       // substr addr
  1.5514 -      push(str1);       // string addr
  1.5515 -    } else {
  1.5516 -      // Small (< 8 chars) constant substrings are loaded already.
  1.5517 -      movl(cnt2, int_cnt2);
  1.5518 -    }
  1.5519 -    push(tmp);  // original SP
  1.5520 -
  1.5521 -  } // Finished loading
  1.5522 -
  1.5523 -  //========================================================
  1.5524 -  // Start search
  1.5525 -  //
  1.5526 -
  1.5527 -  movptr(result, str1); // string addr
  1.5528 -
  1.5529 -  if (int_cnt2  < 0) {  // Only for non constant substring
  1.5530 -    jmpb(SCAN_TO_SUBSTR);
  1.5531 -
  1.5532 -    // SP saved at sp+0
  1.5533 -    // String saved at sp+1*wordSize
  1.5534 -    // Substr saved at sp+2*wordSize
  1.5535 -    // Substr count saved at sp+3*wordSize
  1.5536 -
  1.5537 -    // Reload substr for rescan, this code
  1.5538 -    // is executed only for large substrings (> 8 chars)
  1.5539 -    bind(RELOAD_SUBSTR);
  1.5540 -    movptr(str2, Address(rsp, 2*wordSize));
  1.5541 -    movl(cnt2, Address(rsp, 3*wordSize));
  1.5542 -    movdqu(vec, Address(str2, 0));
  1.5543 -    // We came here after the beginning of the substring was
  1.5544 -    // matched but the rest of it was not so we need to search
  1.5545 -    // again. Start from the next element after the previous match.
  1.5546 -    subptr(str1, result); // Restore counter
  1.5547 -    shrl(str1, 1);
  1.5548 -    addl(cnt1, str1);
  1.5549 -    decrementl(cnt1);   // Shift to next element
  1.5550 -    cmpl(cnt1, cnt2);
  1.5551 -    jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
  1.5552 -
  1.5553 -    addptr(result, 2);
  1.5554 -  } // non constant
  1.5555 -
  1.5556 -  // Scan string for start of substr in 16-byte vectors
  1.5557 -  bind(SCAN_TO_SUBSTR);
  1.5558 -  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
  1.5559 -  pcmpestri(vec, Address(result, 0), 0x0d);
  1.5560 -  jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
  1.5561 -  subl(cnt1, 8);
  1.5562 -  jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
  1.5563 -  cmpl(cnt1, cnt2);
  1.5564 -  jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
  1.5565 -  addptr(result, 16);
  1.5566 -
  1.5567 -  bind(ADJUST_STR);
  1.5568 -  cmpl(cnt1, 8); // Do not read beyond string
  1.5569 -  jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
  1.5570 -  // Back-up string to avoid reading beyond string.
  1.5571 -  lea(result, Address(result, cnt1, Address::times_2, -16));
  1.5572 -  movl(cnt1, 8);
  1.5573 -  jmpb(SCAN_TO_SUBSTR);
  1.5574 -
  1.5575 -  // Found a potential substr
  1.5576 -  bind(FOUND_CANDIDATE);
  1.5577 -  // After pcmpestri tmp(rcx) contains matched element index
  1.5578 -
  1.5579 -  // Make sure string is still long enough
  1.5580 -  subl(cnt1, tmp);
  1.5581 -  cmpl(cnt1, cnt2);
  1.5582 -  jccb(Assembler::greaterEqual, FOUND_SUBSTR);
  1.5583 -  // Left less than substring.
  1.5584 -
  1.5585 -  bind(RET_NOT_FOUND);
  1.5586 -  movl(result, -1);
  1.5587 -  jmpb(CLEANUP);
  1.5588 -
  1.5589 -  bind(FOUND_SUBSTR);
  1.5590 -  // Compute start addr of substr
  1.5591 -  lea(result, Address(result, tmp, Address::times_2));
  1.5592 -
  1.5593 -  if (int_cnt2 > 0) { // Constant substring
  1.5594 -    // Repeat search for small substring (< 8 chars)
  1.5595 -    // from new point without reloading substring.
  1.5596 -    // Have to check that we don't read beyond string.
  1.5597 -    cmpl(tmp, 8-int_cnt2);
  1.5598 -    jccb(Assembler::greater, ADJUST_STR);
  1.5599 -    // Fall through if matched whole substring.
  1.5600 -  } else { // non constant
  1.5601 -    assert(int_cnt2 == -1, "should be != 0");
  1.5602 -
  1.5603 -    addl(tmp, cnt2);
  1.5604 -    // Found result if we matched whole substring.
  1.5605 -    cmpl(tmp, 8);
  1.5606 -    jccb(Assembler::lessEqual, RET_FOUND);
  1.5607 -
  1.5608 -    // Repeat search for small substring (<= 8 chars)
  1.5609 -    // from new point 'str1' without reloading substring.
  1.5610 -    cmpl(cnt2, 8);
  1.5611 -    // Have to check that we don't read beyond string.
  1.5612 -    jccb(Assembler::lessEqual, ADJUST_STR);
  1.5613 -
  1.5614 -    Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG;
  1.5615 -    // Compare the rest of substring (> 8 chars).
  1.5616 -    movptr(str1, result);
  1.5617 -
  1.5618 -    cmpl(tmp, cnt2);
  1.5619 -    // First 8 chars are already matched.
  1.5620 -    jccb(Assembler::equal, CHECK_NEXT);
  1.5621 -
  1.5622 -    bind(SCAN_SUBSTR);
  1.5623 -    pcmpestri(vec, Address(str1, 0), 0x0d);
  1.5624 -    // Need to reload strings pointers if not matched whole vector
  1.5625 -    jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
  1.5626 -
  1.5627 -    bind(CHECK_NEXT);
  1.5628 -    subl(cnt2, 8);
  1.5629 -    jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring
  1.5630 -    addptr(str1, 16);
  1.5631 -    addptr(str2, 16);
  1.5632 -    subl(cnt1, 8);
  1.5633 -    cmpl(cnt2, 8); // Do not read beyond substring
  1.5634 -    jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR);
  1.5635 -    // Back-up strings to avoid reading beyond substring.
  1.5636 -    lea(str2, Address(str2, cnt2, Address::times_2, -16));
  1.5637 -    lea(str1, Address(str1, cnt2, Address::times_2, -16));
  1.5638 -    subl(cnt1, cnt2);
  1.5639 -    movl(cnt2, 8);
  1.5640 -    addl(cnt1, 8);
  1.5641 -    bind(CONT_SCAN_SUBSTR);
  1.5642 -    movdqu(vec, Address(str2, 0));
  1.5643 -    jmpb(SCAN_SUBSTR);
  1.5644 -
  1.5645 -    bind(RET_FOUND_LONG);
  1.5646 -    movptr(str1, Address(rsp, wordSize)); // reload the string addr saved at sp+1*wordSize
  1.5647 -  } // non constant
  1.5648 -
  1.5649 -  bind(RET_FOUND);
  1.5650 -  // Compute substr offset
  1.5651 -  subptr(result, str1);
  1.5652 -  shrl(result, 1); // index
  1.5653 -
  1.5654 -  bind(CLEANUP);
  1.5655 -  pop(rsp); // restore SP
  1.5656 -
  1.5657 -} // string_indexof
  1.5658 -
  1.5659 -// Compare strings. 'result' gets the difference of the first mismatching chars, or the length difference (cnt1 - cnt2) if none mismatch.
  1.5660 -void MacroAssembler::string_compare(Register str1, Register str2,
  1.5661 -                                    Register cnt1, Register cnt2, Register result,
  1.5662 -                                    XMMRegister vec1) {
  1.5663 -  ShortBranchVerifier sbv(this);
  1.5664 -  Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;
  1.5665 -
  1.5666 -  // Compute the minimum of the string lengths and the
  1.5667 -  // difference of the string lengths (stack).
  1.5668 -  // Do the conditional move stuff
  1.5669 -  movl(result, cnt1);
  1.5670 -  subl(cnt1, cnt2);
  1.5671 -  push(cnt1); // length difference, popped at LENGTH_DIFF_LABEL or POP_LABEL
  1.5672 -  cmov32(Assembler::lessEqual, cnt2, result); // cnt2 = min(cnt1, cnt2), using the flags of the subl above
  1.5673 -
  1.5674 -  // Is the minimum length zero?
  1.5675 -  testl(cnt2, cnt2);
  1.5676 -  jcc(Assembler::zero, LENGTH_DIFF_LABEL);
  1.5677 -
  1.5678 -  // Load first characters
  1.5679 -  load_unsigned_short(result, Address(str1, 0));
  1.5680 -  load_unsigned_short(cnt1, Address(str2, 0));
  1.5681 -
  1.5682 -  // Compare first characters
  1.5683 -  subl(result, cnt1);
  1.5684 -  jcc(Assembler::notZero,  POP_LABEL);
  1.5685 -  decrementl(cnt2);
  1.5686 -  jcc(Assembler::zero, LENGTH_DIFF_LABEL);
  1.5687 -
  1.5688 -  {
  1.5689 -    // Check after comparing first character to see if strings are equivalent
  1.5690 -    Label LSkip2;
  1.5691 -    // Check if the strings start at same location
  1.5692 -    cmpptr(str1, str2);
  1.5693 -    jccb(Assembler::notEqual, LSkip2);
  1.5694 -
  1.5695 -    // Check if the length difference is zero (from stack)
  1.5696 -    cmpl(Address(rsp, 0), 0x0);
  1.5697 -    jcc(Assembler::equal,  LENGTH_DIFF_LABEL);
  1.5698 -
  1.5699 -    // Strings might not be equivalent
  1.5700 -    bind(LSkip2);
  1.5701 -  }
  1.5702 -
  1.5703 -  Address::ScaleFactor scale = Address::times_2;
  1.5704 -  int stride = 8; // chars per 16-byte vector
  1.5705 -
  1.5706 -  // Advance to next element
  1.5707 -  addptr(str1, 16/stride); // 16/8 == 2 bytes, i.e. one char
  1.5708 -  addptr(str2, 16/stride);
  1.5709 -
  1.5710 -  if (UseSSE42Intrinsics) {
  1.5711 -    Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
  1.5712 -    int pcmpmask = 0x19; // 11000 + 01, see the mode description below
  1.5713 -    // Setup to compare 16-byte vectors
  1.5714 -    movl(result, cnt2);
  1.5715 -    andl(cnt2, ~(stride - 1));   // cnt2 holds the vector count
  1.5716 -    jccb(Assembler::zero, COMPARE_TAIL);
  1.5717 -
  1.5718 -    lea(str1, Address(str1, result, scale));
  1.5719 -    lea(str2, Address(str2, result, scale));
  1.5720 -    negptr(result);
  1.5721 -
  1.5722 -    // pcmpestri
  1.5723 -    //   inputs:
  1.5724 -    //     vec1- substring
  1.5725 -    //     rax - negative string length (elements count)
  1.5726 -    //     mem - scanned string
  1.5727 -    //     rdx - string length (elements count)
  1.5728 -    //     pcmpmask - cmp mode: 11000 (string compare with negated result)
  1.5729 -    //               + 00 (unsigned bytes) or  + 01 (unsigned shorts)
  1.5730 -    //   outputs:
  1.5731 -    //     rcx - first mismatched element index
  1.5732 -    assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");
  1.5733 -
  1.5734 -    bind(COMPARE_WIDE_VECTORS);
  1.5735 -    movdqu(vec1, Address(str1, result, scale));
  1.5736 -    pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
  1.5737 -    // After pcmpestri cnt1(rcx) contains mismatched element index
  1.5738 -
  1.5739 -    jccb(Assembler::below, VECTOR_NOT_EQUAL);  // CF==1
  1.5740 -    addptr(result, stride);
  1.5741 -    subptr(cnt2, stride);
  1.5742 -    jccb(Assembler::notZero, COMPARE_WIDE_VECTORS);
  1.5743 -
  1.5744 -    // compare wide vectors tail
  1.5745 -    testl(result, result);
  1.5746 -    jccb(Assembler::zero, LENGTH_DIFF_LABEL);
  1.5747 -
  1.5748 -    movl(cnt2, stride);
  1.5749 -    movl(result, stride);
  1.5750 -    negptr(result); // result = -stride: compare the last 'stride' chars before the end pointers
  1.5751 -    movdqu(vec1, Address(str1, result, scale));
  1.5752 -    pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
  1.5753 -    jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL);
  1.5754 -
  1.5755 -    // Mismatched characters in the vectors
  1.5756 -    bind(VECTOR_NOT_EQUAL);
  1.5757 -    addptr(result, cnt1);
  1.5758 -    movptr(cnt2, result);
  1.5759 -    load_unsigned_short(result, Address(str1, cnt2, scale));
  1.5760 -    load_unsigned_short(cnt1, Address(str2, cnt2, scale));
  1.5761 -    subl(result, cnt1);
  1.5762 -    jmpb(POP_LABEL);
  1.5763 -
  1.5764 -    bind(COMPARE_TAIL); // limit is zero
  1.5765 -    movl(cnt2, result);
  1.5766 -    // Fallthru to tail compare
  1.5767 -  }
  1.5768 -
  1.5769 -  // Shift str2 and str1 to the end of the arrays, negate min
  1.5770 -  lea(str1, Address(str1, cnt2, scale, 0));
  1.5771 -  lea(str2, Address(str2, cnt2, scale, 0));
  1.5772 -  negptr(cnt2);
  1.5773 -
  1.5774 -  // Compare the rest of the elements
  1.5775 -  bind(WHILE_HEAD_LABEL);
  1.5776 -  load_unsigned_short(result, Address(str1, cnt2, scale, 0));
  1.5777 -  load_unsigned_short(cnt1, Address(str2, cnt2, scale, 0));
  1.5778 -  subl(result, cnt1);
  1.5779 -  jccb(Assembler::notZero, POP_LABEL);
  1.5780 -  increment(cnt2);
  1.5781 -  jccb(Assembler::notZero, WHILE_HEAD_LABEL);
  1.5782 -
  1.5783 -  // Strings are equal up to min length.  Return the length difference.
  1.5784 -  bind(LENGTH_DIFF_LABEL);
  1.5785 -  pop(result);
  1.5786 -  jmpb(DONE_LABEL);
  1.5787 -
  1.5788 -  // Discard the stored length difference
  1.5789 -  bind(POP_LABEL);
  1.5790 -  pop(cnt1);
  1.5791 -
  1.5792 -  // That's it
  1.5793 -  bind(DONE_LABEL);
  1.5794 -}
  1.5795 -
  1.5796 -// Compare char[] arrays aligned to 4 bytes or substrings. Sets 'result' to 1 if equal, 0 otherwise.
  1.5797 -void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
  1.5798 -                                        Register limit, Register result, Register chr,
  1.5799 -                                        XMMRegister vec1, XMMRegister vec2) {
  1.5800 -  ShortBranchVerifier sbv(this);
  1.5801 -  Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;
  1.5802 -
  1.5803 -  int length_offset  = arrayOopDesc::length_offset_in_bytes();
  1.5804 -  int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);
  1.5805 -
  1.5806 -  // Check the input args
  1.5807 -  cmpptr(ary1, ary2);
  1.5808 -  jcc(Assembler::equal, TRUE_LABEL);
  1.5809 -
  1.5810 -  if (is_array_equ) {
  1.5811 -    // Need additional checks for arrays_equals.
  1.5812 -    testptr(ary1, ary1);
  1.5813 -    jcc(Assembler::zero, FALSE_LABEL);
  1.5814 -    testptr(ary2, ary2);
  1.5815 -    jcc(Assembler::zero, FALSE_LABEL);
  1.5816 -
  1.5817 -    // Check the lengths
  1.5818 -    movl(limit, Address(ary1, length_offset));
  1.5819 -    cmpl(limit, Address(ary2, length_offset));
  1.5820 -    jcc(Assembler::notEqual, FALSE_LABEL);
  1.5821 -  }
  1.5822 -
  1.5823 -  // count == 0
  1.5824 -  testl(limit, limit);
  1.5825 -  jcc(Assembler::zero, TRUE_LABEL);
  1.5826 -
  1.5827 -  if (is_array_equ) {
  1.5828 -    // Load array address
  1.5829 -    lea(ary1, Address(ary1, base_offset));
  1.5830 -    lea(ary2, Address(ary2, base_offset));
  1.5831 -  }
  1.5832 -
  1.5833 -  shll(limit, 1);      // byte count != 0
  1.5834 -  movl(result, limit); // copy
  1.5835 -
  1.5836 -  if (UseSSE42Intrinsics) {
  1.5837 -    // With SSE4.2, use double quad vector compare
  1.5838 -    Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
  1.5839 -
  1.5840 -    // Compare 16-byte vectors
  1.5841 -    andl(result, 0x0000000e);  //   tail count (in bytes)
  1.5842 -    andl(limit, 0xfffffff0);   // vector count (in bytes)
  1.5843 -    jccb(Assembler::zero, COMPARE_TAIL);
  1.5844 -
  1.5845 -    lea(ary1, Address(ary1, limit, Address::times_1));
  1.5846 -    lea(ary2, Address(ary2, limit, Address::times_1));
  1.5847 -    negptr(limit); // negative byte offset from the end of the vector part, counts up to zero
  1.5848 -
  1.5849 -    bind(COMPARE_WIDE_VECTORS);
  1.5850 -    movdqu(vec1, Address(ary1, limit, Address::times_1));
  1.5851 -    movdqu(vec2, Address(ary2, limit, Address::times_1));
  1.5852 -    pxor(vec1, vec2);
  1.5853 -
  1.5854 -    ptest(vec1, vec1);
  1.5855 -    jccb(Assembler::notZero, FALSE_LABEL);
  1.5856 -    addptr(limit, 16);
  1.5857 -    jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
  1.5858 -
  1.5859 -    testl(result, result);
  1.5860 -    jccb(Assembler::zero, TRUE_LABEL);
  1.5861 -
  1.5862 -    movdqu(vec1, Address(ary1, result, Address::times_1, -16)); // last 16 bytes; overlaps data already compared
  1.5863 -    movdqu(vec2, Address(ary2, result, Address::times_1, -16));
  1.5864 -    pxor(vec1, vec2);
  1.5865 -
  1.5866 -    ptest(vec1, vec1);
  1.5867 -    jccb(Assembler::notZero, FALSE_LABEL);
  1.5868 -    jmpb(TRUE_LABEL);
  1.5869 -
  1.5870 -    bind(COMPARE_TAIL); // limit is zero
  1.5871 -    movl(limit, result);
  1.5872 -    // Fallthru to tail compare
  1.5873 -  }
  1.5874 -
  1.5875 -  // Compare 4-byte vectors
  1.5876 -  andl(limit, 0xfffffffc); // vector count (in bytes)
  1.5877 -  jccb(Assembler::zero, COMPARE_CHAR);
  1.5878 -
  1.5879 -  lea(ary1, Address(ary1, limit, Address::times_1));
  1.5880 -  lea(ary2, Address(ary2, limit, Address::times_1));
  1.5881 -  negptr(limit);
  1.5882 -
  1.5883 -  bind(COMPARE_VECTORS);
  1.5884 -  movl(chr, Address(ary1, limit, Address::times_1));
  1.5885 -  cmpl(chr, Address(ary2, limit, Address::times_1));
  1.5886 -  jccb(Assembler::notEqual, FALSE_LABEL);
  1.5887 -  addptr(limit, 4);
  1.5888 -  jcc(Assembler::notZero, COMPARE_VECTORS);
  1.5889 -
  1.5890 -  // Compare trailing char (final 2 bytes), if any
  1.5891 -  bind(COMPARE_CHAR);
  1.5892 -  testl(result, 0x2);   // tail  char
  1.5893 -  jccb(Assembler::zero, TRUE_LABEL);
  1.5894 -  load_unsigned_short(chr, Address(ary1, 0));
  1.5895 -  load_unsigned_short(limit, Address(ary2, 0));
  1.5896 -  cmpl(chr, limit);
  1.5897 -  jccb(Assembler::notEqual, FALSE_LABEL);
  1.5898 -
  1.5899 -  bind(TRUE_LABEL);
  1.5900 -  movl(result, 1);   // return true
  1.5901 -  jmpb(DONE);
  1.5902 -
  1.5903 -  bind(FALSE_LABEL);
  1.5904 -  xorl(result, result); // return false
  1.5905 -
  1.5906 -  // That's it
  1.5907 -  bind(DONE);
  1.5908 -}
  1.5909 -
  1.5910 -void MacroAssembler::generate_fill(BasicType t, bool aligned,
  1.5911 -                                   Register to, Register value, Register count,
  1.5912 -                                   Register rtmp, XMMRegister xtmp) {
  1.5913 -  ShortBranchVerifier sbv(this);
  1.5914 -  assert_different_registers(to, value, count, rtmp);
  1.5915 -  Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
  1.5916 -  Label L_fill_2_bytes, L_fill_4_bytes;
  1.5917 -
  1.5918 -  int shift = -1; // log2(elements per 4 bytes): 2 for bytes, 1 for shorts, 0 for ints
  1.5919 -  switch (t) {
  1.5920 -    case T_BYTE:
  1.5921 -      shift = 2;
  1.5922 -      break;
  1.5923 -    case T_SHORT:
  1.5924 -      shift = 1;
  1.5925 -      break;
  1.5926 -    case T_INT:
  1.5927 -      shift = 0;
  1.5928 -      break;
  1.5929 -    default: ShouldNotReachHere();
  1.5930 -  }
  1.5931 -
  1.5932 -  if (t == T_BYTE) { // replicate the byte into the low 16 bits of value
  1.5933 -    andl(value, 0xff);
  1.5934 -    movl(rtmp, value);
  1.5935 -    shll(rtmp, 8);
  1.5936 -    orl(value, rtmp);
  1.5937 -  }
  1.5938 -  if (t == T_SHORT) {
  1.5939 -    andl(value, 0xffff);
  1.5940 -  }
  1.5941 -  if (t == T_BYTE || t == T_SHORT) { // replicate the low 16 bits into the high 16 bits
  1.5942 -    movl(rtmp, value);
  1.5943 -    shll(rtmp, 16);
  1.5944 -    orl(value, rtmp);
  1.5945 -  }
  1.5946 -
  1.5947 -  cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
  1.5948 -  jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp
  1.5949 -  if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
  1.5950 -    // align destination address ('to') at 4 bytes address boundary
  1.5951 -    if (t == T_BYTE) {
  1.5952 -      // One byte misalignment happens only for byte arrays
  1.5953 -      testptr(to, 1);
  1.5954 -      jccb(Assembler::zero, L_skip_align1);
  1.5955 -      movb(Address(to, 0), value);
  1.5956 -      increment(to);
  1.5957 -      decrement(count);
  1.5958 -      BIND(L_skip_align1);
  1.5959 -    }
  1.5960 -    // Two bytes misalignment happens only for byte and short (char) arrays
  1.5961 -    testptr(to, 2);
  1.5962 -    jccb(Assembler::zero, L_skip_align2);
  1.5963 -    movw(Address(to, 0), value);
  1.5964 -    addptr(to, 2);
  1.5965 -    subl(count, 1<<(shift-1)); // elements in 2 bytes
  1.5966 -    BIND(L_skip_align2);
  1.5967 -  }
  1.5968 -  if (UseSSE < 2) {
  1.5969 -    Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
  1.5970 -    // Fill 32-byte chunks
  1.5971 -    subl(count, 8 << shift); // 8 << shift elements == 32 bytes
  1.5972 -    jcc(Assembler::less, L_check_fill_8_bytes);
  1.5973 -    align(16);
  1.5974 -
  1.5975 -    BIND(L_fill_32_bytes_loop);
  1.5976 -
  1.5977 -    for (int i = 0; i < 32; i += 4) {
  1.5978 -      movl(Address(to, i), value);
  1.5979 -    }
  1.5980 -
  1.5981 -    addptr(to, 32);
  1.5982 -    subl(count, 8 << shift);
  1.5983 -    jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
  1.5984 -    BIND(L_check_fill_8_bytes);
  1.5985 -    addl(count, 8 << shift); // undo the last subtraction to get the remaining element count
  1.5986 -    jccb(Assembler::zero, L_exit);
  1.5987 -    jmpb(L_fill_8_bytes);
  1.5988 -
  1.5989 -    //
  1.5990 -    // length is too short, just fill qwords
  1.5991 -    //
  1.5992 -    BIND(L_fill_8_bytes_loop);
  1.5993 -    movl(Address(to, 0), value);
  1.5994 -    movl(Address(to, 4), value);
  1.5995 -    addptr(to, 8);
  1.5996 -    BIND(L_fill_8_bytes);
  1.5997 -    subl(count, 1 << (shift + 1)); // elements in 8 bytes
  1.5998 -    jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
  1.5999 -    // fall through to fill 4 bytes
  1.6000 -  } else {
  1.6001 -    Label L_fill_32_bytes;
  1.6002 -    if (!UseUnalignedLoadStores) {
  1.6003 -      // align to 8 bytes, we know we are 4 byte aligned to start
  1.6004 -      testptr(to, 4);
  1.6005 -      jccb(Assembler::zero, L_fill_32_bytes);
  1.6006 -      movl(Address(to, 0), value);
  1.6007 -      addptr(to, 4);
  1.6008 -      subl(count, 1<<shift); // elements in 4 bytes
  1.6009 -    }
  1.6010 -    BIND(L_fill_32_bytes);
  1.6011 -    {
  1.6012 -      assert( UseSSE >= 2, "supported cpu only" );
  1.6013 -      Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
  1.6014 -      // Fill 32-byte chunks
  1.6015 -      movdl(xtmp, value);
  1.6016 -      pshufd(xtmp, xtmp, 0); // broadcast the 32-bit pattern to all four dwords of xtmp
  1.6017 -
  1.6018 -      subl(count, 8 << shift); // 8 << shift elements == 32 bytes
  1.6019 -      jcc(Assembler::less, L_check_fill_8_bytes);
  1.6020 -      align(16);
  1.6021 -
  1.6022 -      BIND(L_fill_32_bytes_loop);
  1.6023 -
  1.6024 -      if (UseUnalignedLoadStores) {
  1.6025 -        movdqu(Address(to, 0), xtmp);
  1.6026 -        movdqu(Address(to, 16), xtmp);
  1.6027 -      } else {
  1.6028 -        movq(Address(to, 0), xtmp);
  1.6029 -        movq(Address(to, 8), xtmp);
  1.6030 -        movq(Address(to, 16), xtmp);
  1.6031 -        movq(Address(to, 24), xtmp);
  1.6032 -      }
  1.6033 -
  1.6034 -      addptr(to, 32);
  1.6035 -      subl(count, 8 << shift);
  1.6036 -      jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
  1.6037 -      BIND(L_check_fill_8_bytes);
  1.6038 -      addl(count, 8 << shift); // undo the last subtraction to get the remaining element count
  1.6039 -      jccb(Assembler::zero, L_exit);
  1.6040 -      jmpb(L_fill_8_bytes);
  1.6041 -
  1.6042 -      //
  1.6043 -      // length is too short, just fill qwords
  1.6044 -      //
  1.6045 -      BIND(L_fill_8_bytes_loop);
  1.6046 -      movq(Address(to, 0), xtmp);
  1.6047 -      addptr(to, 8);
  1.6048 -      BIND(L_fill_8_bytes);
  1.6049 -      subl(count, 1 << (shift + 1)); // elements in 8 bytes
  1.6050 -      jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
  1.6051 -    }
  1.6052 -  }
  1.6053 -  // fill trailing 4 bytes
  1.6054 -  BIND(L_fill_4_bytes);
  1.6055 -  testl(count, 1<<shift);
  1.6056 -  jccb(Assembler::zero, L_fill_2_bytes);
  1.6057 -  movl(Address(to, 0), value);
  1.6058 -  if (t == T_BYTE || t == T_SHORT) {
  1.6059 -    addptr(to, 4);
  1.6060 -    BIND(L_fill_2_bytes);
  1.6061 -    // fill trailing 2 bytes
  1.6062 -    testl(count, 1<<(shift-1));
  1.6063 -    jccb(Assembler::zero, L_fill_byte);
  1.6064 -    movw(Address(to, 0), value);
  1.6065 -    if (t == T_BYTE) {
  1.6066 -      addptr(to, 2);
  1.6067 -      BIND(L_fill_byte);
  1.6068 -      // fill trailing byte
  1.6069 -      testl(count, 1);
  1.6070 -      jccb(Assembler::zero, L_exit);
  1.6071 -      movb(Address(to, 0), value);
  1.6072 -    } else {
  1.6073 -      BIND(L_fill_byte); // labels still have to be bound even when there is nothing smaller to fill
  1.6074 -    }
  1.6075 -  } else {
  1.6076 -    BIND(L_fill_2_bytes);
  1.6077 -  }
  1.6078 -  BIND(L_exit);
  1.6079 -}
  1.6080 -#undef BIND
  1.6081 -#undef BLOCK_COMMENT
  1.6082 -
  1.6083 -
  1.6084 -Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
  1.6085 -  switch (cond) {
  1.6086 -    // Note some conditions are synonyms for others
  1.6087 -    case Assembler::zero:         return Assembler::notZero;
  1.6088 -    case Assembler::notZero:      return Assembler::zero;
  1.6089 -    case Assembler::less:         return Assembler::greaterEqual;
  1.6090 -    case Assembler::lessEqual:    return Assembler::greater;
  1.6091 -    case Assembler::greater:      return Assembler::lessEqual;
  1.6092 -    case Assembler::greaterEqual: return Assembler::less;
  1.6093 -    case Assembler::below:        return Assembler::aboveEqual;
  1.6094 -    case Assembler::belowEqual:   return Assembler::above;
  1.6095 -    case Assembler::above:        return Assembler::belowEqual;
  1.6096 -    case Assembler::aboveEqual:   return Assembler::below;
  1.6097 -    case Assembler::overflow:     return Assembler::noOverflow;
  1.6098 -    case Assembler::noOverflow:   return Assembler::overflow;
  1.6099 -    case Assembler::negative:     return Assembler::positive;
  1.6100 -    case Assembler::positive:     return Assembler::negative;
  1.6101 -    case Assembler::parity:       return Assembler::noParity;
  1.6102 -    case Assembler::noParity:     return Assembler::parity;
  1.6103 -  }
  1.6104 -  ShouldNotReachHere(); return Assembler::overflow;
  1.6105 -}
  1.6106 -
  1.6107 -SkipIfEqual::SkipIfEqual(
  1.6108 -    MacroAssembler* masm, const bool* flag_addr, bool value) {
  1.6109 -  _masm = masm;
  1.6110 -  _masm->cmp8(ExternalAddress((address)flag_addr), value);
  1.6111 -  _masm->jcc(Assembler::equal, _label);
  1.6112 -}
  1.6113 -
  1.6114 -SkipIfEqual::~SkipIfEqual() {
  1.6115 -  _masm->bind(_label);
  1.6116 -}

mercurial