src/cpu/x86/vm/assembler_x86.hpp

changeset 739
dc7f315e41f7
parent 631
d1605aabd0a1
child 797
f8199438385b
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/cpu/x86/vm/assembler_x86.hpp	Wed Aug 27 00:21:55 2008 -0700
     1.3 @@ -0,0 +1,2044 @@
     1.4 +/*
     1.5 + * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
     1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.7 + *
     1.8 + * This code is free software; you can redistribute it and/or modify it
     1.9 + * under the terms of the GNU General Public License version 2 only, as
    1.10 + * published by the Free Software Foundation.
    1.11 + *
    1.12 + * This code is distributed in the hope that it will be useful, but WITHOUT
    1.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    1.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    1.15 + * version 2 for more details (a copy is included in the LICENSE file that
    1.16 + * accompanied this code).
    1.17 + *
    1.18 + * You should have received a copy of the GNU General Public License version
    1.19 + * 2 along with this work; if not, write to the Free Software Foundation,
    1.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    1.21 + *
    1.22 + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
    1.23 + * CA 95054 USA or visit www.sun.com if you need additional information or
    1.24 + * have any questions.
    1.25 + *
    1.26 + */
    1.27 +
    1.28 +class BiasedLockingCounters;
    1.29 +
    1.30 +// Contains all the definitions needed for x86 assembly code generation.
    1.31 +
    1.32 +// Calling convention
    1.33 +class Argument VALUE_OBJ_CLASS_SPEC {
    1.34 + public:
    1.35 +  enum {
    1.36 +#ifdef _LP64
    1.37 +#ifdef _WIN64
    1.38 +    n_int_register_parameters_c   = 4, // rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
    1.39 +    n_float_register_parameters_c = 4,  // xmm0 - xmm3 (c_farg0, c_farg1, ... )
    1.40 +#else
    1.41 +    n_int_register_parameters_c   = 6, // rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
    1.42 +    n_float_register_parameters_c = 8,  // xmm0 - xmm7 (c_farg0, c_farg1, ... )
    1.43 +#endif // _WIN64
    1.44 +    n_int_register_parameters_j   = 6, // j_rarg0, j_rarg1, ...
    1.45 +    n_float_register_parameters_j = 8  // j_farg0, j_farg1, ...
    1.46 +#else
    1.47 +    n_register_parameters = 0   // 0 registers used to pass arguments
    1.48 +#endif // _LP64
    1.49 +  };
    1.50 +};
    1.51 +
    1.52 +
    1.53 +#ifdef _LP64
    1.54 +// Symbolically name the register arguments used by the c calling convention.
    1.55 +// Windows is different from linux/solaris. So much for standards...
    1.56 +
    1.57 +#ifdef _WIN64
    1.58 +
    1.59 +REGISTER_DECLARATION(Register, c_rarg0, rcx);
    1.60 +REGISTER_DECLARATION(Register, c_rarg1, rdx);
    1.61 +REGISTER_DECLARATION(Register, c_rarg2, r8);
    1.62 +REGISTER_DECLARATION(Register, c_rarg3, r9);
    1.63 +
    1.64 +REGISTER_DECLARATION(XMMRegister, c_farg0, xmm0);
    1.65 +REGISTER_DECLARATION(XMMRegister, c_farg1, xmm1);
    1.66 +REGISTER_DECLARATION(XMMRegister, c_farg2, xmm2);
    1.67 +REGISTER_DECLARATION(XMMRegister, c_farg3, xmm3);
    1.68 +
    1.69 +#else
    1.70 +
    1.71 +REGISTER_DECLARATION(Register, c_rarg0, rdi);
    1.72 +REGISTER_DECLARATION(Register, c_rarg1, rsi);
    1.73 +REGISTER_DECLARATION(Register, c_rarg2, rdx);
    1.74 +REGISTER_DECLARATION(Register, c_rarg3, rcx);
    1.75 +REGISTER_DECLARATION(Register, c_rarg4, r8);
    1.76 +REGISTER_DECLARATION(Register, c_rarg5, r9);
    1.77 +
    1.78 +REGISTER_DECLARATION(XMMRegister, c_farg0, xmm0);
    1.79 +REGISTER_DECLARATION(XMMRegister, c_farg1, xmm1);
    1.80 +REGISTER_DECLARATION(XMMRegister, c_farg2, xmm2);
    1.81 +REGISTER_DECLARATION(XMMRegister, c_farg3, xmm3);
    1.82 +REGISTER_DECLARATION(XMMRegister, c_farg4, xmm4);
    1.83 +REGISTER_DECLARATION(XMMRegister, c_farg5, xmm5);
    1.84 +REGISTER_DECLARATION(XMMRegister, c_farg6, xmm6);
    1.85 +REGISTER_DECLARATION(XMMRegister, c_farg7, xmm7);
    1.86 +
    1.87 +#endif // _WIN64
    1.88 +
    1.89 +// Symbolically name the register arguments used by the Java calling convention.
    1.90 +// We have control over the convention for java so we can do what we please.
    1.91 +// What pleases us is to offset the java calling convention so that when
    1.92 +// we call a suitable jni method the arguments are lined up and we don't
    1.93 +// have to do little shuffling. A suitable jni method is non-static and a
    1.94 +// small number of arguments (two fewer args on windows)
    1.95 +//
    1.96 +//        |-------------------------------------------------------|
    1.97 +//        | c_rarg0   c_rarg1  c_rarg2 c_rarg3 c_rarg4 c_rarg5    |
    1.98 +//        |-------------------------------------------------------|
    1.99 +//        | rcx       rdx      r8      r9      rdi*    rsi*       | windows (* not a c_rarg)
   1.100 +//        | rdi       rsi      rdx     rcx     r8      r9         | solaris/linux
   1.101 +//        |-------------------------------------------------------|
   1.102 +//        | j_rarg5   j_rarg0  j_rarg1 j_rarg2 j_rarg3 j_rarg4    |
   1.103 +//        |-------------------------------------------------------|
   1.104 +
   1.105 +REGISTER_DECLARATION(Register, j_rarg0, c_rarg1);
   1.106 +REGISTER_DECLARATION(Register, j_rarg1, c_rarg2);
   1.107 +REGISTER_DECLARATION(Register, j_rarg2, c_rarg3);
   1.108 +// Windows runs out of register args here
   1.109 +#ifdef _WIN64
   1.110 +REGISTER_DECLARATION(Register, j_rarg3, rdi);
   1.111 +REGISTER_DECLARATION(Register, j_rarg4, rsi);
   1.112 +#else
   1.113 +REGISTER_DECLARATION(Register, j_rarg3, c_rarg4);
   1.114 +REGISTER_DECLARATION(Register, j_rarg4, c_rarg5);
   1.115 +#endif /* _WIN64 */
   1.116 +REGISTER_DECLARATION(Register, j_rarg5, c_rarg0);
   1.117 +
   1.118 +REGISTER_DECLARATION(XMMRegister, j_farg0, xmm0);
   1.119 +REGISTER_DECLARATION(XMMRegister, j_farg1, xmm1);
   1.120 +REGISTER_DECLARATION(XMMRegister, j_farg2, xmm2);
   1.121 +REGISTER_DECLARATION(XMMRegister, j_farg3, xmm3);
   1.122 +REGISTER_DECLARATION(XMMRegister, j_farg4, xmm4);
   1.123 +REGISTER_DECLARATION(XMMRegister, j_farg5, xmm5);
   1.124 +REGISTER_DECLARATION(XMMRegister, j_farg6, xmm6);
   1.125 +REGISTER_DECLARATION(XMMRegister, j_farg7, xmm7);
   1.126 +
   1.127 +REGISTER_DECLARATION(Register, rscratch1, r10);  // volatile
   1.128 +REGISTER_DECLARATION(Register, rscratch2, r11);  // volatile
   1.129 +
   1.130 +REGISTER_DECLARATION(Register, r12_heapbase, r12); // callee-saved
   1.131 +REGISTER_DECLARATION(Register, r15_thread, r15); // callee-saved
   1.132 +
   1.133 +#else
    1.134 +// rscratch1 will appear in 32bit code that is dead but of course must compile
   1.135 +// Using noreg ensures if the dead code is incorrectly live and executed it
   1.136 +// will cause an assertion failure
   1.137 +#define rscratch1 noreg
   1.138 +
   1.139 +#endif // _LP64
   1.140 +
   1.141 +// Address is an abstraction used to represent a memory location
   1.142 +// using any of the amd64 addressing modes with one object.
   1.143 +//
   1.144 +// Note: A register location is represented via a Register, not
   1.145 +//       via an address for efficiency & simplicity reasons.
   1.146 +
   1.147 +class ArrayAddress;
   1.148 +
   1.149 +class Address VALUE_OBJ_CLASS_SPEC {
   1.150 + public:
   1.151 +  enum ScaleFactor {
   1.152 +    no_scale = -1,
   1.153 +    times_1  =  0,
   1.154 +    times_2  =  1,
   1.155 +    times_4  =  2,
   1.156 +    times_8  =  3,
   1.157 +    times_ptr = LP64_ONLY(times_8) NOT_LP64(times_4)
   1.158 +  };
   1.159 +
   1.160 + private:
   1.161 +  Register         _base;
   1.162 +  Register         _index;
   1.163 +  ScaleFactor      _scale;
   1.164 +  int              _disp;
   1.165 +  RelocationHolder _rspec;
   1.166 +
   1.167 +  // Easily misused constructors make them private
   1.168 +  // %%% can we make these go away?
   1.169 +  NOT_LP64(Address(address loc, RelocationHolder spec);)
   1.170 +  Address(int disp, address loc, relocInfo::relocType rtype);
   1.171 +  Address(int disp, address loc, RelocationHolder spec);
   1.172 +
   1.173 + public:
   1.174 +
   1.175 + int disp() { return _disp; }
   1.176 +  // creation
   1.177 +  Address()
   1.178 +    : _base(noreg),
   1.179 +      _index(noreg),
   1.180 +      _scale(no_scale),
   1.181 +      _disp(0) {
   1.182 +  }
   1.183 +
   1.184 +  // No default displacement otherwise Register can be implicitly
   1.185 +  // converted to 0(Register) which is quite a different animal.
   1.186 +
   1.187 +  Address(Register base, int disp)
   1.188 +    : _base(base),
   1.189 +      _index(noreg),
   1.190 +      _scale(no_scale),
   1.191 +      _disp(disp) {
   1.192 +  }
   1.193 +
   1.194 +  Address(Register base, Register index, ScaleFactor scale, int disp = 0)
   1.195 +    : _base (base),
   1.196 +      _index(index),
   1.197 +      _scale(scale),
   1.198 +      _disp (disp) {
   1.199 +    assert(!index->is_valid() == (scale == Address::no_scale),
   1.200 +           "inconsistent address");
   1.201 +  }
   1.202 +
   1.203 +  // The following two overloads are used in connection with the
   1.204 +  // ByteSize type (see sizes.hpp).  They simplify the use of
   1.205 +  // ByteSize'd arguments in assembly code. Note that their equivalent
   1.206 +  // for the optimized build are the member functions with int disp
   1.207 +  // argument since ByteSize is mapped to an int type in that case.
   1.208 +  //
   1.209 +  // Note: DO NOT introduce similar overloaded functions for WordSize
   1.210 +  // arguments as in the optimized mode, both ByteSize and WordSize
   1.211 +  // are mapped to the same type and thus the compiler cannot make a
   1.212 +  // distinction anymore (=> compiler errors).
   1.213 +
   1.214 +#ifdef ASSERT
   1.215 +  Address(Register base, ByteSize disp)
   1.216 +    : _base(base),
   1.217 +      _index(noreg),
   1.218 +      _scale(no_scale),
   1.219 +      _disp(in_bytes(disp)) {
   1.220 +  }
   1.221 +
   1.222 +  Address(Register base, Register index, ScaleFactor scale, ByteSize disp)
   1.223 +    : _base(base),
   1.224 +      _index(index),
   1.225 +      _scale(scale),
   1.226 +      _disp(in_bytes(disp)) {
   1.227 +    assert(!index->is_valid() == (scale == Address::no_scale),
   1.228 +           "inconsistent address");
   1.229 +  }
   1.230 +#endif // ASSERT
   1.231 +
   1.232 +  // accessors
   1.233 +  bool uses(Register reg) const {
   1.234 +    return _base == reg || _index == reg;
   1.235 +  }
   1.236 +
   1.237 +  // Convert the raw encoding form into the form expected by the constructor for
   1.238 +  // Address.  An index of 4 (rsp) corresponds to having no index, so convert
   1.239 +  // that to noreg for the Address constructor.
   1.240 +  static Address make_raw(int base, int index, int scale, int disp);
   1.241 +
   1.242 +  static Address make_array(ArrayAddress);
   1.243 +
   1.244 +
   1.245 + private:
   1.246 +  bool base_needs_rex() const {
   1.247 +    return _base != noreg && _base->encoding() >= 8;
   1.248 +  }
   1.249 +
   1.250 +  bool index_needs_rex() const {
   1.251 +    return _index != noreg &&_index->encoding() >= 8;
   1.252 +  }
   1.253 +
   1.254 +  relocInfo::relocType reloc() const { return _rspec.type(); }
   1.255 +
   1.256 +  friend class Assembler;
   1.257 +  friend class MacroAssembler;
   1.258 +  friend class LIR_Assembler; // base/index/scale/disp
   1.259 +};
   1.260 +
   1.261 +//
   1.262 +// AddressLiteral has been split out from Address because operands of this type
   1.263 +// need to be treated specially on 32bit vs. 64bit platforms. By splitting it out
   1.264 +// the few instructions that need to deal with address literals are unique and the
   1.265 +// MacroAssembler does not have to implement every instruction in the Assembler
   1.266 +// in order to search for address literals that may need special handling depending
   1.267 +// on the instruction and the platform. As small step on the way to merging i486/amd64
   1.268 +// directories.
   1.269 +//
   1.270 +class AddressLiteral VALUE_OBJ_CLASS_SPEC {
   1.271 +  friend class ArrayAddress;
   1.272 +  RelocationHolder _rspec;
    1.273 +  // Typically, when we use AddressLiterals, we want to use their rval.
    1.274 +  // However in some situations we want the lval (effective address) of the item.
   1.275 +  // We provide a special factory for making those lvals.
   1.276 +  bool _is_lval;
   1.277 +
   1.278 +  // If the target is far we'll need to load the ea of this to
   1.279 +  // a register to reach it. Otherwise if near we can do rip
   1.280 +  // relative addressing.
   1.281 +
   1.282 +  address          _target;
   1.283 +
   1.284 + protected:
   1.285 +  // creation
   1.286 +  AddressLiteral()
   1.287 +    : _is_lval(false),
   1.288 +      _target(NULL)
   1.289 +  {}
   1.290 +
   1.291 +  public:
   1.292 +
   1.293 +
   1.294 +  AddressLiteral(address target, relocInfo::relocType rtype);
   1.295 +
   1.296 +  AddressLiteral(address target, RelocationHolder const& rspec)
   1.297 +    : _rspec(rspec),
   1.298 +      _is_lval(false),
   1.299 +      _target(target)
   1.300 +  {}
   1.301 +
   1.302 +  AddressLiteral addr() {
   1.303 +    AddressLiteral ret = *this;
   1.304 +    ret._is_lval = true;
   1.305 +    return ret;
   1.306 +  }
   1.307 +
   1.308 +
   1.309 + private:
   1.310 +
   1.311 +  address target() { return _target; }
   1.312 +  bool is_lval() { return _is_lval; }
   1.313 +
   1.314 +  relocInfo::relocType reloc() const { return _rspec.type(); }
   1.315 +  const RelocationHolder& rspec() const { return _rspec; }
   1.316 +
   1.317 +  friend class Assembler;
   1.318 +  friend class MacroAssembler;
   1.319 +  friend class Address;
   1.320 +  friend class LIR_Assembler;
   1.321 +};
   1.322 +
    1.323 +// Convenience classes
   1.324 +class RuntimeAddress: public AddressLiteral {
   1.325 +
   1.326 +  public:
   1.327 +
   1.328 +  RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {}
   1.329 +
   1.330 +};
   1.331 +
   1.332 +class OopAddress: public AddressLiteral {
   1.333 +
   1.334 +  public:
   1.335 +
   1.336 +  OopAddress(address target) : AddressLiteral(target, relocInfo::oop_type){}
   1.337 +
   1.338 +};
   1.339 +
   1.340 +class ExternalAddress: public AddressLiteral {
   1.341 +
   1.342 +  public:
   1.343 +
   1.344 +  ExternalAddress(address target) : AddressLiteral(target, relocInfo::external_word_type){}
   1.345 +
   1.346 +};
   1.347 +
   1.348 +class InternalAddress: public AddressLiteral {
   1.349 +
   1.350 +  public:
   1.351 +
   1.352 +  InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {}
   1.353 +
   1.354 +};
   1.355 +
   1.356 +// x86 can do array addressing as a single operation since disp can be an absolute
    1.357 +// address; amd64 can't. We create a class that expresses the concept but does extra
   1.358 +// magic on amd64 to get the final result
   1.359 +
   1.360 +class ArrayAddress VALUE_OBJ_CLASS_SPEC {
   1.361 +  private:
   1.362 +
   1.363 +  AddressLiteral _base;
   1.364 +  Address        _index;
   1.365 +
   1.366 +  public:
   1.367 +
   1.368 +  ArrayAddress() {};
   1.369 +  ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {};
   1.370 +  AddressLiteral base() { return _base; }
   1.371 +  Address index() { return _index; }
   1.372 +
   1.373 +};
   1.374 +
   1.375 +const int FPUStateSizeInWords = NOT_LP64(27) LP64_ONLY( 512 / wordSize);
   1.376 +
   1.377 +// The Intel x86/Amd64 Assembler: Pure assembler doing NO optimizations on the instruction
   1.378 +// level (e.g. mov rax, 0 is not translated into xor rax, rax!); i.e., what you write
   1.379 +// is what you get. The Assembler is generating code into a CodeBuffer.
   1.380 +
   1.381 +class Assembler : public AbstractAssembler  {
   1.382 +  friend class AbstractAssembler; // for the non-virtual hack
   1.383 +  friend class LIR_Assembler; // as_Address()
   1.384 +  friend class StubGenerator;
   1.385 +
   1.386 + public:
   1.387 +  enum Condition {                     // The x86 condition codes used for conditional jumps/moves.
   1.388 +    zero          = 0x4,
   1.389 +    notZero       = 0x5,
   1.390 +    equal         = 0x4,
   1.391 +    notEqual      = 0x5,
   1.392 +    less          = 0xc,
   1.393 +    lessEqual     = 0xe,
   1.394 +    greater       = 0xf,
   1.395 +    greaterEqual  = 0xd,
   1.396 +    below         = 0x2,
   1.397 +    belowEqual    = 0x6,
   1.398 +    above         = 0x7,
   1.399 +    aboveEqual    = 0x3,
   1.400 +    overflow      = 0x0,
   1.401 +    noOverflow    = 0x1,
   1.402 +    carrySet      = 0x2,
   1.403 +    carryClear    = 0x3,
   1.404 +    negative      = 0x8,
   1.405 +    positive      = 0x9,
   1.406 +    parity        = 0xa,
   1.407 +    noParity      = 0xb
   1.408 +  };
   1.409 +
   1.410 +  enum Prefix {
   1.411 +    // segment overrides
   1.412 +    CS_segment = 0x2e,
   1.413 +    SS_segment = 0x36,
   1.414 +    DS_segment = 0x3e,
   1.415 +    ES_segment = 0x26,
   1.416 +    FS_segment = 0x64,
   1.417 +    GS_segment = 0x65,
   1.418 +
   1.419 +    REX        = 0x40,
   1.420 +
   1.421 +    REX_B      = 0x41,
   1.422 +    REX_X      = 0x42,
   1.423 +    REX_XB     = 0x43,
   1.424 +    REX_R      = 0x44,
   1.425 +    REX_RB     = 0x45,
   1.426 +    REX_RX     = 0x46,
   1.427 +    REX_RXB    = 0x47,
   1.428 +
   1.429 +    REX_W      = 0x48,
   1.430 +
   1.431 +    REX_WB     = 0x49,
   1.432 +    REX_WX     = 0x4A,
   1.433 +    REX_WXB    = 0x4B,
   1.434 +    REX_WR     = 0x4C,
   1.435 +    REX_WRB    = 0x4D,
   1.436 +    REX_WRX    = 0x4E,
   1.437 +    REX_WRXB   = 0x4F
   1.438 +  };
   1.439 +
   1.440 +  enum WhichOperand {
   1.441 +    // input to locate_operand, and format code for relocations
   1.442 +    imm_operand  = 0,            // embedded 32-bit|64-bit immediate operand
   1.443 +    disp32_operand = 1,          // embedded 32-bit displacement or address
   1.444 +    call32_operand = 2,          // embedded 32-bit self-relative displacement
   1.445 +#ifndef _LP64
   1.446 +    _WhichOperand_limit = 3
   1.447 +#else
   1.448 +     narrow_oop_operand = 3,     // embedded 32-bit immediate narrow oop
   1.449 +    _WhichOperand_limit = 4
   1.450 +#endif
   1.451 +  };
   1.452 +
   1.453 +
   1.454 +
    1.455 +  // NOTE: The general philosophy of the declarations here is that 64bit versions
    1.456 +  // of instructions are freely declared without the need for wrapping them in an ifdef.
   1.457 +  // (Some dangerous instructions are ifdef's out of inappropriate jvm's.)
   1.458 +  // In the .cpp file the implementations are wrapped so that they are dropped out
   1.459 +  // of the resulting jvm. This is done mostly to keep the footprint of KERNEL
   1.460 +  // to the size it was prior to merging up the 32bit and 64bit assemblers.
   1.461 +  //
   1.462 +  // This does mean you'll get a linker/runtime error if you use a 64bit only instruction
   1.463 +  // in a 32bit vm. This is somewhat unfortunate but keeps the ifdef noise down.
   1.464 +
   1.465 +private:
   1.466 +
   1.467 +
   1.468 +  // 64bit prefixes
   1.469 +  int prefix_and_encode(int reg_enc, bool byteinst = false);
   1.470 +  int prefixq_and_encode(int reg_enc);
   1.471 +
   1.472 +  int prefix_and_encode(int dst_enc, int src_enc, bool byteinst = false);
   1.473 +  int prefixq_and_encode(int dst_enc, int src_enc);
   1.474 +
   1.475 +  void prefix(Register reg);
   1.476 +  void prefix(Address adr);
   1.477 +  void prefixq(Address adr);
   1.478 +
   1.479 +  void prefix(Address adr, Register reg,  bool byteinst = false);
   1.480 +  void prefixq(Address adr, Register reg);
   1.481 +
   1.482 +  void prefix(Address adr, XMMRegister reg);
   1.483 +
   1.484 +  void prefetch_prefix(Address src);
   1.485 +
   1.486 +  // Helper functions for groups of instructions
   1.487 +  void emit_arith_b(int op1, int op2, Register dst, int imm8);
   1.488 +
   1.489 +  void emit_arith(int op1, int op2, Register dst, int32_t imm32);
   1.490 +  // only 32bit??
   1.491 +  void emit_arith(int op1, int op2, Register dst, jobject obj);
   1.492 +  void emit_arith(int op1, int op2, Register dst, Register src);
   1.493 +
   1.494 +  void emit_operand(Register reg,
   1.495 +                    Register base, Register index, Address::ScaleFactor scale,
   1.496 +                    int disp,
   1.497 +                    RelocationHolder const& rspec,
   1.498 +                    int rip_relative_correction = 0);
   1.499 +
   1.500 +  void emit_operand(Register reg, Address adr, int rip_relative_correction = 0);
   1.501 +
   1.502 +  // operands that only take the original 32bit registers
   1.503 +  void emit_operand32(Register reg, Address adr);
   1.504 +
   1.505 +  void emit_operand(XMMRegister reg,
   1.506 +                    Register base, Register index, Address::ScaleFactor scale,
   1.507 +                    int disp,
   1.508 +                    RelocationHolder const& rspec);
   1.509 +
   1.510 +  void emit_operand(XMMRegister reg, Address adr);
   1.511 +
   1.512 +  void emit_operand(MMXRegister reg, Address adr);
   1.513 +
   1.514 +  // workaround gcc (3.2.1-7) bug
   1.515 +  void emit_operand(Address adr, MMXRegister reg);
   1.516 +
   1.517 +
   1.518 +  // Immediate-to-memory forms
   1.519 +  void emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32);
   1.520 +
   1.521 +  void emit_farith(int b1, int b2, int i);
   1.522 +
   1.523 +
   1.524 + protected:
   1.525 +  #ifdef ASSERT
   1.526 +  void check_relocation(RelocationHolder const& rspec, int format);
   1.527 +  #endif
   1.528 +
   1.529 +  inline void emit_long64(jlong x);
   1.530 +
   1.531 +  void emit_data(jint data, relocInfo::relocType    rtype, int format);
   1.532 +  void emit_data(jint data, RelocationHolder const& rspec, int format);
   1.533 +  void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0);
   1.534 +  void emit_data64(jlong data, RelocationHolder const& rspec, int format = 0);
   1.535 +
   1.536 +
   1.537 +  bool reachable(AddressLiteral adr) NOT_LP64({ return true;});
   1.538 +
   1.539 +  // These are all easily abused and hence protected
   1.540 +
   1.541 +  void mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec, int format = 0);
   1.542 +
   1.543 +  // 32BIT ONLY SECTION
   1.544 +#ifndef _LP64
   1.545 +  // Make these disappear in 64bit mode since they would never be correct
   1.546 +  void cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec);   // 32BIT ONLY
   1.547 +  void cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec);    // 32BIT ONLY
   1.548 +
   1.549 +  void mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec);     // 32BIT ONLY
   1.550 +
   1.551 +  void push_literal32(int32_t imm32, RelocationHolder const& rspec);                 // 32BIT ONLY
   1.552 +#else
   1.553 +  // 64BIT ONLY SECTION
   1.554 +  void mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec);   // 64BIT ONLY
   1.555 +#endif // _LP64
   1.556 +
   1.557 +  // These are unique in that we are ensured by the caller that the 32bit
   1.558 +  // relative in these instructions will always be able to reach the potentially
   1.559 +  // 64bit address described by entry. Since they can take a 64bit address they
   1.560 +  // don't have the 32 suffix like the other instructions in this class.
   1.561 +
   1.562 +  void call_literal(address entry, RelocationHolder const& rspec);
   1.563 +  void jmp_literal(address entry, RelocationHolder const& rspec);
   1.564 +
   1.565 +  // Avoid using directly section
   1.566 +  // Instructions in this section are actually usable by anyone without danger
    1.567 +  // of failure but have performance issues that are addressed by enhanced
    1.568 +  // instructions which will do the proper thing based on the particular cpu.
   1.569 +  // We protect them because we don't trust you...
   1.570 +
   1.571 +  // Don't use next inc() and dec() methods directly. INC & DEC instructions
   1.572 +  // could cause a partial flag stall since they don't set CF flag.
   1.573 +  // Use MacroAssembler::decrement() & MacroAssembler::increment() methods
   1.574 +  // which call inc() & dec() or add() & sub() in accordance with
   1.575 +  // the product flag UseIncDec value.
   1.576 +
   1.577 +  void decl(Register dst);
   1.578 +  void decl(Address dst);
   1.579 +  void decq(Register dst);
   1.580 +  void decq(Address dst);
   1.581 +
   1.582 +  void incl(Register dst);
   1.583 +  void incl(Address dst);
   1.584 +  void incq(Register dst);
   1.585 +  void incq(Address dst);
   1.586 +
   1.587 +  // New cpus require use of movsd and movss to avoid partial register stall
   1.588 +  // when loading from memory. But for old Opteron use movlpd instead of movsd.
   1.589 +  // The selection is done in MacroAssembler::movdbl() and movflt().
   1.590 +
   1.591 +  // Move Scalar Single-Precision Floating-Point Values
   1.592 +  void movss(XMMRegister dst, Address src);
   1.593 +  void movss(XMMRegister dst, XMMRegister src);
   1.594 +  void movss(Address dst, XMMRegister src);
   1.595 +
   1.596 +  // Move Scalar Double-Precision Floating-Point Values
   1.597 +  void movsd(XMMRegister dst, Address src);
   1.598 +  void movsd(XMMRegister dst, XMMRegister src);
   1.599 +  void movsd(Address dst, XMMRegister src);
   1.600 +  void movlpd(XMMRegister dst, Address src);
   1.601 +
   1.602 +  // New cpus require use of movaps and movapd to avoid partial register stall
   1.603 +  // when moving between registers.
   1.604 +  void movaps(XMMRegister dst, XMMRegister src);
   1.605 +  void movapd(XMMRegister dst, XMMRegister src);
   1.606 +
   1.607 +  // End avoid using directly
   1.608 +
   1.609 +
   1.610 +  // Instruction prefixes
   1.611 +  void prefix(Prefix p);
   1.612 +
   1.613 +  public:
   1.614 +
   1.615 +  // Creation
   1.616 +  Assembler(CodeBuffer* code) : AbstractAssembler(code) {}
   1.617 +
   1.618 +  // Decoding
   1.619 +  static address locate_operand(address inst, WhichOperand which);
   1.620 +  static address locate_next_instruction(address inst);
   1.621 +
   1.622 +  // Utilities
   1.623 +
   1.624 +#ifdef _LP64
   1.625 + static bool is_simm(int64_t x, int nbits) { return -( CONST64(1) << (nbits-1) )  <= x   &&   x  <  ( CONST64(1) << (nbits-1) ); }
   1.626 + static bool is_simm32(int64_t x) { return x == (int64_t)(int32_t)x; }
   1.627 +#else
   1.628 + static bool is_simm(int32_t x, int nbits) { return -( 1 << (nbits-1) )  <= x   &&   x  <  ( 1 << (nbits-1) ); }
   1.629 + static bool is_simm32(int32_t x) { return true; }
   1.630 +#endif // LP64
   1.631 +
   1.632 +  // Generic instructions
   1.633 +  // Does 32bit or 64bit as needed for the platform. In some sense these
   1.634 +  // belong in macro assembler but there is no need for both varieties to exist
   1.635 +
   1.636 +  void lea(Register dst, Address src);
   1.637 +
   1.638 +  void mov(Register dst, Register src);
   1.639 +
   1.640 +  void pusha();
   1.641 +  void popa();
   1.642 +
   1.643 +  void pushf();
   1.644 +  void popf();
   1.645 +
   1.646 +  void push(int32_t imm32);
   1.647 +
   1.648 +  void push(Register src);
   1.649 +
   1.650 +  void pop(Register dst);
   1.651 +
   1.652 +  // These are dummies to prevent surprise implicit conversions to Register
   1.653 +  void push(void* v);
   1.654 +  void pop(void* v);
   1.655 +
   1.656 +
   1.657 +  // These do register sized moves/scans
   1.658 +  void rep_mov();
   1.659 +  void rep_set();
   1.660 +  void repne_scan();
   1.661 +#ifdef _LP64
   1.662 +  void repne_scanl();
   1.663 +#endif
   1.664 +
   1.665 +  // Vanilla instructions in lexical order
   1.666 +
   1.667 +  void adcl(Register dst, int32_t imm32);
   1.668 +  void adcl(Register dst, Address src);
   1.669 +  void adcl(Register dst, Register src);
   1.670 +
   1.671 +  void adcq(Register dst, int32_t imm32);
   1.672 +  void adcq(Register dst, Address src);
   1.673 +  void adcq(Register dst, Register src);
   1.674 +
   1.675 +
   1.676 +  void addl(Address dst, int32_t imm32);
   1.677 +  void addl(Address dst, Register src);
   1.678 +  void addl(Register dst, int32_t imm32);
   1.679 +  void addl(Register dst, Address src);
   1.680 +  void addl(Register dst, Register src);
   1.681 +
   1.682 +  void addq(Address dst, int32_t imm32);
   1.683 +  void addq(Address dst, Register src);
   1.684 +  void addq(Register dst, int32_t imm32);
   1.685 +  void addq(Register dst, Address src);
   1.686 +  void addq(Register dst, Register src);
   1.687 +
   1.688 +
   1.689 +  void addr_nop_4();
   1.690 +  void addr_nop_5();
   1.691 +  void addr_nop_7();
   1.692 +  void addr_nop_8();
   1.693 +
   1.694 +  // Add Scalar Double-Precision Floating-Point Values
   1.695 +  void addsd(XMMRegister dst, Address src);
   1.696 +  void addsd(XMMRegister dst, XMMRegister src);
   1.697 +
   1.698 +  // Add Scalar Single-Precision Floating-Point Values
   1.699 +  void addss(XMMRegister dst, Address src);
   1.700 +  void addss(XMMRegister dst, XMMRegister src);
   1.701 +
   1.702 +  void andl(Register dst, int32_t imm32);
   1.703 +  void andl(Register dst, Address src);
   1.704 +  void andl(Register dst, Register src);
   1.705 +
   1.706 +  void andq(Register dst, int32_t imm32);
   1.707 +  void andq(Register dst, Address src);
  void andq(Register dst, Register src);


  // Bitwise Logical AND of Packed Double-Precision Floating-Point Values
  void andpd(XMMRegister dst, Address src);
  void andpd(XMMRegister dst, XMMRegister src);

  // Byte-swap (reverse byte order) of a 32-bit register
  void bswapl(Register reg);

  // Byte-swap of a 64-bit register
  void bswapq(Register reg);

  void call(Label& L, relocInfo::relocType rtype);
  void call(Register reg);  // push pc; pc <- reg
  void call(Address adr);   // push pc; pc <- adr

  // Sign-extend eax into edx:eax (CDQ)
  void cdql();

  // Sign-extend rax into rdx:rax (CQO)
  void cdqq();

  // Clear direction flag; 0xFC is the one-byte CLD opcode
  void cld() { emit_byte(0xfc); }

  // Flush the cache line containing adr
  void clflush(Address adr);

  // Conditional moves (32-bit)
  void cmovl(Condition cc, Register dst, Register src);
  void cmovl(Condition cc, Register dst, Address src);

  // Conditional moves (64-bit)
  void cmovq(Condition cc, Register dst, Register src);
  void cmovq(Condition cc, Register dst, Address src);


  void cmpb(Address dst, int imm8);

  void cmpl(Address dst, int32_t imm32);

  void cmpl(Register dst, int32_t imm32);
  void cmpl(Register dst, Register src);
  void cmpl(Register dst, Address src);

  void cmpq(Address dst, int32_t imm32);
  void cmpq(Address dst, Register src);

  void cmpq(Register dst, int32_t imm32);
  void cmpq(Register dst, Register src);
  void cmpq(Register dst, Address src);

  // these are dummies used to catch attempting to convert NULL to Register
  void cmpl(Register dst, void* junk); // dummy
  void cmpq(Register dst, void* junk); // dummy

  void cmpw(Address dst, int imm16);

  // 8-byte compare-and-exchange (CMPXCHG8B)
  void cmpxchg8 (Address adr);

  // Compare-and-exchange reg with memory at adr (32-/64-bit forms)
  void cmpxchgl(Register reg, Address adr);

  void cmpxchgq(Register reg, Address adr);

  // Ordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
  void comisd(XMMRegister dst, Address src);

  // Ordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS
  void comiss(XMMRegister dst, Address src);

  // Identify processor type and features
  void cpuid() {
    emit_byte(0x0F);
    emit_byte(0xA2);
  }
   1.776 +
  // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value
  void cvtsd2ss(XMMRegister dst, XMMRegister src);

  // Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value
  void cvtsi2sdl(XMMRegister dst, Register src);
  void cvtsi2sdq(XMMRegister dst, Register src);

  // Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value
  void cvtsi2ssl(XMMRegister dst, Register src);
  void cvtsi2ssq(XMMRegister dst, Register src);

  // Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Value
  void cvtdq2pd(XMMRegister dst, XMMRegister src);

  // Convert Packed Signed Doubleword Integers to Packed Single-Precision Floating-Point Value
  void cvtdq2ps(XMMRegister dst, XMMRegister src);

  // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value
  void cvtss2sd(XMMRegister dst, XMMRegister src);

  // Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer
  void cvttsd2sil(Register dst, Address src);
  void cvttsd2sil(Register dst, XMMRegister src);
  void cvttsd2siq(Register dst, XMMRegister src);

  // Convert with Truncation Scalar Single-Precision Floating-Point Value to Doubleword Integer
  void cvttss2sil(Register dst, XMMRegister src);
  void cvttss2siq(Register dst, XMMRegister src);

  // Divide Scalar Double-Precision Floating-Point Values
  void divsd(XMMRegister dst, Address src);
  void divsd(XMMRegister dst, XMMRegister src);

  // Divide Scalar Single-Precision Floating-Point Values
  void divss(XMMRegister dst, Address src);
  void divss(XMMRegister dst, XMMRegister src);

  // Empty MMX state (EMMS) — required before using x87 after MMX code
  void emms();
   1.815 +
  // ----- x87 floating-point instructions (operate on the FPU register stack) -----
  // An int parameter i names stack slot st(i) relative to TOS.

  void fabs();

  void fadd(int i);

  void fadd_d(Address src);
  void fadd_s(Address src);

  // "Alternate" versions of x87 instructions place result down in FPU
  // stack instead of on TOS

  void fadda(int i); // "alternate" fadd
  void faddp(int i = 1);

  void fchs();

  void fcom(int i);

  void fcomp(int i = 1);
  void fcomp_d(Address src);
  void fcomp_s(Address src);

  void fcompp();

  void fcos();

  void fdecstp();

  void fdiv(int i);
  void fdiv_d(Address src);
  void fdivr_s(Address src); // NOTE(review): reversed-divide memory form listed in the plain-fdiv group — verify against the .cpp
  void fdiva(int i);  // "alternate" fdiv
  void fdivp(int i = 1);

  void fdivr(int i);
  void fdivr_d(Address src);
  void fdiv_s(Address src); // NOTE(review): plain-divide memory form listed in the reversed group — verify against the .cpp

  void fdivra(int i); // "alternate" reversed fdiv

  void fdivrp(int i = 1);

  void ffree(int i = 0);

  void fild_d(Address adr);
  void fild_s(Address adr);

  void fincstp();

  void finit();

  void fist_s (Address adr);
  void fistp_d(Address adr);
  void fistp_s(Address adr);

  void fld1();

  void fld_d(Address adr);
  void fld_s(Address adr);
  void fld_s(int index);
  void fld_x(Address adr);  // extended-precision (80-bit) format

  void fldcw(Address src);

  void fldenv(Address src);

  void fldlg2();

  void fldln2();

  void fldz();

  void flog();
  void flog10();

  void fmul(int i);

  void fmul_d(Address src);
  void fmul_s(Address src);

  void fmula(int i);  // "alternate" fmul

  void fmulp(int i = 1);

  void fnsave(Address dst);

  void fnstcw(Address src);

  // Store FPU status word into ax
  void fnstsw_ax();

  void fprem();
  void fprem1();

  void frstor(Address src);

  void fsin();

  void fsqrt();

  void fst_d(Address adr);
  void fst_s(Address adr);

  void fstp_d(Address adr);
  void fstp_d(int index);
  void fstp_s(Address adr);
  void fstp_x(Address adr); // extended-precision (80-bit) format

  void fsub(int i);
  void fsub_d(Address src);
  void fsub_s(Address src);

  void fsuba(int i);  // "alternate" fsub

  void fsubp(int i = 1);

  void fsubr(int i);
  void fsubr_d(Address src);
  void fsubr_s(Address src);

  void fsubra(int i); // "alternate" reversed fsub

  void fsubrp(int i = 1);

  void ftan();

  void ftst();

  void fucomi(int i = 1);
  void fucomip(int i = 1);

  void fwait();

  void fxch(int i = 1);

  void fxrstor(Address src);

  void fxsave(Address dst);

  void fyl2x();

  // Halt the processor (privileged; used to trap)
  void hlt();
   1.956 +
  void idivl(Register src);

  void idivq(Register src);

  // Signed multiply: two-operand form and three-operand (dst = src * value) form
  void imull(Register dst, Register src);
  void imull(Register dst, Register src, int value);

  void imulq(Register dst, Register src);
  void imulq(Register dst, Register src, int value);


  // jcc is the generic conditional branch generator to run-
  // time routines, jcc is used for branches to labels. jcc
  // takes a branch opcode (cc) and a label (L) and generates
  // either a backward branch or a forward branch and links it
  // to the label fixup chain. Usage:
  //
  // Label L;      // unbound label
  // jcc(cc, L);   // forward branch to unbound label
  // bind(L);      // bind label to the current pc
  // jcc(cc, L);   // backward branch to bound label
  // bind(L);      // illegal: a label may be bound only once
  //
  // Note: The same Label can be used for forward and backward branches
  // but it may be bound only once.

  void jcc(Condition cc, Label& L,
           relocInfo::relocType rtype = relocInfo::none);

  // Conditional jump to a 8-bit offset to L.
  // WARNING: be very careful using this for forward jumps.  If the label is
  // not bound within an 8-bit offset of this instruction, a run-time error
  // will occur.
  void jccb(Condition cc, Label& L);

  void jmp(Address entry);    // pc <- entry

  // Label operations & relative jumps (PPUM Appendix D)
  void jmp(Label& L, relocInfo::relocType rtype = relocInfo::none);   // unconditional jump to L

  void jmp(Register entry); // pc <- entry

  // Unconditional 8-bit offset jump to L.
  // WARNING: be very careful using this for forward jumps.  If the label is
  // not bound within an 8-bit offset of this instruction, a run-time error
  // will occur.
  void jmpb(Label& L);
  1.1004 +
  // Load MXCSR (SSE control/status) register from memory
  void ldmxcsr( Address src );

  void leal(Register dst, Address src);

  void leaq(Register dst, Address src);

  // LFENCE — serialize load operations (encoding 0F AE E8)
  void lfence() {
    emit_byte(0x0F);
    emit_byte(0xAE);
    emit_byte(0xE8);
  }

  // LOCK prefix for the following instruction
  void lock();

  // Memory-barrier constraint bits; OR them together and pass to membar()
  enum Membar_mask_bits {
    StoreStore = 1 << 3,
    LoadStore  = 1 << 2,
    StoreLoad  = 1 << 1,
    LoadLoad   = 1 << 0
  };

  // Serializes memory.
  void membar(Membar_mask_bits order_constraint) {
    // We only have to handle StoreLoad and LoadLoad
    if (order_constraint & StoreLoad) {
      // MFENCE subsumes LFENCE
      mfence();
    } /* [jk] not needed currently: else if (order_constraint & LoadLoad) {
         lfence();
    } */
  }

  void mfence();
  1.1038 +
  // Moves

  // Load a full 64-bit immediate into dst
  void mov64(Register dst, int64_t imm64);

  void movb(Address dst, Register src);
  void movb(Address dst, int imm8);
  void movb(Register dst, Address src);

  // Move doubleword between a general register and an XMM register
  void movdl(XMMRegister dst, Register src);
  void movdl(Register dst, XMMRegister src);

  // Move Double Quadword
  void movdq(XMMRegister dst, Register src);
  void movdq(Register dst, XMMRegister src);

  // Move Aligned Double Quadword
  void movdqa(Address     dst, XMMRegister src);
  void movdqa(XMMRegister dst, Address src);
  void movdqa(XMMRegister dst, XMMRegister src);

  void movl(Register dst, int32_t imm32);
  void movl(Address dst, int32_t imm32);
  void movl(Register dst, Register src);
  void movl(Register dst, Address src);
  void movl(Address dst, Register src);

  // These dummies prevent using movl from converting a zero (like NULL) into Register
  // by giving the compiler two choices it can't resolve

  void movl(Address  dst, void* junk);
  void movl(Register dst, void* junk);

#ifdef _LP64
  void movq(Register dst, Register src);
  void movq(Register dst, Address src);
  void movq(Address dst, Register src);
#endif

  void movq(Address     dst, MMXRegister src );
  void movq(MMXRegister dst, Address src );

#ifdef _LP64
  // These dummies prevent using movq from converting a zero (like NULL) into Register
  // by giving the compiler two choices it can't resolve

  void movq(Address  dst, void* dummy);
  void movq(Register dst, void* dummy);
#endif

  // Move Quadword
  void movq(Address     dst, XMMRegister src);
  void movq(XMMRegister dst, Address src);

  // Move with sign-extension: byte -> 32-bit
  void movsbl(Register dst, Address src);
  void movsbl(Register dst, Register src);

#ifdef _LP64
  // Move signed 32bit immediate to 64bit extending sign
  void movslq(Address dst, int32_t imm64);
  void movslq(Register dst, int32_t imm64);

  void movslq(Register dst, Address src);
  void movslq(Register dst, Register src);
  void movslq(Register dst, void* src); // Dummy declaration to cause NULL to be ambiguous
#endif

  // Move with sign-extension: word -> 32-bit
  void movswl(Register dst, Address src);
  void movswl(Register dst, Register src);

  void movw(Address dst, int imm16);
  void movw(Register dst, Address src);
  void movw(Address dst, Register src);

  // Move with zero-extension: byte -> 32-bit
  void movzbl(Register dst, Address src);
  void movzbl(Register dst, Register src);

  // Move with zero-extension: word -> 32-bit
  void movzwl(Register dst, Address src);
  void movzwl(Register dst, Register src);

  // Unsigned multiply: edx:eax = eax * src
  void mull(Address src);
  void mull(Register src);

  // Multiply Scalar Double-Precision Floating-Point Values
  void mulsd(XMMRegister dst, Address src);
  void mulsd(XMMRegister dst, XMMRegister src);

  // Multiply Scalar Single-Precision Floating-Point Values
  void mulss(XMMRegister dst, Address src);
  void mulss(XMMRegister dst, XMMRegister src);
  1.1128 +
  void negl(Register dst);

#ifdef _LP64
  void negq(Register dst);
#endif

  // Emit nop padding; i is the padding size — see the .cpp for the encoding chosen
  void nop(int i = 1);

  void notl(Register dst);

#ifdef _LP64
  void notq(Register dst);
#endif

  void orl(Address dst, int32_t imm32);
  void orl(Register dst, int32_t imm32);
  void orl(Register dst, Address src);
  void orl(Register dst, Register src);

  void orq(Address dst, int32_t imm32);
  void orq(Register dst, int32_t imm32);
  void orq(Register dst, Address src);
  void orq(Register dst, Register src);

  void popl(Address dst);

#ifdef _LP64
  void popq(Address dst);
#endif

  // Prefetches (SSE, SSE2, 3DNOW only)

  void prefetchnta(Address src);
  void prefetchr(Address src);
  void prefetcht0(Address src);
  void prefetcht1(Address src);
  void prefetcht2(Address src);
  void prefetchw(Address src);

  // Shuffle Packed Doublewords
  void pshufd(XMMRegister dst, XMMRegister src, int mode);
  void pshufd(XMMRegister dst, Address src,     int mode);

  // Shuffle Packed Low Words
  void pshuflw(XMMRegister dst, XMMRegister src, int mode);
  void pshuflw(XMMRegister dst, Address src,     int mode);

  // Shift Right Logical Quadword Immediate
  void psrlq(XMMRegister dst, int shift);

  // Interleave Low Bytes
  void punpcklbw(XMMRegister dst, XMMRegister src);

  void pushl(Address src);

  void pushq(Address src);

  // Xor Packed Byte Integer Values
  void pxor(XMMRegister dst, Address src);
  void pxor(XMMRegister dst, XMMRegister src);

  // Rotate left through carry by imm8 bits
  void rcll(Register dst, int imm8);

  void rclq(Register dst, int imm8);

  // Return, popping imm16 extra bytes of arguments off the stack
  void ret(int imm16);
  1.1195 +
  // Store AH into flags (SAHF)
  void sahf();

  // Arithmetic shift right; the count-less overloads shift by cl
  void sarl(Register dst, int imm8);
  void sarl(Register dst);

  void sarq(Register dst, int imm8);
  void sarq(Register dst);

  // Subtract with borrow
  void sbbl(Address dst, int32_t imm32);
  void sbbl(Register dst, int32_t imm32);
  void sbbl(Register dst, Address src);
  void sbbl(Register dst, Register src);

  void sbbq(Address dst, int32_t imm32);
  void sbbq(Register dst, int32_t imm32);
  void sbbq(Register dst, Address src);
  void sbbq(Register dst, Register src);

  // Set byte register dst to 1 if condition cc holds, else 0
  void setb(Condition cc, Register dst);

  // Double-precision shifts (shift count in cl)
  void shldl(Register dst, Register src);

  // Logical shift left; the count-less overloads shift by cl
  void shll(Register dst, int imm8);
  void shll(Register dst);

  void shlq(Register dst, int imm8);
  void shlq(Register dst);

  void shrdl(Register dst, Register src);

  // Logical shift right; the count-less overloads shift by cl
  void shrl(Register dst, int imm8);
  void shrl(Register dst);

  void shrq(Register dst, int imm8);
  void shrq(Register dst);

  void smovl(); // QQQ generic?

  // Compute Square Root of Scalar Double-Precision Floating-Point Value
  void sqrtsd(XMMRegister dst, Address src);
  void sqrtsd(XMMRegister dst, XMMRegister src);

  // Set direction flag; 0xFD is the one-byte STD opcode
  // (note: member name shadows namespace std inside this class)
  void std() { emit_byte(0xfd); }

  // Store MXCSR (SSE control/status) register to memory
  void stmxcsr( Address dst );

  void subl(Address dst, int32_t imm32);
  void subl(Address dst, Register src);
  void subl(Register dst, int32_t imm32);
  void subl(Register dst, Address src);
  void subl(Register dst, Register src);

  void subq(Address dst, int32_t imm32);
  void subq(Address dst, Register src);
  void subq(Register dst, int32_t imm32);
  void subq(Register dst, Address src);
  void subq(Register dst, Register src);


  // Subtract Scalar Double-Precision Floating-Point Values
  void subsd(XMMRegister dst, Address src);
  void subsd(XMMRegister dst, XMMRegister src);

  // Subtract Scalar Single-Precision Floating-Point Values
  void subss(XMMRegister dst, Address src);
  void subss(XMMRegister dst, XMMRegister src);

  // Logical compare (AND that sets flags without storing a result)
  void testb(Register dst, int imm8);

  void testl(Register dst, int32_t imm32);
  void testl(Register dst, Register src);
  void testl(Register dst, Address src);

  void testq(Register dst, int32_t imm32);
  void testq(Register dst, Register src);


  // Unordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
  void ucomisd(XMMRegister dst, Address src);
  void ucomisd(XMMRegister dst, XMMRegister src);

  // Unordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS
  void ucomiss(XMMRegister dst, Address src);
  void ucomiss(XMMRegister dst, XMMRegister src);

  // Exchange and add (combine with lock() for an atomic fetch-and-add)
  void xaddl(Address dst, Register src);

  void xaddq(Address dst, Register src);

  // Exchange contents; the memory form is implicitly locked on x86
  void xchgl(Register reg, Address adr);
  void xchgl(Register dst, Register src);

  void xchgq(Register reg, Address adr);
  void xchgq(Register dst, Register src);

  void xorl(Register dst, int32_t imm32);
  void xorl(Register dst, Address src);
  void xorl(Register dst, Register src);

  void xorq(Register dst, Address src);
  void xorq(Register dst, Register src);

  // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
  void xorpd(XMMRegister dst, Address src);
  void xorpd(XMMRegister dst, XMMRegister src);

  // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
  void xorps(XMMRegister dst, Address src);
  void xorps(XMMRegister dst, XMMRegister src);

  void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0
};
  1.1308 +
  1.1309 +
  1.1310 +// MacroAssembler extends Assembler by frequently used macros.
  1.1311 +//
  1.1312 +// Instructions for which a 'better' code sequence exists depending
  1.1313 +// on arguments should also go in here.
  1.1314 +
class MacroAssembler: public Assembler {
 friend class LIR_Assembler;
 protected:

  Address as_Address(AddressLiteral adr);
  Address as_Address(ArrayAddress adr);

  // Support for VM calls
  //
  // This is the base routine called by the different versions of call_VM_leaf. The interpreter
  // may customize this version by overriding it for its purposes (e.g., to save/restore
  // additional registers when doing a VM call).
#ifdef CC_INTERP
  // c++ interpreter never wants to use interp_masm version of call_VM
  #define VIRTUAL
#else
  #define VIRTUAL virtual
#endif
  // NOTE(review): VIRTUAL is a plain macro; it leaks to includers unless #undef'd
  // later in this header — confirm the #undef exists past this view.

  VIRTUAL void call_VM_leaf_base(
    address entry_point,               // the entry point
    int     number_of_arguments        // the number of arguments to pop after the call
  );

  // This is the base routine called by the different versions of call_VM. The interpreter
  // may customize this version by overriding it for its purposes (e.g., to save/restore
  // additional registers when doing a VM call).
  //
  // If no java_thread register is specified (noreg) then rdi will be used instead. call_VM_base
  // returns the register which contains the thread upon return. If a thread register has been
  // specified, the return value will correspond to that register. If no last_java_sp is specified
  // (noreg) then rsp will be used instead.
  VIRTUAL void call_VM_base(           // returns the register containing the thread upon return
    Register oop_result,               // where an oop-result ends up if any; use noreg otherwise
    Register java_thread,              // the thread if computed before     ; use noreg otherwise
    Register last_java_sp,             // to set up last_Java_frame in stubs; use noreg otherwise
    address  entry_point,              // the entry point
    int      number_of_arguments,      // the number of arguments (w/o thread) to pop after the call
    bool     check_exceptions          // whether to check for pending exceptions after return
  );

  // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code.
  // The implementation is only non-empty for the InterpreterMacroAssembler,
  // as only the interpreter handles PopFrame and ForceEarlyReturn requests.
  virtual void check_and_handle_popframe(Register java_thread);
  virtual void check_and_handle_earlyret(Register java_thread);

  void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true);

  // helpers for FPU flag access
  // tmp is a temporary register, if none is available use noreg
  void save_rax   (Register tmp);
  void restore_rax(Register tmp);
  1.1368 +
 public:
  MacroAssembler(CodeBuffer* code) : Assembler(code) {}

  // Support for NULL-checks
  //
  // Generates code that causes a NULL OS exception if the content of reg is NULL.
  // If the accessed location is M[reg + offset] and the offset is known, provide the
  // offset. No explicit code generation is needed if the offset is within a certain
  // range (0 <= offset <= page_size).

  void null_check(Register reg, int offset = -1);
  static bool needs_explicit_null_check(intptr_t offset);

  // Required platform-specific helpers for Label::patch_instructions.
  // They _shadow_ the declarations in AbstractAssembler, which are undefined.
  void pd_patch_instruction(address branch, address target);
#ifndef PRODUCT
  static void pd_print_patched_instruction(address branch);
#endif

  // The following 4 methods return the offset of the appropriate move instruction

  // Support for fast byte/word loading with zero extension (depending on particular CPU)
  int load_unsigned_byte(Register dst, Address src);
  int load_unsigned_word(Register dst, Address src);

  // Support for fast byte/word loading with sign extension (depending on particular CPU)
  int load_signed_byte(Register dst, Address src);
  int load_signed_word(Register dst, Address src);

  // Support for sign-extension (hi:lo = extend_sign(lo))
  void extend_sign(Register hi, Register lo);

  // Support for inc/dec with optimal instruction selection depending on value

  // Dispatch to the 64-bit form on LP64, the 32-bit form otherwise
  void increment(Register reg, int value = 1) { LP64_ONLY(incrementq(reg, value)) NOT_LP64(incrementl(reg, value)) ; }
  void decrement(Register reg, int value = 1) { LP64_ONLY(decrementq(reg, value)) NOT_LP64(decrementl(reg, value)) ; }

  void decrementl(Address dst, int value = 1);
  void decrementl(Register reg, int value = 1);

  void decrementq(Register reg, int value = 1);
  void decrementq(Address dst, int value = 1);

  void incrementl(Address dst, int value = 1);
  void incrementl(Register reg, int value = 1);

  void incrementq(Register reg, int value = 1);
  void incrementq(Address dst, int value = 1);
  1.1418 +
  1.1419 +
  // Support optimal SSE move instructions.
  // Single-precision reg-to-reg move; instruction choice is driven by the
  // UseXmmRegToRegMoveAll flag.
  void movflt(XMMRegister dst, XMMRegister src) {
    if (UseXmmRegToRegMoveAll) { movaps(dst, src); return; }
    else                       { movss (dst, src); return; }
  }
  void movflt(XMMRegister dst, Address src) { movss(dst, src); }
  void movflt(XMMRegister dst, AddressLiteral src);
  void movflt(Address dst, XMMRegister src) { movss(dst, src); }

  // Double-precision reg-to-reg move, same flag-driven selection as movflt.
  void movdbl(XMMRegister dst, XMMRegister src) {
    if (UseXmmRegToRegMoveAll) { movapd(dst, src); return; }
    else                       { movsd (dst, src); return; }
  }

  void movdbl(XMMRegister dst, AddressLiteral src);

  // Double-precision load from memory; movsd zeroes the upper half of the
  // XMM register while movlpd merges — chosen by UseXmmLoadAndClearUpper.
  void movdbl(XMMRegister dst, Address src) {
    if (UseXmmLoadAndClearUpper) { movsd (dst, src); return; }
    else                         { movlpd(dst, src); return; }
  }
  void movdbl(Address dst, XMMRegister src) { movsd(dst, src); }

  void incrementl(AddressLiteral dst);
  void incrementl(ArrayAddress dst);
  1.1444 +
  // Alignment
  // Align the next instruction to a multiple of modulus bytes
  void align(int modulus);

  // Misc
  void fat_nop(); // 5 byte nop

  // Stack frame creation/removal
  void enter();
  void leave();

  // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information)
  // The pointer will be loaded into the thread register.
  void get_thread(Register thread);

  // Support for VM calls
  //
  // It is imperative that all calls into the VM are handled via the call_VM macros.
  // They make sure that the stack linkage is setup correctly. call_VM's correspond
  // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.


  void call_VM(Register oop_result,
               address entry_point,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               address entry_point,
               Register arg_1,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               address entry_point,
               Register arg_1, Register arg_2,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               address entry_point,
               Register arg_1, Register arg_2, Register arg_3,
               bool check_exceptions = true);

  // Overloadings with last_Java_sp
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               int number_of_arguments = 0,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               Register arg_1, bool
               check_exceptions = true);
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               Register arg_1, Register arg_2,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               Register arg_1, Register arg_2, Register arg_3,
               bool check_exceptions = true);

  void call_VM_leaf(address entry_point,
                    int number_of_arguments = 0);
  void call_VM_leaf(address entry_point,
                    Register arg_1);
  void call_VM_leaf(address entry_point,
                    Register arg_1, Register arg_2);
  void call_VM_leaf(address entry_point,
                    Register arg_1, Register arg_2, Register arg_3);
  1.1512 +
  // last Java Frame (fills frame anchor)
  void set_last_Java_frame(Register thread,
                           Register last_java_sp,
                           Register last_java_fp,
                           address last_java_pc);

  // thread in the default location (r15_thread on 64bit)
  void set_last_Java_frame(Register last_java_sp,
                           Register last_java_fp,
                           address last_java_pc);

  void reset_last_Java_frame(Register thread, bool clear_fp, bool clear_pc);

  // thread in the default location (r15_thread on 64bit)
  void reset_last_Java_frame(bool clear_fp, bool clear_pc);

  // Stores
  void store_check(Register obj);                // store check for obj - register is destroyed afterwards
  void store_check(Register obj, Address dst);   // same as above, dst is exact store location (reg. is destroyed)

  // split store_check(Register obj) to enhance instruction interleaving
  void store_check_part_1(Register obj);
  void store_check_part_2(Register obj);

  // C 'boolean' to Java boolean: x == 0 ? 0 : 1
  void c2bool(Register x);

  // C++ bool manipulation

  void movbool(Register dst, Address src);
  void movbool(Address dst, bool boolconst);
  void movbool(Address dst, Register src);
  void testbool(Register dst);

  // oop manipulations
  void load_klass(Register dst, Register src);
  void store_klass(Register dst, Register src);

  void load_prototype_header(Register dst, Register src);

#ifdef _LP64
  void store_klass_gap(Register dst, Register src);

  // Compressed-oop loads/stores and narrow<->wide oop encode/decode helpers
  void load_heap_oop(Register dst, Address src);
  void store_heap_oop(Address dst, Register src);
  void encode_heap_oop(Register r);
  void decode_heap_oop(Register r);
  void encode_heap_oop_not_null(Register r);   // caller guarantees the oop is non-NULL
  void decode_heap_oop_not_null(Register r);
  void encode_heap_oop_not_null(Register dst, Register src);
  void decode_heap_oop_not_null(Register dst, Register src);

  void set_narrow_oop(Register dst, jobject obj);

  // if heap base register is used - reinit it with the correct value
  void reinit_heapbase();
#endif // _LP64

  // Int division/remainder for Java
  // (as idivl, but checks for special case as described in JVM spec.)
  // returns idivl instruction offset for implicit exception handling
  int corrected_idivl(Register reg);

  // Long division/remainder for Java
  // (as idivq, but checks for special case as described in JVM spec.)
  // returns idivq instruction offset for implicit exception handling
  int corrected_idivq(Register reg);

  // Emit a breakpoint (INT3)
  void int3();

  // Long operation macros for a 32bit cpu
  // Long negation for Java
  void lneg(Register hi, Register lo);

  // Long multiplication for Java
  // (destroys contents of eax, ebx, ecx and edx)
  // NOTE(review): comment below says rdx:rax but this is the 32-bit section
  // (e-registers above) — presumably edx:eax; verify against the .cpp
  void lmul(int x_rsp_offset, int y_rsp_offset); // rdx:rax = x * y

  // Long shifts for Java
  // (semantics as described in JVM spec.)
  void lshl(Register hi, Register lo);                               // hi:lo << (rcx & 0x3f)
  1.1594 +  void lshr(Register hi, Register lo, bool sign_extension = false);  // hi:lo >> (rcx & 0x3f)
  1.1595 +
  1.1596 +  // Long compare for Java
  1.1597 +  // (semantics as described in JVM spec.)
  1.1598 +  void lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo); // x_hi = lcmp(x, y)
  1.1599 +
  1.1600 +
  1.1601 +  // misc
  1.1602 +
  1.1603 +  // Sign extension
  1.1604 +  void sign_extend_short(Register reg);
  1.1605 +  void sign_extend_byte(Register reg);
  1.1606 +
  1.1607 +  // Division by power of 2, rounding towards 0
  1.1608 +  void division_with_shift(Register reg, int shift_value);
  1.1609 +
  1.1610 +  // Compares the top-most stack entries on the FPU stack and sets the eflags as follows:
  1.1611 +  //
  1.1612 +  // CF (corresponds to C0) if x < y
  1.1613 +  // PF (corresponds to C2) if unordered
  1.1614 +  // ZF (corresponds to C3) if x = y
  1.1615 +  //
  1.1616 +  // The arguments are in reversed order on the stack (i.e., top of stack is first argument).
  1.1617 +  // tmp is a temporary register, if none is available use noreg (only matters for non-P6 code)
  1.1618 +  void fcmp(Register tmp);
  1.1619 +  // Variant of the above which allows y to be further down the stack
  1.1620 +  // and which only pops x and y if specified. If pop_right is
  1.1621 +  // specified then pop_left must also be specified.
  1.1622 +  void fcmp(Register tmp, int index, bool pop_left, bool pop_right);
  1.1623 +
  1.1624 +  // Floating-point comparison for Java
  1.1625 +  // Compares the top-most stack entries on the FPU stack and stores the result in dst.
  1.1626 +  // The arguments are in reversed order on the stack (i.e., top of stack is first argument).
  1.1627 +  // (semantics as described in JVM spec.)
  1.1628 +  void fcmp2int(Register dst, bool unordered_is_less);
  1.1629 +  // Variant of the above which allows y to be further down the stack
  1.1630 +  // and which only pops x and y if specified. If pop_right is
  1.1631 +  // specified then pop_left must also be specified.
  1.1632 +  void fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right);
  1.1633 +
  1.1634 +  // Floating-point remainder for Java (ST0 = ST0 fremr ST1, ST1 is empty afterwards)
  1.1635 +  // tmp is a temporary register, if none is available use noreg
  1.1636 +  void fremr(Register tmp);
  1.1637 +
  1.1638 +
  1.1639 +  // same as fcmp2int, but using SSE2
  1.1640 +  void cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less);
  1.1641 +  void cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less);
  1.1642 +
  1.1643 +  // Inlined sin/cos generator for Java; must not use CPU instruction
  1.1644 +  // directly on Intel as it does not have high enough precision
  1.1645 +  // outside of the range [-pi/4, pi/4]. Extra argument indicates the
  1.1646 +  // number of FPU stack slots in use; all but the topmost will
  1.1647 +  // require saving if a slow case is necessary. Assumes argument is
  1.1648 +  // on FP TOS; result is on FP TOS.  No cpu registers are changed by
  1.1649 +  // this code.
  1.1650 +  void trigfunc(char trig, int num_fpu_regs_in_use = 1);
  1.1651 +
  1.1652 +  // branch to L if FPU flag C2 is set/not set
  1.1653 +  // tmp is a temporary register, if none is available use noreg
  1.1654 +  void jC2 (Register tmp, Label& L);
  1.1655 +  void jnC2(Register tmp, Label& L);
  1.1656 +
  1.1657 +  // Pop ST (ffree & fincstp combined)
  1.1658 +  void fpop();
  1.1659 +
  1.1660 +  // pushes double TOS element of FPU stack on CPU stack; pops from FPU stack
  1.1661 +  void push_fTOS();
  1.1662 +
  1.1663 +  // pops double TOS element from CPU stack and pushes on FPU stack
  1.1664 +  void pop_fTOS();
  1.1665 +
  1.1666 +  void empty_FPU_stack();
  1.1667 +
  1.1668 +  void push_IU_state();
  1.1669 +  void pop_IU_state();
  1.1670 +
  1.1671 +  void push_FPU_state();
  1.1672 +  void pop_FPU_state();
  1.1673 +
  1.1674 +  void push_CPU_state();
  1.1675 +  void pop_CPU_state();
  1.1676 +
  1.1677 +  // Round up to a power of two
  1.1678 +  void round_to(Register reg, int modulus);
  1.1679 +
  1.1680 +  // Callee saved registers handling
  1.1681 +  void push_callee_saved_registers();
  1.1682 +  void pop_callee_saved_registers();
  1.1683 +
  1.1684 +  // allocation
  1.1685 +  void eden_allocate(
  1.1686 +    Register obj,                      // result: pointer to object after successful allocation
  1.1687 +    Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
  1.1688 +    int      con_size_in_bytes,        // object size in bytes if   known at compile time
  1.1689 +    Register t1,                       // temp register
  1.1690 +    Label&   slow_case                 // continuation point if fast allocation fails
  1.1691 +  );
  1.1692 +  void tlab_allocate(
  1.1693 +    Register obj,                      // result: pointer to object after successful allocation
  1.1694 +    Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
  1.1695 +    int      con_size_in_bytes,        // object size in bytes if   known at compile time
  1.1696 +    Register t1,                       // temp register
  1.1697 +    Register t2,                       // temp register
  1.1698 +    Label&   slow_case                 // continuation point if fast allocation fails
  1.1699 +  );
  1.1700 +  void tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case);
  1.1701 +
  1.1702 +  //----
  1.1703 +  void set_word_if_not_zero(Register reg); // sets reg to 1 if not zero, otherwise 0
  1.1704 +
  1.1705 +  // Debugging
  1.1706 +
  1.1707 +  // only if +VerifyOops
  1.1708 +  void verify_oop(Register reg, const char* s = "broken oop");
  1.1709 +  void verify_oop_addr(Address addr, const char * s = "broken oop addr");
  1.1710 +
  1.1711 +  // only if +VerifyFPU
  1.1712 +  void verify_FPU(int stack_depth, const char* s = "illegal FPU state");
  1.1713 +
  1.1714 +  // prints msg, dumps registers and stops execution
  1.1715 +  void stop(const char* msg);
  1.1716 +
  1.1717 +  // prints msg and continues
  1.1718 +  void warn(const char* msg);
  1.1719 +
  1.1720 +  static void debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg);
  1.1721 +  static void debug64(char* msg, int64_t pc, int64_t regs[]);
  1.1722 +
  1.1723 +  void os_breakpoint();
  1.1724 +
  1.1725 +  void untested()                                { stop("untested"); }
  1.1726 +
  1.1727 +  void unimplemented(const char* what = "")      { char* b = new char[1024];  jio_snprintf(b, 1024, "unimplemented: %s", what);  stop(b); }  // pass the allocated size: sizeof(b) is the size of a char* (4/8), which truncated the message
  1.1728 +
  1.1729 +  void should_not_reach_here()                   { stop("should not reach here"); }
  1.1730 +
  1.1731 +  void print_CPU_state();
  1.1732 +
  1.1733 +  // Stack overflow checking
  1.1734 +  void bang_stack_with_offset(int offset) {
  1.1735 +    // stack grows down, caller passes positive offset
  1.1736 +    assert(offset > 0, "must bang with negative offset");
  1.1737 +    movl(Address(rsp, (-offset)), rax);  // touch rsp - offset; faults at this known instruction if the page is unmapped
  1.1738 +  }
  1.1739 +
  1.1740 +  // Writes to stack successive pages until offset reached to check for
  1.1741 +  // stack overflow + shadow pages.  Also, clobbers tmp
  1.1742 +  void bang_stack_size(Register size, Register tmp);
  1.1743 +
  1.1744 +  // Support for serializing memory accesses between threads
  1.1745 +  void serialize_memory(Register thread, Register tmp);
  1.1746 +
  1.1747 +  void verify_tlab();
  1.1748 +
  1.1749 +  // Biased locking support
  1.1750 +  // lock_reg and obj_reg must be loaded up with the appropriate values.
  1.1751 +  // swap_reg must be rax, and is killed.
  1.1752 +  // tmp_reg is optional. If it is supplied (i.e., != noreg) it will
  1.1753 +  // be killed; if not supplied, push/pop will be used internally to
  1.1754 +  // allocate a temporary (inefficient, avoid if possible).
  1.1755 +  // Optional slow case is for implementations (interpreter and C1) which branch to
  1.1756 +  // slow case directly. Leaves condition codes set for C2's Fast_Lock node.
  1.1757 +  // Returns offset of first potentially-faulting instruction for null
  1.1758 +  // check info (currently consumed only by C1). If
  1.1759 +  // swap_reg_contains_mark is true then returns -1 as it is assumed
  1.1760 +  // the calling code has already passed any potential faults.
  1.1761 +  int biased_locking_enter(Register lock_reg, Register obj_reg, Register swap_reg, Register tmp_reg,
  1.1762 +                           bool swap_reg_contains_mark,
  1.1763 +                           Label& done, Label* slow_case = NULL,
  1.1764 +                           BiasedLockingCounters* counters = NULL);
  1.1765 +  void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done);
  1.1766 +
  1.1767 +
  1.1768 +  Condition negate_condition(Condition cond);
  1.1769 +
  1.1770 +  // Instructions that use AddressLiteral operands. These instruction can handle 32bit/64bit
  1.1771 +  // operands. In general the names are modified to avoid hiding the instruction in Assembler
  1.1772 +  // so that we don't need to implement all the varieties in the Assembler with trivial wrappers
  1.1773 +  // here in MacroAssembler. The major exception to this rule is call
  1.1774 +
  1.1775 +  // Arithmetics
  1.1776 +
  1.1777 +
  1.1778 +  void addptr(Address dst, int32_t src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)) ; }
  1.1779 +  void addptr(Address dst, Register src);
  1.1780 +
  1.1781 +  void addptr(Register dst, Address src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); }
  1.1782 +  void addptr(Register dst, int32_t src);
  1.1783 +  void addptr(Register dst, Register src);
  1.1784 +
  1.1785 +  void andptr(Register dst, int32_t src);
  1.1786 +  void andptr(Register src1, Register src2) { LP64_ONLY(andq(src1, src2)) NOT_LP64(andl(src1, src2)) ; }
  1.1787 +
  1.1788 +  void cmp8(AddressLiteral src1, int imm);
  1.1789 +
  1.1790 +  // renamed to drag out the casting of address to int32_t/intptr_t
  1.1791 +  void cmp32(Register src1, int32_t imm);
  1.1792 +
  1.1793 +  void cmp32(AddressLiteral src1, int32_t imm);
  1.1794 +  // compare reg - mem, or reg - &mem
  1.1795 +  void cmp32(Register src1, AddressLiteral src2);
  1.1796 +
  1.1797 +  void cmp32(Register src1, Address src2);
  1.1798 +
  1.1799 +#ifndef _LP64
  1.1800 +  void cmpoop(Address dst, jobject obj);
  1.1801 +  void cmpoop(Register dst, jobject obj);
  1.1802 +#endif // _LP64
  1.1803 +
  1.1804 +  // NOTE src2 must be the lval. This is NOT an mem-mem compare
  1.1805 +  void cmpptr(Address src1, AddressLiteral src2);
  1.1806 +
  1.1807 +  void cmpptr(Register src1, AddressLiteral src2);
  1.1808 +
  1.1809 +  void cmpptr(Register src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
  1.1810 +  void cmpptr(Register src1, Address src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
  1.1811 +  // void cmpptr(Address src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
  1.1812 +
  1.1813 +  void cmpptr(Register src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
  1.1814 +  void cmpptr(Address src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
  1.1815 +
  1.1816 +  // cmp64 to avoid hiding cmpq
  1.1817 +  void cmp64(Register src1, AddressLiteral src);
  1.1818 +
  1.1819 +  void cmpxchgptr(Register reg, Address adr);
  1.1820 +
  1.1821 +  void locked_cmpxchgptr(Register reg, AddressLiteral adr);
  1.1822 +
  1.1823 +
  1.1824 +  void imulptr(Register dst, Register src) { LP64_ONLY(imulq(dst, src)) NOT_LP64(imull(dst, src)); }
  1.1825 +
  1.1826 +
  1.1827 +  void negptr(Register dst) { LP64_ONLY(negq(dst)) NOT_LP64(negl(dst)); }
  1.1828 +
  1.1829 +  void notptr(Register dst) { LP64_ONLY(notq(dst)) NOT_LP64(notl(dst)); }
  1.1830 +
  1.1831 +  void shlptr(Register dst, int32_t shift);
  1.1832 +  void shlptr(Register dst) { LP64_ONLY(shlq(dst)) NOT_LP64(shll(dst)); }
  1.1833 +
  1.1834 +  void shrptr(Register dst, int32_t shift);
  1.1835 +  void shrptr(Register dst) { LP64_ONLY(shrq(dst)) NOT_LP64(shrl(dst)); }
  1.1836 +
  1.1837 +  void sarptr(Register dst) { LP64_ONLY(sarq(dst)) NOT_LP64(sarl(dst)); }
  1.1838 +  void sarptr(Register dst, int32_t src) { LP64_ONLY(sarq(dst, src)) NOT_LP64(sarl(dst, src)); }
  1.1839 +
  1.1840 +  void subptr(Address dst, int32_t src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); }
  1.1841 +
  1.1842 +  void subptr(Register dst, Address src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); }
  1.1843 +  void subptr(Register dst, int32_t src);
  1.1844 +  void subptr(Register dst, Register src);
  1.1845 +
  1.1846 +
  1.1847 +  void sbbptr(Address dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); }
  1.1848 +  void sbbptr(Register dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); }
  1.1849 +
  1.1850 +  void xchgptr(Register src1, Register src2) { LP64_ONLY(xchgq(src1, src2)) NOT_LP64(xchgl(src1, src2)) ; }
  1.1851 +  void xchgptr(Register src1, Address src2) { LP64_ONLY(xchgq(src1, src2)) NOT_LP64(xchgl(src1, src2)) ; }
  1.1852 +
  1.1853 +  void xaddptr(Address src1, Register src2) { LP64_ONLY(xaddq(src1, src2)) NOT_LP64(xaddl(src1, src2)) ; }
  1.1854 +
  1.1855 +
  1.1856 +
  1.1857 +  // Helper functions for statistics gathering.
  1.1858 +  // Conditionally (atomically, on MPs) increments passed counter address, preserving condition codes.
  1.1859 +  void cond_inc32(Condition cond, AddressLiteral counter_addr);
  1.1860 +  // Unconditional atomic increment.
  1.1861 +  void atomic_incl(AddressLiteral counter_addr);
  1.1862 +
  1.1863 +  void lea(Register dst, AddressLiteral adr);
  1.1864 +  void lea(Address dst, AddressLiteral adr);
  1.1865 +  void lea(Register dst, Address adr) { Assembler::lea(dst, adr); }
  1.1866 +
  1.1867 +  void leal32(Register dst, Address src) { leal(dst, src); }
  1.1868 +
  1.1869 +  void test32(Register src1, AddressLiteral src2);
  1.1870 +
  1.1871 +  void orptr(Register dst, Address src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); }
  1.1872 +  void orptr(Register dst, Register src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); }
  1.1873 +  void orptr(Register dst, int32_t src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); }
  1.1874 +
  1.1875 +  void testptr(Register src, int32_t imm32) {  LP64_ONLY(testq(src, imm32)) NOT_LP64(testl(src, imm32)); }
  1.1876 +  void testptr(Register src1, Register src2);
  1.1877 +
  1.1878 +  void xorptr(Register dst, Register src) { LP64_ONLY(xorq(dst, src)) NOT_LP64(xorl(dst, src)); }
  1.1879 +  void xorptr(Register dst, Address src) { LP64_ONLY(xorq(dst, src)) NOT_LP64(xorl(dst, src)); }
  1.1880 +
  1.1881 +  // Calls
  1.1882 +
  1.1883 +  void call(Label& L, relocInfo::relocType rtype);
  1.1884 +  void call(Register entry);
  1.1885 +
  1.1886 +  // NOTE: this call transfers to the effective address of entry NOT
  1.1887 +  // the address contained by entry. This is because this is more natural
  1.1888 +  // for jumps/calls.
  1.1889 +  void call(AddressLiteral entry);
  1.1890 +
  1.1891 +  // Jumps
  1.1892 +
  1.1893 +  // NOTE: these jumps transfer to the effective address of dst NOT
  1.1894 +  // the address contained by dst. This is because this is more natural
  1.1895 +  // for jumps/calls.
  1.1896 +  void jump(AddressLiteral dst);
  1.1897 +  void jump_cc(Condition cc, AddressLiteral dst);
  1.1898 +
  1.1899 +  // 32bit can do a case table jump in one instruction but we no longer allow the base
  1.1900 +  // to be installed in the Address class. This jump transfers to the address
  1.1901 +  // contained in the location described by entry (not the address of entry)
  1.1902 +  void jump(ArrayAddress entry);
  1.1903 +
  1.1904 +  // Floating
  1.1905 +
  1.1906 +  void andpd(XMMRegister dst, Address src) { Assembler::andpd(dst, src); }
  1.1907 +  void andpd(XMMRegister dst, AddressLiteral src);
  1.1908 +
  1.1909 +  void comiss(XMMRegister dst, Address src) { Assembler::comiss(dst, src); }
  1.1910 +  void comiss(XMMRegister dst, AddressLiteral src);
  1.1911 +
  1.1912 +  void comisd(XMMRegister dst, Address src) { Assembler::comisd(dst, src); }
  1.1913 +  void comisd(XMMRegister dst, AddressLiteral src);
  1.1914 +
  1.1915 +  void fldcw(Address src) { Assembler::fldcw(src); }
  1.1916 +  void fldcw(AddressLiteral src);
  1.1917 +
  1.1918 +  void fld_s(int index)   { Assembler::fld_s(index); }
  1.1919 +  void fld_s(Address src) { Assembler::fld_s(src); }
  1.1920 +  void fld_s(AddressLiteral src);
  1.1921 +
  1.1922 +  void fld_d(Address src) { Assembler::fld_d(src); }
  1.1923 +  void fld_d(AddressLiteral src);
  1.1924 +
  1.1925 +  void fld_x(Address src) { Assembler::fld_x(src); }
  1.1926 +  void fld_x(AddressLiteral src);
  1.1927 +
  1.1928 +  void ldmxcsr(Address src) { Assembler::ldmxcsr(src); }
  1.1929 +  void ldmxcsr(AddressLiteral src);
  1.1930 +
  1.1931 +private:
  1.1932 +  // these are private because users should be doing movflt/movdbl
  1.1933 +
  1.1934 +  void movss(Address dst, XMMRegister src)     { Assembler::movss(dst, src); }
  1.1935 +  void movss(XMMRegister dst, XMMRegister src) { Assembler::movss(dst, src); }
  1.1936 +  void movss(XMMRegister dst, Address src)     { Assembler::movss(dst, src); }
  1.1937 +  void movss(XMMRegister dst, AddressLiteral src);
  1.1938 +
  1.1939 +  void movlpd(XMMRegister dst, Address src)      {Assembler::movlpd(dst, src); }
  1.1940 +  void movlpd(XMMRegister dst, AddressLiteral src);
  1.1941 +
  1.1942 +public:
  1.1943 +
  1.1944 +  void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); }
  1.1945 +  void movsd(Address dst, XMMRegister src)     { Assembler::movsd(dst, src); }
  1.1946 +  void movsd(XMMRegister dst, Address src)     { Assembler::movsd(dst, src); }
  1.1947 +  void movsd(XMMRegister dst, AddressLiteral src);
  1.1948 +
  1.1949 +  void ucomiss(XMMRegister dst, XMMRegister src) { Assembler::ucomiss(dst, src); }
  1.1950 +  void ucomiss(XMMRegister dst, Address src) { Assembler::ucomiss(dst, src); }
  1.1951 +  void ucomiss(XMMRegister dst, AddressLiteral src);
  1.1952 +
  1.1953 +  void ucomisd(XMMRegister dst, XMMRegister src) { Assembler::ucomisd(dst, src); }
  1.1954 +  void ucomisd(XMMRegister dst, Address src) { Assembler::ucomisd(dst, src); }
  1.1955 +  void ucomisd(XMMRegister dst, AddressLiteral src);
  1.1956 +
  1.1957 +  // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
  1.1958 +  void xorpd(XMMRegister dst, XMMRegister src) { Assembler::xorpd(dst, src); }
  1.1959 +  void xorpd(XMMRegister dst, Address src)     { Assembler::xorpd(dst, src); }
  1.1960 +  void xorpd(XMMRegister dst, AddressLiteral src);
  1.1961 +
  1.1962 +  // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
  1.1963 +  void xorps(XMMRegister dst, XMMRegister src) { Assembler::xorps(dst, src); }
  1.1964 +  void xorps(XMMRegister dst, Address src)     { Assembler::xorps(dst, src); }
  1.1965 +  void xorps(XMMRegister dst, AddressLiteral src);
  1.1966 +
  1.1967 +  // Data
  1.1968 +
  1.1969 +  void cmov(Condition cc, Register dst, Register src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmovl(cc, dst, src)); }
  1.1970 +
  1.1971 +  void cmovptr(Condition cc, Register dst, Address src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmovl(cc, dst, src)); }
  1.1972 +  void cmovptr(Condition cc, Register dst, Register src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmovl(cc, dst, src)); }
  1.1973 +
  1.1974 +  void movoop(Register dst, jobject obj);
  1.1975 +  void movoop(Address dst, jobject obj);
  1.1976 +
  1.1977 +  void movptr(ArrayAddress dst, Register src);
  1.1978 +  // can this do an lea?
  1.1979 +  void movptr(Register dst, ArrayAddress src);
  1.1980 +
  1.1981 +  void movptr(Register dst, Address src);
  1.1982 +
  1.1983 +  void movptr(Register dst, AddressLiteral src);
  1.1984 +
  1.1985 +  void movptr(Register dst, intptr_t src);
  1.1986 +  void movptr(Register dst, Register src);
  1.1987 +  void movptr(Address dst, intptr_t src);
  1.1988 +
  1.1989 +  void movptr(Address dst, Register src);
  1.1990 +
  1.1991 +#ifdef _LP64
  1.1992 +  // Generally the next two are only used for moving NULL
  1.1993 +  // Although there are situations in initializing the mark word where
  1.1994 +  // they could be used. They are dangerous.
  1.1995 +
  1.1996 +  // They only exist on LP64 so that int32_t and intptr_t are not the same
  1.1997 +  // and we have ambiguous declarations.
  1.1998 +
  1.1999 +  void movptr(Address dst, int32_t imm32);
  1.2000 +  void movptr(Register dst, int32_t imm32);
  1.2001 +#endif // _LP64
  1.2002 +
  1.2003 +  // to avoid hiding movl
  1.2004 +  void mov32(AddressLiteral dst, Register src);
  1.2005 +  void mov32(Register dst, AddressLiteral src);
  1.2006 +
  1.2007 +  // to avoid hiding movb
  1.2008 +  void movbyte(ArrayAddress dst, int src);
  1.2009 +
  1.2010 +  // Can push value or effective address
  1.2011 +  void pushptr(AddressLiteral src);
  1.2012 +
  1.2013 +  void pushptr(Address src) { LP64_ONLY(pushq(src)) NOT_LP64(pushl(src)); }
  1.2014 +  void popptr(Address src) { LP64_ONLY(popq(src)) NOT_LP64(popl(src)); }
  1.2015 +
  1.2016 +  void pushoop(jobject obj);
  1.2017 +
  1.2018 +  // sign extend as needed: 32-bit (l) value to a ptr-sized element
  1.2019 +  void movl2ptr(Register dst, Address src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(movl(dst, src)); }
  1.2020 +  void movl2ptr(Register dst, Register src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(if (dst != src) movl(dst, src)); }
  1.2021 +
  1.2022 +
  1.2023 +#undef VIRTUAL
  1.2024 +
  1.2025 +};
  1.2026 +
  1.2027 +/**
  1.2028 + * class SkipIfEqual:
  1.2029 + *
  1.2030 + * Instantiating this class will result in assembly code being output that will
  1.2031 + * jump around any code emitted between the creation of the instance and its
  1.2032 + * automatic destruction at the end of a scope block, depending on the value of
  1.2033 + * the flag passed to the constructor, which will be checked at run-time.
  1.2034 + */
  1.2035 +class SkipIfEqual {
  1.2036 + private:
  1.2037 +  MacroAssembler* _masm;  // assembler the conditional skip is emitted into
  1.2038 +  Label _label;           // target of the skip jump; presumably bound by the destructor -- see class comment above
  1.2039 +
  1.2040 + public:
  1.2041 +   SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value);  // emits a runtime test of *flag_addr against value and a jump past the scope
  1.2042 +   ~SkipIfEqual();  // ends the skipped region at the close of the scope block
  1.2043 +};
  1.2044 +
  1.2045 +#ifdef ASSERT
  1.2046 +inline bool AbstractAssembler::pd_check_instruction_mark() { return true; }  // x86: instruction-mark checking is always enabled in debug (ASSERT) builds
  1.2047 +#endif

mercurial