src/cpu/x86/vm/assembler_x86.hpp

Thu, 24 May 2018 17:06:56 +0800

author
aoqi
date
Thu, 24 May 2018 17:06:56 +0800
changeset 8604
04d83ba48607
parent 8307
daaf806995b3
parent 7994
04ff2f6cd0eb
child 9806
758c07667682
permissions
-rw-r--r--

Merge

aoqi@0 1 /*
aoqi@0 2 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
aoqi@0 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
aoqi@0 4 *
aoqi@0 5 * This code is free software; you can redistribute it and/or modify it
aoqi@0 6 * under the terms of the GNU General Public License version 2 only, as
aoqi@0 7 * published by the Free Software Foundation.
aoqi@0 8 *
aoqi@0 9 * This code is distributed in the hope that it will be useful, but WITHOUT
aoqi@0 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
aoqi@0 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
aoqi@0 12 * version 2 for more details (a copy is included in the LICENSE file that
aoqi@0 13 * accompanied this code).
aoqi@0 14 *
aoqi@0 15 * You should have received a copy of the GNU General Public License version
aoqi@0 16 * 2 along with this work; if not, write to the Free Software Foundation,
aoqi@0 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
aoqi@0 18 *
aoqi@0 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
aoqi@0 20 * or visit www.oracle.com if you need additional information or have any
aoqi@0 21 * questions.
aoqi@0 22 *
aoqi@0 23 */
aoqi@0 24
aoqi@0 25 #ifndef CPU_X86_VM_ASSEMBLER_X86_HPP
aoqi@0 26 #define CPU_X86_VM_ASSEMBLER_X86_HPP
aoqi@0 27
aoqi@0 28 #include "asm/register.hpp"
aoqi@0 29
aoqi@0 30 class BiasedLockingCounters;
aoqi@0 31
aoqi@0 32 // Contains all the definitions needed for x86 assembly code generation.
aoqi@0 33
aoqi@0 34 // Calling convention
aoqi@0 35 class Argument VALUE_OBJ_CLASS_SPEC {
aoqi@0 36 public:
aoqi@0 37 enum {
aoqi@0 38 #ifdef _LP64
aoqi@0 39 #ifdef _WIN64
aoqi@0 40 n_int_register_parameters_c = 4, // rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
aoqi@0 41 n_float_register_parameters_c = 4, // xmm0 - xmm3 (c_farg0, c_farg1, ... )
aoqi@0 42 #else
aoqi@0 43 n_int_register_parameters_c = 6, // rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
aoqi@0 44 n_float_register_parameters_c = 8, // xmm0 - xmm7 (c_farg0, c_farg1, ... )
aoqi@0 45 #endif // _WIN64
aoqi@0 46 n_int_register_parameters_j = 6, // j_rarg0, j_rarg1, ...
aoqi@0 47 n_float_register_parameters_j = 8 // j_farg0, j_farg1, ...
aoqi@0 48 #else
aoqi@0 49 n_register_parameters = 0 // 0 registers used to pass arguments
aoqi@0 50 #endif // _LP64
aoqi@0 51 };
aoqi@0 52 };
aoqi@0 53
aoqi@0 54
aoqi@0 55 #ifdef _LP64
aoqi@0 56 // Symbolically name the register arguments used by the c calling convention.
aoqi@0 57 // Windows is different from linux/solaris. So much for standards...
aoqi@0 58
aoqi@0 59 #ifdef _WIN64
aoqi@0 60
aoqi@0 61 REGISTER_DECLARATION(Register, c_rarg0, rcx);
aoqi@0 62 REGISTER_DECLARATION(Register, c_rarg1, rdx);
aoqi@0 63 REGISTER_DECLARATION(Register, c_rarg2, r8);
aoqi@0 64 REGISTER_DECLARATION(Register, c_rarg3, r9);
aoqi@0 65
aoqi@0 66 REGISTER_DECLARATION(XMMRegister, c_farg0, xmm0);
aoqi@0 67 REGISTER_DECLARATION(XMMRegister, c_farg1, xmm1);
aoqi@0 68 REGISTER_DECLARATION(XMMRegister, c_farg2, xmm2);
aoqi@0 69 REGISTER_DECLARATION(XMMRegister, c_farg3, xmm3);
aoqi@0 70
aoqi@0 71 #else
aoqi@0 72
aoqi@0 73 REGISTER_DECLARATION(Register, c_rarg0, rdi);
aoqi@0 74 REGISTER_DECLARATION(Register, c_rarg1, rsi);
aoqi@0 75 REGISTER_DECLARATION(Register, c_rarg2, rdx);
aoqi@0 76 REGISTER_DECLARATION(Register, c_rarg3, rcx);
aoqi@0 77 REGISTER_DECLARATION(Register, c_rarg4, r8);
aoqi@0 78 REGISTER_DECLARATION(Register, c_rarg5, r9);
aoqi@0 79
aoqi@0 80 REGISTER_DECLARATION(XMMRegister, c_farg0, xmm0);
aoqi@0 81 REGISTER_DECLARATION(XMMRegister, c_farg1, xmm1);
aoqi@0 82 REGISTER_DECLARATION(XMMRegister, c_farg2, xmm2);
aoqi@0 83 REGISTER_DECLARATION(XMMRegister, c_farg3, xmm3);
aoqi@0 84 REGISTER_DECLARATION(XMMRegister, c_farg4, xmm4);
aoqi@0 85 REGISTER_DECLARATION(XMMRegister, c_farg5, xmm5);
aoqi@0 86 REGISTER_DECLARATION(XMMRegister, c_farg6, xmm6);
aoqi@0 87 REGISTER_DECLARATION(XMMRegister, c_farg7, xmm7);
aoqi@0 88
aoqi@0 89 #endif // _WIN64
aoqi@0 90
aoqi@0 91 // Symbolically name the register arguments used by the Java calling convention.
aoqi@0 92 // We have control over the convention for java so we can do what we please.
aoqi@0 93 // What pleases us is to offset the java calling convention so that when
aoqi@0 94 // we call a suitable jni method the arguments are lined up and we don't
aoqi@0 95 // have to do much shuffling. A suitable jni method is non-static and has a
aoqi@0 96 // small number of arguments (two fewer args on windows)
aoqi@0 97 //
aoqi@0 98 // |-------------------------------------------------------|
aoqi@0 99 // | c_rarg0 c_rarg1 c_rarg2 c_rarg3 c_rarg4 c_rarg5 |
aoqi@0 100 // |-------------------------------------------------------|
aoqi@0 101 // | rcx rdx r8 r9 rdi* rsi* | windows (* not a c_rarg)
aoqi@0 102 // | rdi rsi rdx rcx r8 r9 | solaris/linux
aoqi@0 103 // |-------------------------------------------------------|
aoqi@0 104 // | j_rarg5 j_rarg0 j_rarg1 j_rarg2 j_rarg3 j_rarg4 |
aoqi@0 105 // |-------------------------------------------------------|
aoqi@0 106
aoqi@0 107 REGISTER_DECLARATION(Register, j_rarg0, c_rarg1);
aoqi@0 108 REGISTER_DECLARATION(Register, j_rarg1, c_rarg2);
aoqi@0 109 REGISTER_DECLARATION(Register, j_rarg2, c_rarg3);
aoqi@0 110 // Windows runs out of register args here
aoqi@0 111 #ifdef _WIN64
aoqi@0 112 REGISTER_DECLARATION(Register, j_rarg3, rdi);
aoqi@0 113 REGISTER_DECLARATION(Register, j_rarg4, rsi);
aoqi@0 114 #else
aoqi@0 115 REGISTER_DECLARATION(Register, j_rarg3, c_rarg4);
aoqi@0 116 REGISTER_DECLARATION(Register, j_rarg4, c_rarg5);
aoqi@0 117 #endif /* _WIN64 */
aoqi@0 118 REGISTER_DECLARATION(Register, j_rarg5, c_rarg0);
aoqi@0 119
aoqi@0 120 REGISTER_DECLARATION(XMMRegister, j_farg0, xmm0);
aoqi@0 121 REGISTER_DECLARATION(XMMRegister, j_farg1, xmm1);
aoqi@0 122 REGISTER_DECLARATION(XMMRegister, j_farg2, xmm2);
aoqi@0 123 REGISTER_DECLARATION(XMMRegister, j_farg3, xmm3);
aoqi@0 124 REGISTER_DECLARATION(XMMRegister, j_farg4, xmm4);
aoqi@0 125 REGISTER_DECLARATION(XMMRegister, j_farg5, xmm5);
aoqi@0 126 REGISTER_DECLARATION(XMMRegister, j_farg6, xmm6);
aoqi@0 127 REGISTER_DECLARATION(XMMRegister, j_farg7, xmm7);
aoqi@0 128
aoqi@0 129 REGISTER_DECLARATION(Register, rscratch1, r10); // volatile
aoqi@0 130 REGISTER_DECLARATION(Register, rscratch2, r11); // volatile
aoqi@0 131
aoqi@0 132 REGISTER_DECLARATION(Register, r12_heapbase, r12); // callee-saved
aoqi@0 133 REGISTER_DECLARATION(Register, r15_thread, r15); // callee-saved
aoqi@0 134
aoqi@0 135 #else
aoqi@0 136 // rscratch1 will appear in 32bit code that is dead but of course must compile
aoqi@0 137 // Using noreg ensures if the dead code is incorrectly live and executed it
aoqi@0 138 // will cause an assertion failure
aoqi@0 139 #define rscratch1 noreg
aoqi@0 140 #define rscratch2 noreg
aoqi@0 141
aoqi@0 142 #endif // _LP64
aoqi@0 143
zmajo@7854 144 // JSR 292
zmajo@7854 145 // On x86, the SP does not have to be saved when invoking method handle intrinsics
zmajo@7854 146 // or compiled lambda forms. We indicate that by setting rbp_mh_SP_save to noreg.
zmajo@7854 147 REGISTER_DECLARATION(Register, rbp_mh_SP_save, noreg);
aoqi@0 148
aoqi@0 149 // Address is an abstraction used to represent a memory location
aoqi@0 150 // using any of the amd64 addressing modes with one object.
aoqi@0 151 //
aoqi@0 152 // Note: A register location is represented via a Register, not
aoqi@0 153 // via an address for efficiency & simplicity reasons.
aoqi@0 154
aoqi@0 155 class ArrayAddress;
aoqi@0 156
aoqi@0 157 class Address VALUE_OBJ_CLASS_SPEC {
aoqi@0 158 public:
aoqi@0 159 enum ScaleFactor {
aoqi@0 160 no_scale = -1,
aoqi@0 161 times_1 = 0,
aoqi@0 162 times_2 = 1,
aoqi@0 163 times_4 = 2,
aoqi@0 164 times_8 = 3,
aoqi@0 165 times_ptr = LP64_ONLY(times_8) NOT_LP64(times_4)
aoqi@0 166 };
aoqi@0 167 static ScaleFactor times(int size) {
aoqi@0 168 assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size");
aoqi@0 169 if (size == 8) return times_8;
aoqi@0 170 if (size == 4) return times_4;
aoqi@0 171 if (size == 2) return times_2;
aoqi@0 172 return times_1;
aoqi@0 173 }
aoqi@0 174 static int scale_size(ScaleFactor scale) {
aoqi@0 175 assert(scale != no_scale, "");
aoqi@0 176 assert(((1 << (int)times_1) == 1 &&
aoqi@0 177 (1 << (int)times_2) == 2 &&
aoqi@0 178 (1 << (int)times_4) == 4 &&
aoqi@0 179 (1 << (int)times_8) == 8), "");
aoqi@0 180 return (1 << (int)scale);
aoqi@0 181 }
aoqi@0 182
aoqi@0 183 private:
aoqi@0 184 Register _base;
aoqi@0 185 Register _index;
aoqi@0 186 ScaleFactor _scale;
aoqi@0 187 int _disp;
aoqi@0 188 RelocationHolder _rspec;
aoqi@0 189
aoqi@0 190 // Easily misused constructors make them private
aoqi@0 191 // %%% can we make these go away?
aoqi@0 192 NOT_LP64(Address(address loc, RelocationHolder spec);)
aoqi@0 193 Address(int disp, address loc, relocInfo::relocType rtype);
aoqi@0 194 Address(int disp, address loc, RelocationHolder spec);
aoqi@0 195
aoqi@0 196 public:
aoqi@0 197
aoqi@0 198 int disp() { return _disp; }
aoqi@0 199 // creation
aoqi@0 200 Address()
aoqi@0 201 : _base(noreg),
aoqi@0 202 _index(noreg),
aoqi@0 203 _scale(no_scale),
aoqi@0 204 _disp(0) {
aoqi@0 205 }
aoqi@0 206
aoqi@0 207 // No default displacement otherwise Register can be implicitly
aoqi@0 208 // converted to 0(Register) which is quite a different animal.
aoqi@0 209
aoqi@0 210 Address(Register base, int disp)
aoqi@0 211 : _base(base),
aoqi@0 212 _index(noreg),
aoqi@0 213 _scale(no_scale),
aoqi@0 214 _disp(disp) {
aoqi@0 215 }
aoqi@0 216
aoqi@0 217 Address(Register base, Register index, ScaleFactor scale, int disp = 0)
aoqi@0 218 : _base (base),
aoqi@0 219 _index(index),
aoqi@0 220 _scale(scale),
aoqi@0 221 _disp (disp) {
aoqi@0 222 assert(!index->is_valid() == (scale == Address::no_scale),
aoqi@0 223 "inconsistent address");
aoqi@0 224 }
aoqi@0 225
aoqi@0 226 Address(Register base, RegisterOrConstant index, ScaleFactor scale = times_1, int disp = 0)
aoqi@0 227 : _base (base),
aoqi@0 228 _index(index.register_or_noreg()),
aoqi@0 229 _scale(scale),
aoqi@0 230 _disp (disp + (index.constant_or_zero() * scale_size(scale))) {
aoqi@0 231 if (!index.is_register()) scale = Address::no_scale;
aoqi@0 232 assert(!_index->is_valid() == (scale == Address::no_scale),
aoqi@0 233 "inconsistent address");
aoqi@0 234 }
aoqi@0 235
aoqi@0 236 Address plus_disp(int disp) const {
aoqi@0 237 Address a = (*this);
aoqi@0 238 a._disp += disp;
aoqi@0 239 return a;
aoqi@0 240 }
aoqi@0 241 Address plus_disp(RegisterOrConstant disp, ScaleFactor scale = times_1) const {
aoqi@0 242 Address a = (*this);
aoqi@0 243 a._disp += disp.constant_or_zero() * scale_size(scale);
aoqi@0 244 if (disp.is_register()) {
aoqi@0 245 assert(!a.index()->is_valid(), "competing indexes");
aoqi@0 246 a._index = disp.as_register();
aoqi@0 247 a._scale = scale;
aoqi@0 248 }
aoqi@0 249 return a;
aoqi@0 250 }
aoqi@0 251 bool is_same_address(Address a) const {
aoqi@0 252 // disregard _rspec
aoqi@0 253 return _base == a._base && _disp == a._disp && _index == a._index && _scale == a._scale;
aoqi@0 254 }
aoqi@0 255
aoqi@0 256 // The following two overloads are used in connection with the
aoqi@0 257 // ByteSize type (see sizes.hpp). They simplify the use of
aoqi@0 258 // ByteSize'd arguments in assembly code. Note that their equivalent
aoqi@0 259 // for the optimized build are the member functions with int disp
aoqi@0 260 // argument since ByteSize is mapped to an int type in that case.
aoqi@0 261 //
aoqi@0 262 // Note: DO NOT introduce similar overloaded functions for WordSize
aoqi@0 263 // arguments as in the optimized mode, both ByteSize and WordSize
aoqi@0 264 // are mapped to the same type and thus the compiler cannot make a
aoqi@0 265 // distinction anymore (=> compiler errors).
aoqi@0 266
aoqi@0 267 #ifdef ASSERT
aoqi@0 268 Address(Register base, ByteSize disp)
aoqi@0 269 : _base(base),
aoqi@0 270 _index(noreg),
aoqi@0 271 _scale(no_scale),
aoqi@0 272 _disp(in_bytes(disp)) {
aoqi@0 273 }
aoqi@0 274
aoqi@0 275 Address(Register base, Register index, ScaleFactor scale, ByteSize disp)
aoqi@0 276 : _base(base),
aoqi@0 277 _index(index),
aoqi@0 278 _scale(scale),
aoqi@0 279 _disp(in_bytes(disp)) {
aoqi@0 280 assert(!index->is_valid() == (scale == Address::no_scale),
aoqi@0 281 "inconsistent address");
aoqi@0 282 }
aoqi@0 283
aoqi@0 284 Address(Register base, RegisterOrConstant index, ScaleFactor scale, ByteSize disp)
aoqi@0 285 : _base (base),
aoqi@0 286 _index(index.register_or_noreg()),
aoqi@0 287 _scale(scale),
aoqi@0 288 _disp (in_bytes(disp) + (index.constant_or_zero() * scale_size(scale))) {
aoqi@0 289 if (!index.is_register()) scale = Address::no_scale;
aoqi@0 290 assert(!_index->is_valid() == (scale == Address::no_scale),
aoqi@0 291 "inconsistent address");
aoqi@0 292 }
aoqi@0 293
aoqi@0 294 #endif // ASSERT
aoqi@0 295
aoqi@0 296 // accessors
aoqi@0 297 bool uses(Register reg) const { return _base == reg || _index == reg; }
aoqi@0 298 Register base() const { return _base; }
aoqi@0 299 Register index() const { return _index; }
aoqi@0 300 ScaleFactor scale() const { return _scale; }
aoqi@0 301 int disp() const { return _disp; }
aoqi@0 302
aoqi@0 303 // Convert the raw encoding form into the form expected by the constructor for
aoqi@0 304 // Address. An index of 4 (rsp) corresponds to having no index, so convert
aoqi@0 305 // that to noreg for the Address constructor.
aoqi@0 306 static Address make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc);
aoqi@0 307
aoqi@0 308 static Address make_array(ArrayAddress);
aoqi@0 309
aoqi@0 310 private:
aoqi@0 311 bool base_needs_rex() const {
aoqi@0 312 return _base != noreg && _base->encoding() >= 8;
aoqi@0 313 }
aoqi@0 314
aoqi@0 315 bool index_needs_rex() const {
aoqi@0 316 return _index != noreg &&_index->encoding() >= 8;
aoqi@0 317 }
aoqi@0 318
aoqi@0 319 relocInfo::relocType reloc() const { return _rspec.type(); }
aoqi@0 320
aoqi@0 321 friend class Assembler;
aoqi@0 322 friend class MacroAssembler;
aoqi@0 323 friend class LIR_Assembler; // base/index/scale/disp
aoqi@0 324 };
aoqi@0 325
aoqi@0 326 //
aoqi@0 327 // AddressLiteral has been split out from Address because operands of this type
aoqi@0 328 // need to be treated specially on 32bit vs. 64bit platforms. By splitting it out
aoqi@0 329 // the few instructions that need to deal with address literals are unique and the
aoqi@0 330 // MacroAssembler does not have to implement every instruction in the Assembler
aoqi@0 331 // in order to search for address literals that may need special handling depending
aoqi@0 332 // on the instruction and the platform. A small step on the way to merging i486/amd64
aoqi@0 333 // directories.
aoqi@0 334 //
aoqi@0 335 class AddressLiteral VALUE_OBJ_CLASS_SPEC {
aoqi@0 336 friend class ArrayAddress;
aoqi@0 337 RelocationHolder _rspec;
aoqi@0 338 // Typically we use AddressLiterals we want to use their rval
aoqi@0 339 // However in some situations we want the lval (effect address) of the item.
aoqi@0 340 // We provide a special factory for making those lvals.
aoqi@0 341 bool _is_lval;
aoqi@0 342
aoqi@0 343 // If the target is far we'll need to load the ea of this to
aoqi@0 344 // a register to reach it. Otherwise if near we can do rip
aoqi@0 345 // relative addressing.
aoqi@0 346
aoqi@0 347 address _target;
aoqi@0 348
aoqi@0 349 protected:
aoqi@0 350 // creation
aoqi@0 351 AddressLiteral()
aoqi@0 352 : _is_lval(false),
aoqi@0 353 _target(NULL)
aoqi@0 354 {}
aoqi@0 355
aoqi@0 356 public:
aoqi@0 357
aoqi@0 358
aoqi@0 359 AddressLiteral(address target, relocInfo::relocType rtype);
aoqi@0 360
aoqi@0 361 AddressLiteral(address target, RelocationHolder const& rspec)
aoqi@0 362 : _rspec(rspec),
aoqi@0 363 _is_lval(false),
aoqi@0 364 _target(target)
aoqi@0 365 {}
aoqi@0 366
aoqi@0 367 AddressLiteral addr() {
aoqi@0 368 AddressLiteral ret = *this;
aoqi@0 369 ret._is_lval = true;
aoqi@0 370 return ret;
aoqi@0 371 }
aoqi@0 372
aoqi@0 373
aoqi@0 374 private:
aoqi@0 375
aoqi@0 376 address target() { return _target; }
aoqi@0 377 bool is_lval() { return _is_lval; }
aoqi@0 378
aoqi@0 379 relocInfo::relocType reloc() const { return _rspec.type(); }
aoqi@0 380 const RelocationHolder& rspec() const { return _rspec; }
aoqi@0 381
aoqi@0 382 friend class Assembler;
aoqi@0 383 friend class MacroAssembler;
aoqi@0 384 friend class Address;
aoqi@0 385 friend class LIR_Assembler;
aoqi@0 386 };
aoqi@0 387
aoqi@0 388 // Convenience classes
aoqi@0 389 class RuntimeAddress: public AddressLiteral {
aoqi@0 390
aoqi@0 391 public:
aoqi@0 392
aoqi@0 393 RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {}
aoqi@0 394
aoqi@0 395 };
aoqi@0 396
aoqi@0 397 class ExternalAddress: public AddressLiteral {
aoqi@0 398 private:
aoqi@0 399 static relocInfo::relocType reloc_for_target(address target) {
aoqi@0 400 // Sometimes ExternalAddress is used for values which aren't
aoqi@0 401 // exactly addresses, like the card table base.
aoqi@0 402 // external_word_type can't be used for values in the first page
aoqi@0 403 // so just skip the reloc in that case.
aoqi@0 404 return external_word_Relocation::can_be_relocated(target) ? relocInfo::external_word_type : relocInfo::none;
aoqi@0 405 }
aoqi@0 406
aoqi@0 407 public:
aoqi@0 408
aoqi@0 409 ExternalAddress(address target) : AddressLiteral(target, reloc_for_target(target)) {}
aoqi@0 410
aoqi@0 411 };
aoqi@0 412
aoqi@0 413 class InternalAddress: public AddressLiteral {
aoqi@0 414
aoqi@0 415 public:
aoqi@0 416
aoqi@0 417 InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {}
aoqi@0 418
aoqi@0 419 };
aoqi@0 420
aoqi@0 421 // x86 can do array addressing as a single operation since disp can be an absolute
aoqi@0 422 // address; amd64 can't. We create a class that expresses the concept but does extra
aoqi@0 423 // magic on amd64 to get the final result.
aoqi@0 424
aoqi@0 425 class ArrayAddress VALUE_OBJ_CLASS_SPEC {
aoqi@0 426 private:
aoqi@0 427
aoqi@0 428 AddressLiteral _base;
aoqi@0 429 Address _index;
aoqi@0 430
aoqi@0 431 public:
aoqi@0 432
aoqi@0 433 ArrayAddress() {};
aoqi@0 434 ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {};
aoqi@0 435 AddressLiteral base() { return _base; }
aoqi@0 436 Address index() { return _index; }
aoqi@0 437
aoqi@0 438 };
aoqi@0 439
aoqi@0 440 const int FPUStateSizeInWords = NOT_LP64(27) LP64_ONLY( 512 / wordSize);
aoqi@0 441
aoqi@0 442 // The Intel x86/Amd64 Assembler: Pure assembler doing NO optimizations on the instruction
aoqi@0 443 // level (e.g. mov rax, 0 is not translated into xor rax, rax!); i.e., what you write
aoqi@0 444 // is what you get. The Assembler is generating code into a CodeBuffer.
aoqi@0 445
aoqi@0 446 class Assembler : public AbstractAssembler {
aoqi@0 447 friend class AbstractAssembler; // for the non-virtual hack
aoqi@0 448 friend class LIR_Assembler; // as_Address()
aoqi@0 449 friend class StubGenerator;
aoqi@0 450
aoqi@0 451 public:
// The x86 condition codes used for conditional jumps/moves, listed in
// ascending encoding order. Several names are aliases for one encoding.
// In x86 naming, below/above are the unsigned comparisons and less/greater
// the signed ones.
enum Condition {
  overflow     = 0x0,
  noOverflow   = 0x1,
  below        = 0x2,
  carrySet     = 0x2,   // alias of below
  aboveEqual   = 0x3,
  carryClear   = 0x3,   // alias of aboveEqual
  zero         = 0x4,
  equal        = 0x4,   // alias of zero
  notZero      = 0x5,
  notEqual     = 0x5,   // alias of notZero
  belowEqual   = 0x6,
  above        = 0x7,
  negative     = 0x8,
  positive     = 0x9,
  parity       = 0xA,
  noParity     = 0xB,
  less         = 0xC,
  greaterEqual = 0xD,
  lessEqual    = 0xE,
  greater      = 0xF
};
aoqi@0 474
// Instruction prefix bytes: segment overrides, the REX family and the VEX
// escape bytes. The REX variants are spelled as combinations of the
// single-bit enumerators so that the bit structure is visible; the
// resulting values are 0x40 through 0x4F.
enum Prefix {
  // segment overrides
  ES_segment = 0x26,
  CS_segment = 0x2e,
  SS_segment = 0x36,
  DS_segment = 0x3e,
  FS_segment = 0x64,
  GS_segment = 0x65,

  // REX with no extension bit set, then the four single-bit variants
  REX        = 0x40,
  REX_B      = REX | 0x1,
  REX_X      = REX | 0x2,
  REX_R      = REX | 0x4,
  REX_W      = REX | 0x8,

  // combinations without W
  REX_XB     = REX_X  | REX_B,
  REX_RB     = REX_R  | REX_B,
  REX_RX     = REX_R  | REX_X,
  REX_RXB    = REX_RX | REX_B,

  // combinations with W
  REX_WB     = REX_W   | REX_B,
  REX_WX     = REX_W   | REX_X,
  REX_WXB    = REX_WX  | REX_B,
  REX_WR     = REX_W   | REX_R,
  REX_WRB    = REX_WR  | REX_B,
  REX_WRX    = REX_WR  | REX_X,
  REX_WRXB   = REX_WRX | REX_B,

  VEX_3bytes = 0xC4,
  VEX_2bytes = 0xC5
};
aoqi@0 507
// Bit masks used when assembling a VEX prefix.
// VEX_R and VEX_W deliberately share the value 0x80.
// NOTE(review): presumably they are applied to different prefix bytes --
// confirm in vex_prefix().
enum VexPrefix {
  VEX_B = 1 << 5,   // 0x20
  VEX_X = 1 << 6,   // 0x40
  VEX_R = 1 << 7,   // 0x80
  VEX_W = 1 << 7    // 0x80
};
aoqi@0 514
// Selector for the SIMD prefix of an instruction (values 0 to 3).
// NOTE(review): the names presumably refer to the legacy 0x66 / 0xF3 / 0xF2
// prefix bytes -- confirm against simd_prefix().
enum VexSimdPrefix {
  VEX_SIMD_NONE = 0,
  VEX_SIMD_66   = 1,
  VEX_SIMD_F3   = 2,
  VEX_SIMD_F2   = 3
};
aoqi@0 521
// Selector for the opcode map of an instruction (values 0 to 3).
// NOTE(review): the names presumably refer to the 0F, 0F 38 and 0F 3A
// opcode escape sequences -- confirm against vex_prefix().
enum VexOpcode {
  VEX_OPCODE_NONE  = 0,
  VEX_OPCODE_0F    = 1,
  VEX_OPCODE_0F_38 = 2,
  VEX_OPCODE_0F_3A = 3
};
aoqi@0 528
// Identifies which embedded operand of an instruction is meant.
// Used as input to locate_operand, and as the format code for relocations.
// narrow_oop_operand exists only in 64-bit builds, so _WhichOperand_limit
// differs between 32-bit (3) and 64-bit (4).
enum WhichOperand {
  imm_operand    = 0,           // embedded 32-bit|64-bit immediate operand
  disp32_operand = 1,           // embedded 32-bit displacement or address
  call32_operand = 2,           // embedded 32-bit self-relative displacement
#ifdef _LP64
  narrow_oop_operand  = 3,      // embedded 32-bit immediate narrow oop
  _WhichOperand_limit = 4
#else
  _WhichOperand_limit = 3
#endif
};
aoqi@0 541
aoqi@0 542
aoqi@0 543
aoqi@0 544 // NOTE: The general philosophy of the declarations here is that 64bit versions
aoqi@0 545 // of instructions are freely declared without the need for wrapping them in an ifdef.
aoqi@0 546 // (Some dangerous instructions are ifdef'ed out of inappropriate jvm's.)
aoqi@0 547 // In the .cpp file the implementations are wrapped so that they are dropped out
aoqi@0 548 // of the resulting jvm. This is done mostly to keep the footprint of MINIMAL
aoqi@0 549 // to the size it was prior to merging up the 32bit and 64bit assemblers.
aoqi@0 550 //
aoqi@0 551 // This does mean you'll get a linker/runtime error if you use a 64bit only instruction
aoqi@0 552 // in a 32bit vm. This is somewhat unfortunate but keeps the ifdef noise down.
aoqi@0 553
aoqi@0 554 private:
aoqi@0 555
aoqi@0 556
aoqi@0 557 // 64bit prefixes
aoqi@0 558 int prefix_and_encode(int reg_enc, bool byteinst = false);
aoqi@0 559 int prefixq_and_encode(int reg_enc);
aoqi@0 560
aoqi@0 561 int prefix_and_encode(int dst_enc, int src_enc, bool byteinst = false);
aoqi@0 562 int prefixq_and_encode(int dst_enc, int src_enc);
aoqi@0 563
aoqi@0 564 void prefix(Register reg);
aoqi@0 565 void prefix(Address adr);
aoqi@0 566 void prefixq(Address adr);
aoqi@0 567
aoqi@0 568 void prefix(Address adr, Register reg, bool byteinst = false);
aoqi@0 569 void prefix(Address adr, XMMRegister reg);
aoqi@0 570 void prefixq(Address adr, Register reg);
aoqi@0 571 void prefixq(Address adr, XMMRegister reg);
aoqi@0 572
aoqi@0 573 void prefetch_prefix(Address src);
aoqi@0 574
aoqi@0 575 void rex_prefix(Address adr, XMMRegister xreg,
aoqi@0 576 VexSimdPrefix pre, VexOpcode opc, bool rex_w);
aoqi@0 577 int rex_prefix_and_encode(int dst_enc, int src_enc,
aoqi@0 578 VexSimdPrefix pre, VexOpcode opc, bool rex_w);
aoqi@0 579
aoqi@0 580 void vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w,
aoqi@0 581 int nds_enc, VexSimdPrefix pre, VexOpcode opc,
aoqi@0 582 bool vector256);
aoqi@0 583
aoqi@0 584 void vex_prefix(Address adr, int nds_enc, int xreg_enc,
aoqi@0 585 VexSimdPrefix pre, VexOpcode opc,
aoqi@0 586 bool vex_w, bool vector256);
aoqi@0 587
aoqi@0 588 void vex_prefix(XMMRegister dst, XMMRegister nds, Address src,
aoqi@0 589 VexSimdPrefix pre, bool vector256 = false) {
aoqi@0 590 int dst_enc = dst->encoding();
aoqi@0 591 int nds_enc = nds->is_valid() ? nds->encoding() : 0;
aoqi@0 592 vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, false, vector256);
aoqi@0 593 }
aoqi@0 594
aoqi@0 595 void vex_prefix_0F38(Register dst, Register nds, Address src) {
aoqi@0 596 bool vex_w = false;
aoqi@0 597 bool vector256 = false;
aoqi@0 598 vex_prefix(src, nds->encoding(), dst->encoding(),
aoqi@0 599 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256);
aoqi@0 600 }
aoqi@0 601
aoqi@0 602 void vex_prefix_0F38_q(Register dst, Register nds, Address src) {
aoqi@0 603 bool vex_w = true;
aoqi@0 604 bool vector256 = false;
aoqi@0 605 vex_prefix(src, nds->encoding(), dst->encoding(),
aoqi@0 606 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256);
aoqi@0 607 }
aoqi@0 608 int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
aoqi@0 609 VexSimdPrefix pre, VexOpcode opc,
aoqi@0 610 bool vex_w, bool vector256);
aoqi@0 611
aoqi@0 612 int vex_prefix_0F38_and_encode(Register dst, Register nds, Register src) {
aoqi@0 613 bool vex_w = false;
aoqi@0 614 bool vector256 = false;
aoqi@0 615 return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
aoqi@0 616 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256);
aoqi@0 617 }
aoqi@0 618 int vex_prefix_0F38_and_encode_q(Register dst, Register nds, Register src) {
aoqi@0 619 bool vex_w = true;
aoqi@0 620 bool vector256 = false;
aoqi@0 621 return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
aoqi@0 622 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256);
aoqi@0 623 }
aoqi@0 624 int vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
aoqi@0 625 VexSimdPrefix pre, bool vector256 = false,
aoqi@0 626 VexOpcode opc = VEX_OPCODE_0F) {
aoqi@0 627 int src_enc = src->encoding();
aoqi@0 628 int dst_enc = dst->encoding();
aoqi@0 629 int nds_enc = nds->is_valid() ? nds->encoding() : 0;
aoqi@0 630 return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, false, vector256);
aoqi@0 631 }
aoqi@0 632
aoqi@0 633 void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr,
aoqi@0 634 VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
aoqi@0 635 bool rex_w = false, bool vector256 = false);
aoqi@0 636
aoqi@0 637 void simd_prefix(XMMRegister dst, Address src,
aoqi@0 638 VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
aoqi@0 639 simd_prefix(dst, xnoreg, src, pre, opc);
aoqi@0 640 }
aoqi@0 641
aoqi@0 642 void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre) {
aoqi@0 643 simd_prefix(src, dst, pre);
aoqi@0 644 }
aoqi@0 645 void simd_prefix_q(XMMRegister dst, XMMRegister nds, Address src,
aoqi@0 646 VexSimdPrefix pre) {
aoqi@0 647 bool rex_w = true;
aoqi@0 648 simd_prefix(dst, nds, src, pre, VEX_OPCODE_0F, rex_w);
aoqi@0 649 }
aoqi@0 650
aoqi@0 651 int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
aoqi@0 652 VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
aoqi@0 653 bool rex_w = false, bool vector256 = false);
aoqi@0 654
aoqi@0 655 // Move/convert 32-bit integer value.
aoqi@0 656 int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src,
aoqi@0 657 VexSimdPrefix pre) {
aoqi@0 658 // It is OK to cast from Register to XMMRegister to pass argument here
aoqi@0 659 // since only encoding is used in simd_prefix_and_encode() and number of
aoqi@0 660 // Gen and Xmm registers are the same.
aoqi@0 661 return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre);
aoqi@0 662 }
aoqi@0 663 int simd_prefix_and_encode(XMMRegister dst, Register src, VexSimdPrefix pre) {
aoqi@0 664 return simd_prefix_and_encode(dst, xnoreg, src, pre);
aoqi@0 665 }
aoqi@0 666 int simd_prefix_and_encode(Register dst, XMMRegister src,
aoqi@0 667 VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
aoqi@0 668 return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc);
aoqi@0 669 }
aoqi@0 670
aoqi@0 671 // Move/convert 64-bit integer value.
aoqi@0 672 int simd_prefix_and_encode_q(XMMRegister dst, XMMRegister nds, Register src,
aoqi@0 673 VexSimdPrefix pre) {
aoqi@0 674 bool rex_w = true;
aoqi@0 675 return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, VEX_OPCODE_0F, rex_w);
aoqi@0 676 }
aoqi@0 677 int simd_prefix_and_encode_q(XMMRegister dst, Register src, VexSimdPrefix pre) {
aoqi@0 678 return simd_prefix_and_encode_q(dst, xnoreg, src, pre);
aoqi@0 679 }
aoqi@0 680 int simd_prefix_and_encode_q(Register dst, XMMRegister src,
aoqi@0 681 VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
aoqi@0 682 bool rex_w = true;
aoqi@0 683 return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc, rex_w);
aoqi@0 684 }
aoqi@0 685
aoqi@0 686 // Helper functions for groups of instructions
aoqi@0 687 void emit_arith_b(int op1, int op2, Register dst, int imm8);
aoqi@0 688
aoqi@0 689 void emit_arith(int op1, int op2, Register dst, int32_t imm32);
aoqi@0 690 // Force generation of a 4 byte immediate value even if it fits into 8bit
aoqi@0 691 void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32);
aoqi@0 692 void emit_arith(int op1, int op2, Register dst, Register src);
aoqi@0 693
aoqi@0 694 void emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre);
aoqi@0 695 void emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre);
aoqi@0 696 void emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre);
aoqi@0 697 void emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre);
aoqi@0 698 void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
aoqi@0 699 Address src, VexSimdPrefix pre, bool vector256);
aoqi@0 700 void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
aoqi@0 701 XMMRegister src, VexSimdPrefix pre, bool vector256);
aoqi@0 702
aoqi@0 703 void emit_operand(Register reg,
aoqi@0 704 Register base, Register index, Address::ScaleFactor scale,
aoqi@0 705 int disp,
aoqi@0 706 RelocationHolder const& rspec,
aoqi@0 707 int rip_relative_correction = 0);
aoqi@0 708
aoqi@0 709 void emit_operand(Register reg, Address adr, int rip_relative_correction = 0);
aoqi@0 710
aoqi@0 711 // operands that only take the original 32bit registers
aoqi@0 712 void emit_operand32(Register reg, Address adr);
aoqi@0 713
aoqi@0 714 void emit_operand(XMMRegister reg,
aoqi@0 715 Register base, Register index, Address::ScaleFactor scale,
aoqi@0 716 int disp,
aoqi@0 717 RelocationHolder const& rspec);
aoqi@0 718
aoqi@0 719 void emit_operand(XMMRegister reg, Address adr);
aoqi@0 720
aoqi@0 721 void emit_operand(MMXRegister reg, Address adr);
aoqi@0 722
aoqi@0 723 // workaround gcc (3.2.1-7) bug
aoqi@0 724 void emit_operand(Address adr, MMXRegister reg);
aoqi@0 725
aoqi@0 726
aoqi@0 727 // Immediate-to-memory forms
aoqi@0 728 void emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32);
aoqi@0 729
aoqi@0 730 void emit_farith(int b1, int b2, int i);
aoqi@0 731
aoqi@0 732
aoqi@0 733 protected:
aoqi@0 734 #ifdef ASSERT
aoqi@0 735 void check_relocation(RelocationHolder const& rspec, int format);
aoqi@0 736 #endif
aoqi@0 737
aoqi@0 738 void emit_data(jint data, relocInfo::relocType rtype, int format);
aoqi@0 739 void emit_data(jint data, RelocationHolder const& rspec, int format);
aoqi@0 740 void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0);
aoqi@0 741 void emit_data64(jlong data, RelocationHolder const& rspec, int format = 0);
aoqi@0 742
aoqi@0 743 bool reachable(AddressLiteral adr) NOT_LP64({ return true;});
aoqi@0 744
aoqi@0 745 // These are all easily abused and hence protected
aoqi@0 746
aoqi@0 747 // 32BIT ONLY SECTION
aoqi@0 748 #ifndef _LP64
aoqi@0 749 // Make these disappear in 64bit mode since they would never be correct
aoqi@0 750 void cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY
aoqi@0 751 void cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY
aoqi@0 752
aoqi@0 753 void mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY
aoqi@0 754 void mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY
aoqi@0 755
aoqi@0 756 void push_literal32(int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY
aoqi@0 757 #else
aoqi@0 758 // 64BIT ONLY SECTION
aoqi@0 759 void mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec); // 64BIT ONLY
aoqi@0 760
aoqi@0 761 void cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec);
aoqi@0 762 void cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec);
aoqi@0 763
aoqi@0 764 void mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec);
aoqi@0 765 void mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec);
aoqi@0 766 #endif // _LP64
aoqi@0 767
aoqi@0 768 // These are unique in that we are assured by the caller that the 32bit
aoqi@0 769 // relative in these instructions will always be able to reach the potentially
aoqi@0 770 // 64bit address described by entry. Since they can take a 64bit address they
aoqi@0 771 // don't have the 32 suffix like the other instructions in this class.
aoqi@0 772
aoqi@0 773 void call_literal(address entry, RelocationHolder const& rspec);
aoqi@0 774 void jmp_literal(address entry, RelocationHolder const& rspec);
aoqi@0 775
aoqi@0 776 // Avoid using directly section
aoqi@0 777 // Instructions in this section are actually usable by anyone without danger
aoqi@0 778 // of failure but have performance issues that are addressed by enhanced
aoqi@0 779 // instructions which will do the proper thing based on the particular cpu.
aoqi@0 780 // We protect them because we don't trust you...
aoqi@0 781
aoqi@0 782 // Don't use next inc() and dec() methods directly. INC & DEC instructions
aoqi@0 783 // could cause a partial flag stall since they don't set CF flag.
aoqi@0 784 // Use MacroAssembler::decrement() & MacroAssembler::increment() methods
aoqi@0 785 // which call inc() & dec() or add() & sub() in accordance with
aoqi@0 786 // the product flag UseIncDec value.
aoqi@0 787
aoqi@0 788 void decl(Register dst);
aoqi@0 789 void decl(Address dst);
aoqi@0 790 void decq(Register dst);
aoqi@0 791 void decq(Address dst);
aoqi@0 792
aoqi@0 793 void incl(Register dst);
aoqi@0 794 void incl(Address dst);
aoqi@0 795 void incq(Register dst);
aoqi@0 796 void incq(Address dst);
aoqi@0 797
aoqi@0 798 // New cpus require use of movsd and movss to avoid partial register stall
aoqi@0 799 // when loading from memory. But for old Opteron use movlpd instead of movsd.
aoqi@0 800 // The selection is done in MacroAssembler::movdbl() and movflt().
aoqi@0 801
aoqi@0 802 // Move Scalar Single-Precision Floating-Point Values
aoqi@0 803 void movss(XMMRegister dst, Address src);
aoqi@0 804 void movss(XMMRegister dst, XMMRegister src);
aoqi@0 805 void movss(Address dst, XMMRegister src);
aoqi@0 806
aoqi@0 807 // Move Scalar Double-Precision Floating-Point Values
aoqi@0 808 void movsd(XMMRegister dst, Address src);
aoqi@0 809 void movsd(XMMRegister dst, XMMRegister src);
aoqi@0 810 void movsd(Address dst, XMMRegister src);
aoqi@0 811 void movlpd(XMMRegister dst, Address src);
aoqi@0 812
aoqi@0 813 // New cpus require use of movaps and movapd to avoid partial register stall
aoqi@0 814 // when moving between registers.
aoqi@0 815 void movaps(XMMRegister dst, XMMRegister src);
aoqi@0 816 void movapd(XMMRegister dst, XMMRegister src);
aoqi@0 817
aoqi@0 818 // End avoid using directly
aoqi@0 819
aoqi@0 820
aoqi@0 821 // Instruction prefixes
aoqi@0 822 void prefix(Prefix p);
aoqi@0 823
aoqi@0 824 public:
aoqi@0 825
aoqi@0 826 // Creation: passes the given CodeBuffer on to the AbstractAssembler base constructor; no other work is done
aoqi@0 827 Assembler(CodeBuffer* code) : AbstractAssembler(code) {}
aoqi@0 828
aoqi@0 829 // Decoding
aoqi@0 830 static address locate_operand(address inst, WhichOperand which);
aoqi@0 831 static address locate_next_instruction(address inst);
aoqi@0 832
aoqi@0 833 // Utilities (NOTE(review): NOT_LP64 presumably supplies the inline 'return false' body on 32-bit builds only - confirm against the macro definition)
aoqi@0 834 static bool is_polling_page_far() NOT_LP64({ return false;});
aoqi@0 835
aoqi@0 836 // Generic instructions
aoqi@0 837 // Does 32bit or 64bit as needed for the platform. In some sense these
aoqi@0 838 // belong in macro assembler but there is no need for both varieties to exist
aoqi@0 839
aoqi@0 840 void lea(Register dst, Address src);
aoqi@0 841
aoqi@0 842 void mov(Register dst, Register src);
aoqi@0 843
aoqi@0 844 void pusha();
aoqi@0 845 void popa();
aoqi@0 846
aoqi@0 847 void pushf();
aoqi@0 848 void popf();
aoqi@0 849
aoqi@0 850 void push(int32_t imm32);
aoqi@0 851
aoqi@0 852 void push(Register src);
aoqi@0 853
aoqi@0 854 void pop(Register dst);
aoqi@0 855
aoqi@0 856 // These are dummies to prevent surprise implicit conversions to Register
aoqi@0 857 void push(void* v);
aoqi@0 858 void pop(void* v);
aoqi@0 859
aoqi@0 860 // These do register sized moves/scans
aoqi@0 861 void rep_mov();
aoqi@0 862 void rep_stos();
aoqi@0 863 void rep_stosb();
aoqi@0 864 void repne_scan();
aoqi@0 865 #ifdef _LP64
aoqi@0 866 void repne_scanl();
aoqi@0 867 #endif
aoqi@0 868
aoqi@0 869 // Vanilla instructions in lexical order
aoqi@0 870
aoqi@0 871 void adcl(Address dst, int32_t imm32);
aoqi@0 872 void adcl(Address dst, Register src);
aoqi@0 873 void adcl(Register dst, int32_t imm32);
aoqi@0 874 void adcl(Register dst, Address src);
aoqi@0 875 void adcl(Register dst, Register src);
aoqi@0 876
aoqi@0 877 void adcq(Register dst, int32_t imm32);
aoqi@0 878 void adcq(Register dst, Address src);
aoqi@0 879 void adcq(Register dst, Register src);
aoqi@0 880
aoqi@0 881 void addl(Address dst, int32_t imm32);
aoqi@0 882 void addl(Address dst, Register src);
aoqi@0 883 void addl(Register dst, int32_t imm32);
aoqi@0 884 void addl(Register dst, Address src);
aoqi@0 885 void addl(Register dst, Register src);
aoqi@0 886
aoqi@0 887 void addq(Address dst, int32_t imm32);
aoqi@0 888 void addq(Address dst, Register src);
aoqi@0 889 void addq(Register dst, int32_t imm32);
aoqi@0 890 void addq(Register dst, Address src);
aoqi@0 891 void addq(Register dst, Register src);
aoqi@0 892
kvn@7152 893 #ifdef _LP64
kvn@7152 894 //Add Unsigned Integers with Carry Flag
kvn@7152 895 void adcxq(Register dst, Register src);
kvn@7152 896
kvn@7152 897 //Add Unsigned Integers with Overflow Flag
kvn@7152 898 void adoxq(Register dst, Register src);
kvn@7152 899 #endif
kvn@7152 900
aoqi@0 901 void addr_nop_4();
aoqi@0 902 void addr_nop_5();
aoqi@0 903 void addr_nop_7();
aoqi@0 904 void addr_nop_8();
aoqi@0 905
aoqi@0 906 // Add Scalar Double-Precision Floating-Point Values
aoqi@0 907 void addsd(XMMRegister dst, Address src);
aoqi@0 908 void addsd(XMMRegister dst, XMMRegister src);
aoqi@0 909
aoqi@0 910 // Add Scalar Single-Precision Floating-Point Values
aoqi@0 911 void addss(XMMRegister dst, Address src);
aoqi@0 912 void addss(XMMRegister dst, XMMRegister src);
aoqi@0 913
aoqi@0 914 // AES instructions
aoqi@0 915 void aesdec(XMMRegister dst, Address src);
aoqi@0 916 void aesdec(XMMRegister dst, XMMRegister src);
aoqi@0 917 void aesdeclast(XMMRegister dst, Address src);
aoqi@0 918 void aesdeclast(XMMRegister dst, XMMRegister src);
aoqi@0 919 void aesenc(XMMRegister dst, Address src);
aoqi@0 920 void aesenc(XMMRegister dst, XMMRegister src);
aoqi@0 921 void aesenclast(XMMRegister dst, Address src);
aoqi@0 922 void aesenclast(XMMRegister dst, XMMRegister src);
aoqi@0 923
aoqi@0 924
aoqi@0 925 void andl(Address dst, int32_t imm32);
aoqi@0 926 void andl(Register dst, int32_t imm32);
aoqi@0 927 void andl(Register dst, Address src);
aoqi@0 928 void andl(Register dst, Register src);
aoqi@0 929
aoqi@0 930 void andq(Address dst, int32_t imm32);
aoqi@0 931 void andq(Register dst, int32_t imm32);
aoqi@0 932 void andq(Register dst, Address src);
aoqi@0 933 void andq(Register dst, Register src);
aoqi@0 934
aoqi@0 935 // BMI instructions
aoqi@0 936 void andnl(Register dst, Register src1, Register src2);
aoqi@0 937 void andnl(Register dst, Register src1, Address src2);
aoqi@0 938 void andnq(Register dst, Register src1, Register src2);
aoqi@0 939 void andnq(Register dst, Register src1, Address src2);
aoqi@0 940
aoqi@0 941 void blsil(Register dst, Register src);
aoqi@0 942 void blsil(Register dst, Address src);
aoqi@0 943 void blsiq(Register dst, Register src);
aoqi@0 944 void blsiq(Register dst, Address src);
aoqi@0 945
aoqi@0 946 void blsmskl(Register dst, Register src);
aoqi@0 947 void blsmskl(Register dst, Address src);
aoqi@0 948 void blsmskq(Register dst, Register src);
aoqi@0 949 void blsmskq(Register dst, Address src);
aoqi@0 950
aoqi@0 951 void blsrl(Register dst, Register src);
aoqi@0 952 void blsrl(Register dst, Address src);
aoqi@0 953 void blsrq(Register dst, Register src);
aoqi@0 954 void blsrq(Register dst, Address src);
aoqi@0 955
aoqi@0 956 void bsfl(Register dst, Register src);
aoqi@0 957 void bsrl(Register dst, Register src);
aoqi@0 958
aoqi@0 959 #ifdef _LP64
aoqi@0 960 void bsfq(Register dst, Register src);
aoqi@0 961 void bsrq(Register dst, Register src);
aoqi@0 962 #endif
aoqi@0 963
aoqi@0 964 void bswapl(Register reg);
aoqi@0 965
aoqi@0 966 void bswapq(Register reg);
aoqi@0 967
aoqi@0 968 void call(Label& L, relocInfo::relocType rtype);
aoqi@0 969 void call(Register reg); // push pc; pc <- reg
aoqi@0 970 void call(Address adr); // push pc; pc <- adr
aoqi@0 971
aoqi@0 972 void cdql();
aoqi@0 973
aoqi@0 974 void cdqq();
aoqi@0 975
aoqi@0 976 void cld();
aoqi@0 977
aoqi@0 978 void clflush(Address adr);
aoqi@0 979
aoqi@0 980 void cmovl(Condition cc, Register dst, Register src);
aoqi@0 981 void cmovl(Condition cc, Register dst, Address src);
aoqi@0 982
aoqi@0 983 void cmovq(Condition cc, Register dst, Register src);
aoqi@0 984 void cmovq(Condition cc, Register dst, Address src);
aoqi@0 985
aoqi@0 986
aoqi@0 987 void cmpb(Address dst, int imm8);
aoqi@0 988
aoqi@0 989 void cmpl(Address dst, int32_t imm32);
aoqi@0 990
aoqi@0 991 void cmpl(Register dst, int32_t imm32);
aoqi@0 992 void cmpl(Register dst, Register src);
aoqi@0 993 void cmpl(Register dst, Address src);
aoqi@0 994
aoqi@0 995 void cmpq(Address dst, int32_t imm32);
aoqi@0 996 void cmpq(Address dst, Register src);
aoqi@0 997
aoqi@0 998 void cmpq(Register dst, int32_t imm32);
aoqi@0 999 void cmpq(Register dst, Register src);
aoqi@0 1000 void cmpq(Register dst, Address src);
aoqi@0 1001
aoqi@0 1002 // these are dummies used to catch attempting to convert NULL to Register
aoqi@0 1003 void cmpl(Register dst, void* junk); // dummy
aoqi@0 1004 void cmpq(Register dst, void* junk); // dummy
aoqi@0 1005
aoqi@0 1006 void cmpw(Address dst, int imm16);
aoqi@0 1007
aoqi@0 1008 void cmpxchg8 (Address adr);
aoqi@0 1009
aoqi@0 1010 void cmpxchgl(Register reg, Address adr);
aoqi@0 1011
aoqi@0 1012 void cmpxchgq(Register reg, Address adr);
aoqi@0 1013
aoqi@0 1014 // Ordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
aoqi@0 1015 void comisd(XMMRegister dst, Address src);
aoqi@0 1016 void comisd(XMMRegister dst, XMMRegister src);
aoqi@0 1017
aoqi@0 1018 // Ordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS
aoqi@0 1019 void comiss(XMMRegister dst, Address src);
aoqi@0 1020 void comiss(XMMRegister dst, XMMRegister src);
aoqi@0 1021
aoqi@0 1022 // Identify processor type and features
aoqi@0 1023 void cpuid();
aoqi@0 1024
aoqi@0 1025 // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value
aoqi@0 1026 void cvtsd2ss(XMMRegister dst, XMMRegister src);
aoqi@0 1027 void cvtsd2ss(XMMRegister dst, Address src);
aoqi@0 1028
aoqi@0 1029 // Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value
aoqi@0 1030 void cvtsi2sdl(XMMRegister dst, Register src);
aoqi@0 1031 void cvtsi2sdl(XMMRegister dst, Address src);
aoqi@0 1032 void cvtsi2sdq(XMMRegister dst, Register src);
aoqi@0 1033 void cvtsi2sdq(XMMRegister dst, Address src);
aoqi@0 1034
aoqi@0 1035 // Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value
aoqi@0 1036 void cvtsi2ssl(XMMRegister dst, Register src);
aoqi@0 1037 void cvtsi2ssl(XMMRegister dst, Address src);
aoqi@0 1038 void cvtsi2ssq(XMMRegister dst, Register src);
aoqi@0 1039 void cvtsi2ssq(XMMRegister dst, Address src);
aoqi@0 1040
aoqi@0 1041 // Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Value
aoqi@0 1042 void cvtdq2pd(XMMRegister dst, XMMRegister src);
aoqi@0 1043
aoqi@0 1044 // Convert Packed Signed Doubleword Integers to Packed Single-Precision Floating-Point Value
aoqi@0 1045 void cvtdq2ps(XMMRegister dst, XMMRegister src);
aoqi@0 1046
aoqi@0 1047 // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value
aoqi@0 1048 void cvtss2sd(XMMRegister dst, XMMRegister src);
aoqi@0 1049 void cvtss2sd(XMMRegister dst, Address src);
aoqi@0 1050
aoqi@0 1051 // Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer
aoqi@0 1052 void cvttsd2sil(Register dst, Address src);
aoqi@0 1053 void cvttsd2sil(Register dst, XMMRegister src);
aoqi@0 1054 void cvttsd2siq(Register dst, XMMRegister src);
aoqi@0 1055
aoqi@0 1056 // Convert with Truncation Scalar Single-Precision Floating-Point Value to Doubleword Integer
aoqi@0 1057 void cvttss2sil(Register dst, XMMRegister src);
aoqi@0 1058 void cvttss2siq(Register dst, XMMRegister src);
aoqi@0 1059
aoqi@0 1060 // Divide Scalar Double-Precision Floating-Point Values
aoqi@0 1061 void divsd(XMMRegister dst, Address src);
aoqi@0 1062 void divsd(XMMRegister dst, XMMRegister src);
aoqi@0 1063
aoqi@0 1064 // Divide Scalar Single-Precision Floating-Point Values
aoqi@0 1065 void divss(XMMRegister dst, Address src);
aoqi@0 1066 void divss(XMMRegister dst, XMMRegister src);
aoqi@0 1067
aoqi@0 1068 void emms();
aoqi@0 1069
aoqi@0 1070 void fabs();
aoqi@0 1071
aoqi@0 1072 void fadd(int i);
aoqi@0 1073
aoqi@0 1074 void fadd_d(Address src);
aoqi@0 1075 void fadd_s(Address src);
aoqi@0 1076
aoqi@0 1077 // "Alternate" versions of x87 instructions place result down in FPU
aoqi@0 1078 // stack instead of on TOS
aoqi@0 1079
aoqi@0 1080 void fadda(int i); // "alternate" fadd
aoqi@0 1081 void faddp(int i = 1);
aoqi@0 1082
aoqi@0 1083 void fchs();
aoqi@0 1084
aoqi@0 1085 void fcom(int i);
aoqi@0 1086
aoqi@0 1087 void fcomp(int i = 1);
aoqi@0 1088 void fcomp_d(Address src);
aoqi@0 1089 void fcomp_s(Address src);
aoqi@0 1090
aoqi@0 1091 void fcompp();
aoqi@0 1092
aoqi@0 1093 void fcos();
aoqi@0 1094
aoqi@0 1095 void fdecstp();
aoqi@0 1096
aoqi@0 1097 void fdiv(int i);
aoqi@0 1098 void fdiv_d(Address src);
aoqi@0 1099 void fdivr_s(Address src);
aoqi@0 1100 void fdiva(int i); // "alternate" fdiv
aoqi@0 1101 void fdivp(int i = 1);
aoqi@0 1102
aoqi@0 1103 void fdivr(int i);
aoqi@0 1104 void fdivr_d(Address src);
aoqi@0 1105 void fdiv_s(Address src);
aoqi@0 1106
aoqi@0 1107 void fdivra(int i); // "alternate" reversed fdiv
aoqi@0 1108
aoqi@0 1109 void fdivrp(int i = 1);
aoqi@0 1110
aoqi@0 1111 void ffree(int i = 0);
aoqi@0 1112
aoqi@0 1113 void fild_d(Address adr);
aoqi@0 1114 void fild_s(Address adr);
aoqi@0 1115
aoqi@0 1116 void fincstp();
aoqi@0 1117
aoqi@0 1118 void finit();
aoqi@0 1119
aoqi@0 1120 void fist_s (Address adr);
aoqi@0 1121 void fistp_d(Address adr);
aoqi@0 1122 void fistp_s(Address adr);
aoqi@0 1123
aoqi@0 1124 void fld1();
aoqi@0 1125
aoqi@0 1126 void fld_d(Address adr);
aoqi@0 1127 void fld_s(Address adr);
aoqi@0 1128 void fld_s(int index);
aoqi@0 1129 void fld_x(Address adr); // extended-precision (80-bit) format
aoqi@0 1130
aoqi@0 1131 void fldcw(Address src);
aoqi@0 1132
aoqi@0 1133 void fldenv(Address src);
aoqi@0 1134
aoqi@0 1135 void fldlg2();
aoqi@0 1136
aoqi@0 1137 void fldln2();
aoqi@0 1138
aoqi@0 1139 void fldz();
aoqi@0 1140
aoqi@0 1141 void flog();
aoqi@0 1142 void flog10();
aoqi@0 1143
aoqi@0 1144 void fmul(int i);
aoqi@0 1145
aoqi@0 1146 void fmul_d(Address src);
aoqi@0 1147 void fmul_s(Address src);
aoqi@0 1148
aoqi@0 1149 void fmula(int i); // "alternate" fmul
aoqi@0 1150
aoqi@0 1151 void fmulp(int i = 1);
aoqi@0 1152
aoqi@0 1153 void fnsave(Address dst);
aoqi@0 1154
aoqi@0 1155 void fnstcw(Address src);
aoqi@0 1156
aoqi@0 1157 void fnstsw_ax();
aoqi@0 1158
aoqi@0 1159 void fprem();
aoqi@0 1160 void fprem1();
aoqi@0 1161
aoqi@0 1162 void frstor(Address src);
aoqi@0 1163
aoqi@0 1164 void fsin();
aoqi@0 1165
aoqi@0 1166 void fsqrt();
aoqi@0 1167
aoqi@0 1168 void fst_d(Address adr);
aoqi@0 1169 void fst_s(Address adr);
aoqi@0 1170
aoqi@0 1171 void fstp_d(Address adr);
aoqi@0 1172 void fstp_d(int index);
aoqi@0 1173 void fstp_s(Address adr);
aoqi@0 1174 void fstp_x(Address adr); // extended-precision (80-bit) format
aoqi@0 1175
aoqi@0 1176 void fsub(int i);
aoqi@0 1177 void fsub_d(Address src);
aoqi@0 1178 void fsub_s(Address src);
aoqi@0 1179
aoqi@0 1180 void fsuba(int i); // "alternate" fsub
aoqi@0 1181
aoqi@0 1182 void fsubp(int i = 1);
aoqi@0 1183
aoqi@0 1184 void fsubr(int i);
aoqi@0 1185 void fsubr_d(Address src);
aoqi@0 1186 void fsubr_s(Address src);
aoqi@0 1187
aoqi@0 1188 void fsubra(int i); // "alternate" reversed fsub
aoqi@0 1189
aoqi@0 1190 void fsubrp(int i = 1);
aoqi@0 1191
aoqi@0 1192 void ftan();
aoqi@0 1193
aoqi@0 1194 void ftst();
aoqi@0 1195
aoqi@0 1196 void fucomi(int i = 1);
aoqi@0 1197 void fucomip(int i = 1);
aoqi@0 1198
aoqi@0 1199 void fwait();
aoqi@0 1200
aoqi@0 1201 void fxch(int i = 1);
aoqi@0 1202
aoqi@0 1203 void fxrstor(Address src);
aoqi@0 1204
aoqi@0 1205 void fxsave(Address dst);
aoqi@0 1206
aoqi@0 1207 void fyl2x();
aoqi@0 1208 void frndint();
aoqi@0 1209 void f2xm1();
aoqi@0 1210 void fldl2e();
aoqi@0 1211
aoqi@0 1212 void hlt();
aoqi@0 1213
aoqi@0 1214 void idivl(Register src);
aoqi@0 1215 void divl(Register src); // Unsigned division
aoqi@0 1216
kvn@7152 1217 #ifdef _LP64
aoqi@0 1218 void idivq(Register src);
kvn@7152 1219 #endif
aoqi@0 1220
aoqi@0 1221 void imull(Register dst, Register src);
aoqi@0 1222 void imull(Register dst, Register src, int value);
aoqi@0 1223 void imull(Register dst, Address src);
aoqi@0 1224
kvn@7152 1225 #ifdef _LP64
aoqi@0 1226 void imulq(Register dst, Register src);
aoqi@0 1227 void imulq(Register dst, Register src, int value);
aoqi@0 1228 void imulq(Register dst, Address src);
aoqi@0 1229 #endif
aoqi@0 1230
aoqi@0 1231 // jcc is the generic conditional branch generator to run-
aoqi@0 1232 // time routines, jcc is used for branches to labels. jcc
aoqi@0 1233 // takes a branch opcode (cc) and a label (L) and generates
aoqi@0 1234 // either a backward branch or a forward branch and links it
aoqi@0 1235 // to the label fixup chain. Usage:
aoqi@0 1236 //
aoqi@0 1237 // Label L; // unbound label
aoqi@0 1238 // jcc(cc, L); // forward branch to unbound label
aoqi@0 1239 // bind(L); // bind label to the current pc
aoqi@0 1240 // jcc(cc, L); // backward branch to bound label
aoqi@0 1241 // bind(L); // illegal: a label may be bound only once
aoqi@0 1242 //
aoqi@0 1243 // Note: The same Label can be used for forward and backward branches
aoqi@0 1244 // but it may be bound only once.
aoqi@0 1245
aoqi@0 1246 void jcc(Condition cc, Label& L, bool maybe_short = true);
aoqi@0 1247
aoqi@0 1248 // Conditional jump to a 8-bit offset to L.
aoqi@0 1249 // WARNING: be very careful using this for forward jumps. If the label is
aoqi@0 1250 // not bound within an 8-bit offset of this instruction, a run-time error
aoqi@0 1251 // will occur.
aoqi@0 1252 void jccb(Condition cc, Label& L);
aoqi@0 1253
aoqi@0 1254 void jmp(Address entry); // pc <- entry
aoqi@0 1255
aoqi@0 1256 // Label operations & relative jumps (PPUM Appendix D)
aoqi@0 1257 void jmp(Label& L, bool maybe_short = true); // unconditional jump to L
aoqi@0 1258
aoqi@0 1259 void jmp(Register entry); // pc <- entry
aoqi@0 1260
aoqi@0 1261 // Unconditional 8-bit offset jump to L.
aoqi@0 1262 // WARNING: be very careful using this for forward jumps. If the label is
aoqi@0 1263 // not bound within an 8-bit offset of this instruction, a run-time error
aoqi@0 1264 // will occur.
aoqi@0 1265 void jmpb(Label& L);
aoqi@0 1266
aoqi@0 1267 void ldmxcsr( Address src );
aoqi@0 1268
aoqi@0 1269 void leal(Register dst, Address src);
aoqi@0 1270
aoqi@0 1271 void leaq(Register dst, Address src);
aoqi@0 1272
aoqi@0 1273 void lfence();
aoqi@0 1274
aoqi@0 1275 void lock();
aoqi@0 1276
aoqi@0 1277 void lzcntl(Register dst, Register src);
aoqi@0 1278
aoqi@0 1279 #ifdef _LP64
aoqi@0 1280 void lzcntq(Register dst, Register src);
aoqi@0 1281 #endif
aoqi@0 1282
aoqi@0 1283 enum Membar_mask_bits { // bit mask values accepted by membar() as its order_constraint argument
aoqi@0 1284 StoreStore = 1 << 3, // bit 3; membar() emits no code for this bit
aoqi@0 1285 LoadStore = 1 << 2, // bit 2; membar() emits no code for this bit
aoqi@0 1286 StoreLoad = 1 << 1, // bit 1; the only bit membar() tests before emitting code
aoqi@0 1287 LoadLoad = 1 << 0 // bit 0; membar() emits no code for this bit
aoqi@0 1288 };
aoqi@0 1289
aoqi@0 1290 // Emits a barrier for order_constraint: serializes memory and blows flags. Emits nothing unless os::is_MP() and the StoreLoad bit is set.
aoqi@0 1291 void membar(Membar_mask_bits order_constraint) {
aoqi@0 1292 if (os::is_MP()) {
aoqi@0 1293 // We only have to handle StoreLoad; any other bits in order_constraint emit no code here
aoqi@0 1294 if (order_constraint & StoreLoad) {
aoqi@0 1295 // All usable chips support "locked" instructions which suffice
aoqi@0 1296 // as barriers, and are much faster than the alternative of
aoqi@0 1297 // using cpuid instruction. We use here a locked add [esp],0.
aoqi@0 1298 // This is conveniently otherwise a no-op except for blowing
aoqi@0 1299 // flags.
aoqi@0 1300 // Any change to this code may need to revisit other places in
aoqi@0 1301 // the code where this idiom is used, in particular the
aoqi@0 1302 // orderAccess code.
aoqi@0 1303 lock();
aoqi@0 1304 addl(Address(rsp, 0), 0);// Adds 0 to the word at [rsp]; with the preceding lock() this asserts the lock# signal
aoqi@0 1305 }
aoqi@0 1306 }
aoqi@0 1307 }
aoqi@0 1308
aoqi@0 1309 void mfence();
aoqi@0 1310
aoqi@0 1311 // Moves
aoqi@0 1312
aoqi@0 1313 void mov64(Register dst, int64_t imm64);
aoqi@0 1314
aoqi@0 1315 void movb(Address dst, Register src);
aoqi@0 1316 void movb(Address dst, int imm8);
aoqi@0 1317 void movb(Register dst, Address src);
aoqi@0 1318
aoqi@0 1319 void movdl(XMMRegister dst, Register src);
aoqi@0 1320 void movdl(Register dst, XMMRegister src);
aoqi@0 1321 void movdl(XMMRegister dst, Address src);
aoqi@0 1322 void movdl(Address dst, XMMRegister src);
aoqi@0 1323
aoqi@0 1324 // Move Double Quadword
aoqi@0 1325 void movdq(XMMRegister dst, Register src);
aoqi@0 1326 void movdq(Register dst, XMMRegister src);
aoqi@0 1327
aoqi@0 1328 // Move Aligned Double Quadword
aoqi@0 1329 void movdqa(XMMRegister dst, XMMRegister src);
aoqi@0 1330 void movdqa(XMMRegister dst, Address src);
aoqi@0 1331
aoqi@0 1332 // Move Unaligned Double Quadword
aoqi@0 1333 void movdqu(Address dst, XMMRegister src);
aoqi@0 1334 void movdqu(XMMRegister dst, Address src);
aoqi@0 1335 void movdqu(XMMRegister dst, XMMRegister src);
aoqi@0 1336
aoqi@0 1337 // Move Unaligned 256bit Vector
aoqi@0 1338 void vmovdqu(Address dst, XMMRegister src);
aoqi@0 1339 void vmovdqu(XMMRegister dst, Address src);
aoqi@0 1340 void vmovdqu(XMMRegister dst, XMMRegister src);
aoqi@0 1341
aoqi@0 1342 // Move lower 64bit to high 64bit in 128bit register
aoqi@0 1343 void movlhps(XMMRegister dst, XMMRegister src);
aoqi@0 1344
aoqi@0 1345 void movl(Register dst, int32_t imm32);
aoqi@0 1346 void movl(Address dst, int32_t imm32);
aoqi@0 1347 void movl(Register dst, Register src);
aoqi@0 1348 void movl(Register dst, Address src);
aoqi@0 1349 void movl(Address dst, Register src);
aoqi@0 1350
aoqi@0 1351 // These dummies prevent using movl from converting a zero (like NULL) into Register
aoqi@0 1352 // by giving the compiler two choices it can't resolve
aoqi@0 1353
aoqi@0 1354 void movl(Address dst, void* junk);
aoqi@0 1355 void movl(Register dst, void* junk);
aoqi@0 1356
aoqi@0 1357 #ifdef _LP64
aoqi@0 1358 void movq(Register dst, Register src);
aoqi@0 1359 void movq(Register dst, Address src);
aoqi@0 1360 void movq(Address dst, Register src);
aoqi@0 1361 #endif
aoqi@0 1362
aoqi@0 1363 void movq(Address dst, MMXRegister src );
aoqi@0 1364 void movq(MMXRegister dst, Address src );
aoqi@0 1365
aoqi@0 1366 #ifdef _LP64
aoqi@0 1367 // These dummies prevent using movq from converting a zero (like NULL) into Register
aoqi@0 1368 // by giving the compiler two choices it can't resolve
aoqi@0 1369
aoqi@0 1370 void movq(Address dst, void* dummy);
aoqi@0 1371 void movq(Register dst, void* dummy);
aoqi@0 1372 #endif
aoqi@0 1373
aoqi@0 1374 // Move Quadword
aoqi@0 1375 void movq(Address dst, XMMRegister src);
aoqi@0 1376 void movq(XMMRegister dst, Address src);
aoqi@0 1377
aoqi@0 1378 void movsbl(Register dst, Address src);
aoqi@0 1379 void movsbl(Register dst, Register src);
aoqi@0 1380
aoqi@0 1381 #ifdef _LP64
aoqi@0 1382 void movsbq(Register dst, Address src);
aoqi@0 1383 void movsbq(Register dst, Register src);
aoqi@0 1384
aoqi@0 1385 // Move signed 32bit immediate to 64bit extending sign
aoqi@0 1386 void movslq(Address dst, int32_t imm64);
aoqi@0 1387 void movslq(Register dst, int32_t imm64);
aoqi@0 1388
aoqi@0 1389 void movslq(Register dst, Address src);
aoqi@0 1390 void movslq(Register dst, Register src);
aoqi@0 1391 void movslq(Register dst, void* src); // Dummy declaration to cause NULL to be ambiguous
aoqi@0 1392 #endif
aoqi@0 1393
aoqi@0 1394 void movswl(Register dst, Address src);
aoqi@0 1395 void movswl(Register dst, Register src);
aoqi@0 1396
aoqi@0 1397 #ifdef _LP64
aoqi@0 1398 void movswq(Register dst, Address src);
aoqi@0 1399 void movswq(Register dst, Register src);
aoqi@0 1400 #endif
aoqi@0 1401
aoqi@0 1402 void movw(Address dst, int imm16);
aoqi@0 1403 void movw(Register dst, Address src);
aoqi@0 1404 void movw(Address dst, Register src);
aoqi@0 1405
aoqi@0 1406 void movzbl(Register dst, Address src);
aoqi@0 1407 void movzbl(Register dst, Register src);
aoqi@0 1408
aoqi@0 1409 #ifdef _LP64
aoqi@0 1410 void movzbq(Register dst, Address src);
aoqi@0 1411 void movzbq(Register dst, Register src);
aoqi@0 1412 #endif
aoqi@0 1413
aoqi@0 1414 void movzwl(Register dst, Address src);
aoqi@0 1415 void movzwl(Register dst, Register src);
aoqi@0 1416
aoqi@0 1417 #ifdef _LP64
aoqi@0 1418 void movzwq(Register dst, Address src);
aoqi@0 1419 void movzwq(Register dst, Register src);
aoqi@0 1420 #endif
aoqi@0 1421
kvn@7152 1422 // Unsigned multiply with RAX destination register
aoqi@0 1423 void mull(Address src);
aoqi@0 1424 void mull(Register src);
aoqi@0 1425
kvn@7152 1426 #ifdef _LP64
kvn@7152 1427 void mulq(Address src);
kvn@7152 1428 void mulq(Register src);
kvn@7152 1429 void mulxq(Register dst1, Register dst2, Register src);
kvn@7152 1430 #endif
kvn@7152 1431
aoqi@0 1432 // Multiply Scalar Double-Precision Floating-Point Values
aoqi@0 1433 void mulsd(XMMRegister dst, Address src);
aoqi@0 1434 void mulsd(XMMRegister dst, XMMRegister src);
aoqi@0 1435
aoqi@0 1436 // Multiply Scalar Single-Precision Floating-Point Values
aoqi@0 1437 void mulss(XMMRegister dst, Address src);
aoqi@0 1438 void mulss(XMMRegister dst, XMMRegister src);
aoqi@0 1439
aoqi@0 1440 void negl(Register dst);
aoqi@0 1441
aoqi@0 1442 #ifdef _LP64
aoqi@0 1443 void negq(Register dst);
aoqi@0 1444 #endif
aoqi@0 1445
aoqi@0 1446 void nop(int i = 1);
aoqi@0 1447
aoqi@0 1448 void notl(Register dst);
aoqi@0 1449
aoqi@0 1450 #ifdef _LP64
aoqi@0 1451 void notq(Register dst);
aoqi@0 1452 #endif
aoqi@0 1453
aoqi@0 1454 void orl(Address dst, int32_t imm32);
aoqi@0 1455 void orl(Register dst, int32_t imm32);
aoqi@0 1456 void orl(Register dst, Address src);
aoqi@0 1457 void orl(Register dst, Register src);
igerasim@8307 1458 void orl(Address dst, Register src);
aoqi@0 1459
aoqi@0 1460 void orq(Address dst, int32_t imm32);
aoqi@0 1461 void orq(Register dst, int32_t imm32);
aoqi@0 1462 void orq(Register dst, Address src);
aoqi@0 1463 void orq(Register dst, Register src);
aoqi@0 1464
aoqi@0 1465 // Pack with unsigned saturation
aoqi@0 1466 void packuswb(XMMRegister dst, XMMRegister src);
aoqi@0 1467 void packuswb(XMMRegister dst, Address src);
aoqi@0 1468 void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
aoqi@0 1469
aoqi@0 1470 // Permutation of 64bit words
aoqi@0 1471 void vpermq(XMMRegister dst, XMMRegister src, int imm8, bool vector256);
aoqi@0 1472
aoqi@0 1473 void pause();
aoqi@0 1474
aoqi@0 1475 // SSE4.2 string instructions
aoqi@0 1476 void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
aoqi@0 1477 void pcmpestri(XMMRegister xmm1, Address src, int imm8);
aoqi@0 1478
aoqi@0 1479 // SSE 4.1 extract
aoqi@0 1480 void pextrd(Register dst, XMMRegister src, int imm8);
aoqi@0 1481 void pextrq(Register dst, XMMRegister src, int imm8);
aoqi@0 1482
aoqi@0 1483 // SSE 4.1 insert
aoqi@0 1484 void pinsrd(XMMRegister dst, Register src, int imm8);
aoqi@0 1485 void pinsrq(XMMRegister dst, Register src, int imm8);
aoqi@0 1486
aoqi@0 1487 // SSE4.1 packed move
aoqi@0 1488 void pmovzxbw(XMMRegister dst, XMMRegister src);
aoqi@0 1489 void pmovzxbw(XMMRegister dst, Address src);
aoqi@0 1490
aoqi@0 1491 #ifndef _LP64 // no 32bit push/pop on amd64
aoqi@0 1492 void popl(Address dst);
aoqi@0 1493 #endif
aoqi@0 1494
aoqi@0 1495 #ifdef _LP64
aoqi@0 1496 void popq(Address dst);
aoqi@0 1497 #endif
aoqi@0 1498
aoqi@0 1499 void popcntl(Register dst, Address src);
aoqi@0 1500 void popcntl(Register dst, Register src);
aoqi@0 1501
aoqi@0 1502 #ifdef _LP64
aoqi@0 1503 void popcntq(Register dst, Address src);
aoqi@0 1504 void popcntq(Register dst, Register src);
aoqi@0 1505 #endif
aoqi@0 1506
aoqi@0 1507 // Prefetches (SSE, SSE2, 3DNOW only)
aoqi@0 1508
aoqi@0 1509 void prefetchnta(Address src);
aoqi@0 1510 void prefetchr(Address src);
aoqi@0 1511 void prefetcht0(Address src);
aoqi@0 1512 void prefetcht1(Address src);
aoqi@0 1513 void prefetcht2(Address src);
aoqi@0 1514 void prefetchw(Address src);
aoqi@0 1515
aoqi@0 1516 // Shuffle Bytes
aoqi@0 1517 void pshufb(XMMRegister dst, XMMRegister src);
aoqi@0 1518 void pshufb(XMMRegister dst, Address src);
aoqi@0 1519
aoqi@0 1520 // Shuffle Packed Doublewords
aoqi@0 1521 void pshufd(XMMRegister dst, XMMRegister src, int mode);
aoqi@0 1522 void pshufd(XMMRegister dst, Address src, int mode);
aoqi@0 1523
aoqi@0 1524 // Shuffle Packed Low Words
aoqi@0 1525 void pshuflw(XMMRegister dst, XMMRegister src, int mode);
aoqi@0 1526 void pshuflw(XMMRegister dst, Address src, int mode);
aoqi@0 1527
aoqi@0 1528 // Shift Right by bytes Logical DoubleQuadword Immediate
aoqi@0 1529 void psrldq(XMMRegister dst, int shift);
aoqi@0 1530
aoqi@0 1531 // Logical Compare 128bit
aoqi@0 1532 void ptest(XMMRegister dst, XMMRegister src);
aoqi@0 1533 void ptest(XMMRegister dst, Address src);
aoqi@0 1534 // Logical Compare 256bit
aoqi@0 1535 void vptest(XMMRegister dst, XMMRegister src);
aoqi@0 1536 void vptest(XMMRegister dst, Address src);
aoqi@0 1537
aoqi@0 1538 // Interleave Low Bytes
aoqi@0 1539 void punpcklbw(XMMRegister dst, XMMRegister src);
aoqi@0 1540 void punpcklbw(XMMRegister dst, Address src);
aoqi@0 1541
aoqi@0 1542 // Interleave Low Doublewords
aoqi@0 1543 void punpckldq(XMMRegister dst, XMMRegister src);
aoqi@0 1544 void punpckldq(XMMRegister dst, Address src);
aoqi@0 1545
aoqi@0 1546 // Interleave Low Quadwords
aoqi@0 1547 void punpcklqdq(XMMRegister dst, XMMRegister src);
aoqi@0 1548
aoqi@0 1549 #ifndef _LP64 // no 32bit push/pop on amd64
aoqi@0 1550 void pushl(Address src);
aoqi@0 1551 #endif
aoqi@0 1552
aoqi@0 1553 void pushq(Address src);
aoqi@0 1554
aoqi@0 1555 void rcll(Register dst, int imm8);
aoqi@0 1556
aoqi@0 1557 void rclq(Register dst, int imm8);
aoqi@0 1558
igerasim@8307 1559 void rcrq(Register dst, int imm8);
igerasim@8307 1560
aoqi@0 1561 void rdtsc();
aoqi@0 1562
aoqi@0 1563 void ret(int imm16);
aoqi@0 1564
kvn@7152 1565 #ifdef _LP64
kvn@7152 1566 void rorq(Register dst, int imm8);
kvn@7152 1567 void rorxq(Register dst, Register src, int imm8);
kvn@7152 1568 #endif
kvn@7152 1569
aoqi@0 1570 void sahf();
aoqi@0 1571
aoqi@0 1572 void sarl(Register dst, int imm8);
aoqi@0 1573 void sarl(Register dst);
aoqi@0 1574
aoqi@0 1575 void sarq(Register dst, int imm8);
aoqi@0 1576 void sarq(Register dst);
aoqi@0 1577
aoqi@0 1578 void sbbl(Address dst, int32_t imm32);
aoqi@0 1579 void sbbl(Register dst, int32_t imm32);
aoqi@0 1580 void sbbl(Register dst, Address src);
aoqi@0 1581 void sbbl(Register dst, Register src);
aoqi@0 1582
aoqi@0 1583 void sbbq(Address dst, int32_t imm32);
aoqi@0 1584 void sbbq(Register dst, int32_t imm32);
aoqi@0 1585 void sbbq(Register dst, Address src);
aoqi@0 1586 void sbbq(Register dst, Register src);
aoqi@0 1587
aoqi@0 1588 void setb(Condition cc, Register dst);
aoqi@0 1589
aoqi@0 1590 void shldl(Register dst, Register src);
aoqi@0 1591
aoqi@0 1592 void shll(Register dst, int imm8);
aoqi@0 1593 void shll(Register dst);
aoqi@0 1594
aoqi@0 1595 void shlq(Register dst, int imm8);
aoqi@0 1596 void shlq(Register dst);
aoqi@0 1597
aoqi@0 1598 void shrdl(Register dst, Register src);
aoqi@0 1599
aoqi@0 1600 void shrl(Register dst, int imm8);
aoqi@0 1601 void shrl(Register dst);
aoqi@0 1602
aoqi@0 1603 void shrq(Register dst, int imm8);
aoqi@0 1604 void shrq(Register dst);
aoqi@0 1605
aoqi@0 1606 void smovl(); // QQQ generic?
aoqi@0 1607
aoqi@0 1608 // Compute Square Root of Scalar Double-Precision Floating-Point Value
aoqi@0 1609 void sqrtsd(XMMRegister dst, Address src);
aoqi@0 1610 void sqrtsd(XMMRegister dst, XMMRegister src);
aoqi@0 1611
aoqi@0 1612 // Compute Square Root of Scalar Single-Precision Floating-Point Value
aoqi@0 1613 void sqrtss(XMMRegister dst, Address src);
aoqi@0 1614 void sqrtss(XMMRegister dst, XMMRegister src);
aoqi@0 1615
aoqi@0 1616 void std();
aoqi@0 1617
aoqi@0 1618 void stmxcsr( Address dst );
aoqi@0 1619
aoqi@0 1620 void subl(Address dst, int32_t imm32);
aoqi@0 1621 void subl(Address dst, Register src);
aoqi@0 1622 void subl(Register dst, int32_t imm32);
aoqi@0 1623 void subl(Register dst, Address src);
aoqi@0 1624 void subl(Register dst, Register src);
aoqi@0 1625
aoqi@0 1626 void subq(Address dst, int32_t imm32);
aoqi@0 1627 void subq(Address dst, Register src);
aoqi@0 1628 void subq(Register dst, int32_t imm32);
aoqi@0 1629 void subq(Register dst, Address src);
aoqi@0 1630 void subq(Register dst, Register src);
aoqi@0 1631
aoqi@0 1632 // Force generation of a 4-byte immediate value even if it fits into 8 bits
aoqi@0 1633 void subl_imm32(Register dst, int32_t imm32);
aoqi@0 1634 void subq_imm32(Register dst, int32_t imm32);
aoqi@0 1635
aoqi@0 1636 // Subtract Scalar Double-Precision Floating-Point Values
aoqi@0 1637 void subsd(XMMRegister dst, Address src);
aoqi@0 1638 void subsd(XMMRegister dst, XMMRegister src);
aoqi@0 1639
aoqi@0 1640 // Subtract Scalar Single-Precision Floating-Point Values
aoqi@0 1641 void subss(XMMRegister dst, Address src);
aoqi@0 1642 void subss(XMMRegister dst, XMMRegister src);
aoqi@0 1643
aoqi@0 1644 void testb(Register dst, int imm8);
aoqi@0 1645
aoqi@0 1646 void testl(Register dst, int32_t imm32);
aoqi@0 1647 void testl(Register dst, Register src);
aoqi@0 1648 void testl(Register dst, Address src);
aoqi@0 1649
aoqi@0 1650 void testq(Register dst, int32_t imm32);
aoqi@0 1651 void testq(Register dst, Register src);
aoqi@0 1652
aoqi@0 1653 // BMI - count trailing zeros
aoqi@0 1654 void tzcntl(Register dst, Register src);
aoqi@0 1655 void tzcntq(Register dst, Register src);
aoqi@0 1656
aoqi@0 1657 // Unordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
aoqi@0 1658 void ucomisd(XMMRegister dst, Address src);
aoqi@0 1659 void ucomisd(XMMRegister dst, XMMRegister src);
aoqi@0 1660
aoqi@0 1661 // Unordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS
aoqi@0 1662 void ucomiss(XMMRegister dst, Address src);
aoqi@0 1663 void ucomiss(XMMRegister dst, XMMRegister src);
aoqi@0 1664
aoqi@0 1665 void xabort(int8_t imm8);
aoqi@0 1666
aoqi@0 1667 void xaddl(Address dst, Register src);
aoqi@0 1668
aoqi@0 1669 void xaddq(Address dst, Register src);
aoqi@0 1670
aoqi@0 1671 void xbegin(Label& abort, relocInfo::relocType rtype = relocInfo::none);
aoqi@0 1672
aoqi@0 1673 void xchgl(Register reg, Address adr);
aoqi@0 1674 void xchgl(Register dst, Register src);
aoqi@0 1675
aoqi@0 1676 void xchgq(Register reg, Address adr);
aoqi@0 1677 void xchgq(Register dst, Register src);
aoqi@0 1678
aoqi@0 1679 void xend();
aoqi@0 1680
aoqi@0 1681 // Get Value of Extended Control Register
aoqi@0 1682 void xgetbv();
aoqi@0 1683
aoqi@0 1684 void xorl(Register dst, int32_t imm32);
aoqi@0 1685 void xorl(Register dst, Address src);
aoqi@0 1686 void xorl(Register dst, Register src);
aoqi@0 1687
aoqi@0 1688 void xorq(Register dst, Address src);
aoqi@0 1689 void xorq(Register dst, Register src);
aoqi@0 1690
aoqi@0 1691 void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0
aoqi@0 1692
aoqi@0 1693 // AVX 3-operand scalar instructions (encoded with VEX prefix)
aoqi@0 1694
aoqi@0 1695 void vaddsd(XMMRegister dst, XMMRegister nds, Address src);
aoqi@0 1696 void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
aoqi@0 1697 void vaddss(XMMRegister dst, XMMRegister nds, Address src);
aoqi@0 1698 void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src);
aoqi@0 1699 void vdivsd(XMMRegister dst, XMMRegister nds, Address src);
aoqi@0 1700 void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
aoqi@0 1701 void vdivss(XMMRegister dst, XMMRegister nds, Address src);
aoqi@0 1702 void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src);
aoqi@0 1703 void vmulsd(XMMRegister dst, XMMRegister nds, Address src);
aoqi@0 1704 void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
aoqi@0 1705 void vmulss(XMMRegister dst, XMMRegister nds, Address src);
aoqi@0 1706 void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src);
aoqi@0 1707 void vsubsd(XMMRegister dst, XMMRegister nds, Address src);
aoqi@0 1708 void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
aoqi@0 1709 void vsubss(XMMRegister dst, XMMRegister nds, Address src);
aoqi@0 1710 void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);
aoqi@0 1711
aoqi@0 1712
aoqi@0 1713 //====================VECTOR ARITHMETIC=====================================
aoqi@0 1714
aoqi@0 1715 // Add Packed Floating-Point Values
aoqi@0 1716 void addpd(XMMRegister dst, XMMRegister src);
aoqi@0 1717 void addps(XMMRegister dst, XMMRegister src);
aoqi@0 1718 void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
aoqi@0 1719 void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
aoqi@0 1720 void vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
aoqi@0 1721 void vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
aoqi@0 1722
aoqi@0 1723 // Subtract Packed Floating-Point Values
aoqi@0 1724 void subpd(XMMRegister dst, XMMRegister src);
aoqi@0 1725 void subps(XMMRegister dst, XMMRegister src);
aoqi@0 1726 void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
aoqi@0 1727 void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
aoqi@0 1728 void vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
aoqi@0 1729 void vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
aoqi@0 1730
aoqi@0 1731 // Multiply Packed Floating-Point Values
aoqi@0 1732 void mulpd(XMMRegister dst, XMMRegister src);
aoqi@0 1733 void mulps(XMMRegister dst, XMMRegister src);
aoqi@0 1734 void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
aoqi@0 1735 void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
aoqi@0 1736 void vmulpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
aoqi@0 1737 void vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
aoqi@0 1738
aoqi@0 1739 // Divide Packed Floating-Point Values
aoqi@0 1740 void divpd(XMMRegister dst, XMMRegister src);
aoqi@0 1741 void divps(XMMRegister dst, XMMRegister src);
aoqi@0 1742 void vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
aoqi@0 1743 void vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
aoqi@0 1744 void vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
aoqi@0 1745 void vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
aoqi@0 1746
aoqi@0 1747 // Bitwise Logical AND of Packed Floating-Point Values
aoqi@0 1748 void andpd(XMMRegister dst, XMMRegister src);
aoqi@0 1749 void andps(XMMRegister dst, XMMRegister src);
aoqi@0 1750 void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
aoqi@0 1751 void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
aoqi@0 1752 void vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
aoqi@0 1753 void vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
aoqi@0 1754
aoqi@0 1755 // Bitwise Logical XOR of Packed Floating-Point Values
aoqi@0 1756 void xorpd(XMMRegister dst, XMMRegister src);
aoqi@0 1757 void xorps(XMMRegister dst, XMMRegister src);
aoqi@0 1758 void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
aoqi@0 1759 void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
aoqi@0 1760 void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
aoqi@0 1761 void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
aoqi@0 1762
aoqi@0 1763 // Add packed integers
aoqi@0 1764 void paddb(XMMRegister dst, XMMRegister src);
aoqi@0 1765 void paddw(XMMRegister dst, XMMRegister src);
aoqi@0 1766 void paddd(XMMRegister dst, XMMRegister src);
aoqi@0 1767 void paddq(XMMRegister dst, XMMRegister src);
aoqi@0 1768 void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
aoqi@0 1769 void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
aoqi@0 1770 void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
aoqi@0 1771 void vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
aoqi@0 1772 void vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
aoqi@0 1773 void vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
aoqi@0 1774 void vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
aoqi@0 1775 void vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
aoqi@0 1776
aoqi@0 1777 // Sub packed integers
aoqi@0 1778 void psubb(XMMRegister dst, XMMRegister src);
aoqi@0 1779 void psubw(XMMRegister dst, XMMRegister src);
aoqi@0 1780 void psubd(XMMRegister dst, XMMRegister src);
aoqi@0 1781 void psubq(XMMRegister dst, XMMRegister src);
aoqi@0 1782 void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
aoqi@0 1783 void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
aoqi@0 1784 void vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
aoqi@0 1785 void vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
aoqi@0 1786 void vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
aoqi@0 1787 void vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
aoqi@0 1788 void vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
aoqi@0 1789 void vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
aoqi@0 1790
aoqi@0 1791 // Multiply packed integers (only shorts and ints)
aoqi@0 1792 void pmullw(XMMRegister dst, XMMRegister src);
aoqi@0 1793 void pmulld(XMMRegister dst, XMMRegister src);
aoqi@0 1794 void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
aoqi@0 1795 void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
aoqi@0 1796 void vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
aoqi@0 1797 void vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
aoqi@0 1798
aoqi@0 1799 // Shift left packed integers
aoqi@0 1800 void psllw(XMMRegister dst, int shift);
aoqi@0 1801 void pslld(XMMRegister dst, int shift);
aoqi@0 1802 void psllq(XMMRegister dst, int shift);
aoqi@0 1803 void psllw(XMMRegister dst, XMMRegister shift);
aoqi@0 1804 void pslld(XMMRegister dst, XMMRegister shift);
aoqi@0 1805 void psllq(XMMRegister dst, XMMRegister shift);
aoqi@0 1806 void vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256);
aoqi@0 1807 void vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256);
aoqi@0 1808 void vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256);
aoqi@0 1809 void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
aoqi@0 1810 void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
aoqi@0 1811 void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
aoqi@0 1812
aoqi@0 1813 // Logical shift right packed integers
aoqi@0 1814 void psrlw(XMMRegister dst, int shift);
aoqi@0 1815 void psrld(XMMRegister dst, int shift);
aoqi@0 1816 void psrlq(XMMRegister dst, int shift);
aoqi@0 1817 void psrlw(XMMRegister dst, XMMRegister shift);
aoqi@0 1818 void psrld(XMMRegister dst, XMMRegister shift);
aoqi@0 1819 void psrlq(XMMRegister dst, XMMRegister shift);
aoqi@0 1820 void vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256);
aoqi@0 1821 void vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256);
aoqi@0 1822 void vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256);
aoqi@0 1823 void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
aoqi@0 1824 void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
aoqi@0 1825 void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
aoqi@0 1826
aoqi@0 1827 // Arithmetic shift right packed integers (only shorts and ints, no instructions for longs)
aoqi@0 1828 void psraw(XMMRegister dst, int shift);
aoqi@0 1829 void psrad(XMMRegister dst, int shift);
aoqi@0 1830 void psraw(XMMRegister dst, XMMRegister shift);
aoqi@0 1831 void psrad(XMMRegister dst, XMMRegister shift);
aoqi@0 1832 void vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256);
aoqi@0 1833 void vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256);
aoqi@0 1834 void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
aoqi@0 1835 void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
aoqi@0 1836
aoqi@0 1837 // And packed integers
aoqi@0 1838 void pand(XMMRegister dst, XMMRegister src);
aoqi@0 1839 void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
aoqi@0 1840 void vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
aoqi@0 1841
aoqi@0 1842 // Or packed integers
aoqi@0 1843 void por(XMMRegister dst, XMMRegister src);
aoqi@0 1844 void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
aoqi@0 1845 void vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
aoqi@0 1846
aoqi@0 1847 // Xor packed integers
aoqi@0 1848 void pxor(XMMRegister dst, XMMRegister src);
aoqi@0 1849 void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
aoqi@0 1850 void vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
aoqi@0 1851
aoqi@0 1852 // Copy low 128bit into high 128bit of YMM registers.
aoqi@0 1853 void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
aoqi@0 1854 void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
aoqi@0 1855
aoqi@0 1856 // Load/store the high 128 bits of YMM registers without destroying the other half.
aoqi@0 1857 void vinsertf128h(XMMRegister dst, Address src);
aoqi@0 1858 void vinserti128h(XMMRegister dst, Address src);
aoqi@0 1859 void vextractf128h(Address dst, XMMRegister src);
aoqi@0 1860 void vextracti128h(Address dst, XMMRegister src);
aoqi@0 1861
aoqi@0 1862 // Duplicate 4-byte integer data from src into 8 locations in dst
aoqi@0 1863 void vpbroadcastd(XMMRegister dst, XMMRegister src);
aoqi@0 1864
aoqi@0 1865 // Carry-Less Multiplication Quadword
kvn@7025 1866 void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
aoqi@0 1867 void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);
aoqi@0 1868
aoqi@0 1869 // AVX instruction which is used to clear upper 128 bits of YMM registers and
aoqi@0 1870 // to avoid transition penalty between AVX and SSE states. There is no
aoqi@0 1871 // penalty if legacy SSE instructions are encoded using VEX prefix because
aoqi@0 1872 // they always clear upper 128 bits. It should be used before calling
aoqi@0 1873 // runtime code and native libraries.
aoqi@0 1874 void vzeroupper();
aoqi@0 1875
aoqi@0 1876 protected:
aoqi@0 1877 // The following instructions require 16-byte address alignment in SSE mode.
aoqi@0 1878 // They should be called only from corresponding MacroAssembler instructions.
aoqi@0 1879 void andpd(XMMRegister dst, Address src);
aoqi@0 1880 void andps(XMMRegister dst, Address src);
aoqi@0 1881 void xorpd(XMMRegister dst, Address src);
aoqi@0 1882 void xorps(XMMRegister dst, Address src);
aoqi@0 1883
aoqi@0 1884 };
aoqi@0 1885
aoqi@0 1886 #endif // CPU_X86_VM_ASSEMBLER_X86_HPP

mercurial