1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/cpu/x86/vm/assembler_x86.hpp Wed Apr 27 01:25:04 2016 +0800 1.3 @@ -0,0 +1,1859 @@ 1.4 +/* 1.5 + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. 1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 1.7 + * 1.8 + * This code is free software; you can redistribute it and/or modify it 1.9 + * under the terms of the GNU General Public License version 2 only, as 1.10 + * published by the Free Software Foundation. 1.11 + * 1.12 + * This code is distributed in the hope that it will be useful, but WITHOUT 1.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 1.14 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 1.15 + * version 2 for more details (a copy is included in the LICENSE file that 1.16 + * accompanied this code). 1.17 + * 1.18 + * You should have received a copy of the GNU General Public License version 1.19 + * 2 along with this work; if not, write to the Free Software Foundation, 1.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 1.21 + * 1.22 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 1.23 + * or visit www.oracle.com if you need additional information or have any 1.24 + * questions. 1.25 + * 1.26 + */ 1.27 + 1.28 +#ifndef CPU_X86_VM_ASSEMBLER_X86_HPP 1.29 +#define CPU_X86_VM_ASSEMBLER_X86_HPP 1.30 + 1.31 +#include "asm/register.hpp" 1.32 + 1.33 +class BiasedLockingCounters; 1.34 + 1.35 +// Contains all the definitions needed for x86 assembly code generation. 1.36 + 1.37 +// Calling convention 1.38 +class Argument VALUE_OBJ_CLASS_SPEC { 1.39 + public: 1.40 + enum { 1.41 +#ifdef _LP64 1.42 +#ifdef _WIN64 1.43 + n_int_register_parameters_c = 4, // rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...) 1.44 + n_float_register_parameters_c = 4, // xmm0 - xmm3 (c_farg0, c_farg1, ... 
) 1.45 +#else 1.46 + n_int_register_parameters_c = 6, // rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...) 1.47 + n_float_register_parameters_c = 8, // xmm0 - xmm7 (c_farg0, c_farg1, ... ) 1.48 +#endif // _WIN64 1.49 + n_int_register_parameters_j = 6, // j_rarg0, j_rarg1, ... 1.50 + n_float_register_parameters_j = 8 // j_farg0, j_farg1, ... 1.51 +#else 1.52 + n_register_parameters = 0 // 0 registers used to pass arguments 1.53 +#endif // _LP64 1.54 + }; 1.55 +}; 1.56 + 1.57 + 1.58 +#ifdef _LP64 1.59 +// Symbolically name the register arguments used by the c calling convention. 1.60 +// Windows is different from linux/solaris. So much for standards... 1.61 + 1.62 +#ifdef _WIN64 1.63 + 1.64 +REGISTER_DECLARATION(Register, c_rarg0, rcx); 1.65 +REGISTER_DECLARATION(Register, c_rarg1, rdx); 1.66 +REGISTER_DECLARATION(Register, c_rarg2, r8); 1.67 +REGISTER_DECLARATION(Register, c_rarg3, r9); 1.68 + 1.69 +REGISTER_DECLARATION(XMMRegister, c_farg0, xmm0); 1.70 +REGISTER_DECLARATION(XMMRegister, c_farg1, xmm1); 1.71 +REGISTER_DECLARATION(XMMRegister, c_farg2, xmm2); 1.72 +REGISTER_DECLARATION(XMMRegister, c_farg3, xmm3); 1.73 + 1.74 +#else 1.75 + 1.76 +REGISTER_DECLARATION(Register, c_rarg0, rdi); 1.77 +REGISTER_DECLARATION(Register, c_rarg1, rsi); 1.78 +REGISTER_DECLARATION(Register, c_rarg2, rdx); 1.79 +REGISTER_DECLARATION(Register, c_rarg3, rcx); 1.80 +REGISTER_DECLARATION(Register, c_rarg4, r8); 1.81 +REGISTER_DECLARATION(Register, c_rarg5, r9); 1.82 + 1.83 +REGISTER_DECLARATION(XMMRegister, c_farg0, xmm0); 1.84 +REGISTER_DECLARATION(XMMRegister, c_farg1, xmm1); 1.85 +REGISTER_DECLARATION(XMMRegister, c_farg2, xmm2); 1.86 +REGISTER_DECLARATION(XMMRegister, c_farg3, xmm3); 1.87 +REGISTER_DECLARATION(XMMRegister, c_farg4, xmm4); 1.88 +REGISTER_DECLARATION(XMMRegister, c_farg5, xmm5); 1.89 +REGISTER_DECLARATION(XMMRegister, c_farg6, xmm6); 1.90 +REGISTER_DECLARATION(XMMRegister, c_farg7, xmm7); 1.91 + 1.92 +#endif // _WIN64 1.93 + 1.94 +// Symbolically name the register 
arguments used by the Java calling convention. 1.95 +// We have control over the convention for java so we can do what we please. 1.96 +// What pleases us is to offset the java calling convention so that when 1.97 +// we call a suitable jni method the arguments are lined up and we don't 1.98 +// have to do little shuffling. A suitable jni method is non-static and a 1.99 +// small number of arguments (two fewer args on windows) 1.100 +// 1.101 +// |-------------------------------------------------------| 1.102 +// | c_rarg0 c_rarg1 c_rarg2 c_rarg3 c_rarg4 c_rarg5 | 1.103 +// |-------------------------------------------------------| 1.104 +// | rcx rdx r8 r9 rdi* rsi* | windows (* not a c_rarg) 1.105 +// | rdi rsi rdx rcx r8 r9 | solaris/linux 1.106 +// |-------------------------------------------------------| 1.107 +// | j_rarg5 j_rarg0 j_rarg1 j_rarg2 j_rarg3 j_rarg4 | 1.108 +// |-------------------------------------------------------| 1.109 + 1.110 +REGISTER_DECLARATION(Register, j_rarg0, c_rarg1); 1.111 +REGISTER_DECLARATION(Register, j_rarg1, c_rarg2); 1.112 +REGISTER_DECLARATION(Register, j_rarg2, c_rarg3); 1.113 +// Windows runs out of register args here 1.114 +#ifdef _WIN64 1.115 +REGISTER_DECLARATION(Register, j_rarg3, rdi); 1.116 +REGISTER_DECLARATION(Register, j_rarg4, rsi); 1.117 +#else 1.118 +REGISTER_DECLARATION(Register, j_rarg3, c_rarg4); 1.119 +REGISTER_DECLARATION(Register, j_rarg4, c_rarg5); 1.120 +#endif /* _WIN64 */ 1.121 +REGISTER_DECLARATION(Register, j_rarg5, c_rarg0); 1.122 + 1.123 +REGISTER_DECLARATION(XMMRegister, j_farg0, xmm0); 1.124 +REGISTER_DECLARATION(XMMRegister, j_farg1, xmm1); 1.125 +REGISTER_DECLARATION(XMMRegister, j_farg2, xmm2); 1.126 +REGISTER_DECLARATION(XMMRegister, j_farg3, xmm3); 1.127 +REGISTER_DECLARATION(XMMRegister, j_farg4, xmm4); 1.128 +REGISTER_DECLARATION(XMMRegister, j_farg5, xmm5); 1.129 +REGISTER_DECLARATION(XMMRegister, j_farg6, xmm6); 1.130 +REGISTER_DECLARATION(XMMRegister, j_farg7, xmm7); 1.131 + 1.132 
+REGISTER_DECLARATION(Register, rscratch1, r10); // volatile 1.133 +REGISTER_DECLARATION(Register, rscratch2, r11); // volatile 1.134 + 1.135 +REGISTER_DECLARATION(Register, r12_heapbase, r12); // callee-saved 1.136 +REGISTER_DECLARATION(Register, r15_thread, r15); // callee-saved 1.137 + 1.138 +#else 1.139 +// rscratch1 will apear in 32bit code that is dead but of course must compile 1.140 +// Using noreg ensures if the dead code is incorrectly live and executed it 1.141 +// will cause an assertion failure 1.142 +#define rscratch1 noreg 1.143 +#define rscratch2 noreg 1.144 + 1.145 +#endif // _LP64 1.146 + 1.147 +// JSR 292 fixed register usages: 1.148 +REGISTER_DECLARATION(Register, rbp_mh_SP_save, rbp); 1.149 + 1.150 +// Address is an abstraction used to represent a memory location 1.151 +// using any of the amd64 addressing modes with one object. 1.152 +// 1.153 +// Note: A register location is represented via a Register, not 1.154 +// via an address for efficiency & simplicity reasons. 
1.155 + 1.156 +class ArrayAddress; 1.157 + 1.158 +class Address VALUE_OBJ_CLASS_SPEC { 1.159 + public: 1.160 + enum ScaleFactor { 1.161 + no_scale = -1, 1.162 + times_1 = 0, 1.163 + times_2 = 1, 1.164 + times_4 = 2, 1.165 + times_8 = 3, 1.166 + times_ptr = LP64_ONLY(times_8) NOT_LP64(times_4) 1.167 + }; 1.168 + static ScaleFactor times(int size) { 1.169 + assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size"); 1.170 + if (size == 8) return times_8; 1.171 + if (size == 4) return times_4; 1.172 + if (size == 2) return times_2; 1.173 + return times_1; 1.174 + } 1.175 + static int scale_size(ScaleFactor scale) { 1.176 + assert(scale != no_scale, ""); 1.177 + assert(((1 << (int)times_1) == 1 && 1.178 + (1 << (int)times_2) == 2 && 1.179 + (1 << (int)times_4) == 4 && 1.180 + (1 << (int)times_8) == 8), ""); 1.181 + return (1 << (int)scale); 1.182 + } 1.183 + 1.184 + private: 1.185 + Register _base; 1.186 + Register _index; 1.187 + ScaleFactor _scale; 1.188 + int _disp; 1.189 + RelocationHolder _rspec; 1.190 + 1.191 + // Easily misused constructors make them private 1.192 + // %%% can we make these go away? 1.193 + NOT_LP64(Address(address loc, RelocationHolder spec);) 1.194 + Address(int disp, address loc, relocInfo::relocType rtype); 1.195 + Address(int disp, address loc, RelocationHolder spec); 1.196 + 1.197 + public: 1.198 + 1.199 + int disp() { return _disp; } 1.200 + // creation 1.201 + Address() 1.202 + : _base(noreg), 1.203 + _index(noreg), 1.204 + _scale(no_scale), 1.205 + _disp(0) { 1.206 + } 1.207 + 1.208 + // No default displacement otherwise Register can be implicitly 1.209 + // converted to 0(Register) which is quite a different animal. 
1.210 + 1.211 + Address(Register base, int disp) 1.212 + : _base(base), 1.213 + _index(noreg), 1.214 + _scale(no_scale), 1.215 + _disp(disp) { 1.216 + } 1.217 + 1.218 + Address(Register base, Register index, ScaleFactor scale, int disp = 0) 1.219 + : _base (base), 1.220 + _index(index), 1.221 + _scale(scale), 1.222 + _disp (disp) { 1.223 + assert(!index->is_valid() == (scale == Address::no_scale), 1.224 + "inconsistent address"); 1.225 + } 1.226 + 1.227 + Address(Register base, RegisterOrConstant index, ScaleFactor scale = times_1, int disp = 0) 1.228 + : _base (base), 1.229 + _index(index.register_or_noreg()), 1.230 + _scale(scale), 1.231 + _disp (disp + (index.constant_or_zero() * scale_size(scale))) { 1.232 + if (!index.is_register()) scale = Address::no_scale; 1.233 + assert(!_index->is_valid() == (scale == Address::no_scale), 1.234 + "inconsistent address"); 1.235 + } 1.236 + 1.237 + Address plus_disp(int disp) const { 1.238 + Address a = (*this); 1.239 + a._disp += disp; 1.240 + return a; 1.241 + } 1.242 + Address plus_disp(RegisterOrConstant disp, ScaleFactor scale = times_1) const { 1.243 + Address a = (*this); 1.244 + a._disp += disp.constant_or_zero() * scale_size(scale); 1.245 + if (disp.is_register()) { 1.246 + assert(!a.index()->is_valid(), "competing indexes"); 1.247 + a._index = disp.as_register(); 1.248 + a._scale = scale; 1.249 + } 1.250 + return a; 1.251 + } 1.252 + bool is_same_address(Address a) const { 1.253 + // disregard _rspec 1.254 + return _base == a._base && _disp == a._disp && _index == a._index && _scale == a._scale; 1.255 + } 1.256 + 1.257 + // The following two overloads are used in connection with the 1.258 + // ByteSize type (see sizes.hpp). They simplify the use of 1.259 + // ByteSize'd arguments in assembly code. Note that their equivalent 1.260 + // for the optimized build are the member functions with int disp 1.261 + // argument since ByteSize is mapped to an int type in that case. 
1.262 + // 1.263 + // Note: DO NOT introduce similar overloaded functions for WordSize 1.264 + // arguments as in the optimized mode, both ByteSize and WordSize 1.265 + // are mapped to the same type and thus the compiler cannot make a 1.266 + // distinction anymore (=> compiler errors). 1.267 + 1.268 +#ifdef ASSERT 1.269 + Address(Register base, ByteSize disp) 1.270 + : _base(base), 1.271 + _index(noreg), 1.272 + _scale(no_scale), 1.273 + _disp(in_bytes(disp)) { 1.274 + } 1.275 + 1.276 + Address(Register base, Register index, ScaleFactor scale, ByteSize disp) 1.277 + : _base(base), 1.278 + _index(index), 1.279 + _scale(scale), 1.280 + _disp(in_bytes(disp)) { 1.281 + assert(!index->is_valid() == (scale == Address::no_scale), 1.282 + "inconsistent address"); 1.283 + } 1.284 + 1.285 + Address(Register base, RegisterOrConstant index, ScaleFactor scale, ByteSize disp) 1.286 + : _base (base), 1.287 + _index(index.register_or_noreg()), 1.288 + _scale(scale), 1.289 + _disp (in_bytes(disp) + (index.constant_or_zero() * scale_size(scale))) { 1.290 + if (!index.is_register()) scale = Address::no_scale; 1.291 + assert(!_index->is_valid() == (scale == Address::no_scale), 1.292 + "inconsistent address"); 1.293 + } 1.294 + 1.295 +#endif // ASSERT 1.296 + 1.297 + // accessors 1.298 + bool uses(Register reg) const { return _base == reg || _index == reg; } 1.299 + Register base() const { return _base; } 1.300 + Register index() const { return _index; } 1.301 + ScaleFactor scale() const { return _scale; } 1.302 + int disp() const { return _disp; } 1.303 + 1.304 + // Convert the raw encoding form into the form expected by the constructor for 1.305 + // Address. An index of 4 (rsp) corresponds to having no index, so convert 1.306 + // that to noreg for the Address constructor. 
1.307 + static Address make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc); 1.308 + 1.309 + static Address make_array(ArrayAddress); 1.310 + 1.311 + private: 1.312 + bool base_needs_rex() const { 1.313 + return _base != noreg && _base->encoding() >= 8; 1.314 + } 1.315 + 1.316 + bool index_needs_rex() const { 1.317 + return _index != noreg &&_index->encoding() >= 8; 1.318 + } 1.319 + 1.320 + relocInfo::relocType reloc() const { return _rspec.type(); } 1.321 + 1.322 + friend class Assembler; 1.323 + friend class MacroAssembler; 1.324 + friend class LIR_Assembler; // base/index/scale/disp 1.325 +}; 1.326 + 1.327 +// 1.328 +// AddressLiteral has been split out from Address because operands of this type 1.329 +// need to be treated specially on 32bit vs. 64bit platforms. By splitting it out 1.330 +// the few instructions that need to deal with address literals are unique and the 1.331 +// MacroAssembler does not have to implement every instruction in the Assembler 1.332 +// in order to search for address literals that may need special handling depending 1.333 +// on the instruction and the platform. As small step on the way to merging i486/amd64 1.334 +// directories. 1.335 +// 1.336 +class AddressLiteral VALUE_OBJ_CLASS_SPEC { 1.337 + friend class ArrayAddress; 1.338 + RelocationHolder _rspec; 1.339 + // Typically we use AddressLiterals we want to use their rval 1.340 + // However in some situations we want the lval (effect address) of the item. 1.341 + // We provide a special factory for making those lvals. 1.342 + bool _is_lval; 1.343 + 1.344 + // If the target is far we'll need to load the ea of this to 1.345 + // a register to reach it. Otherwise if near we can do rip 1.346 + // relative addressing. 
1.347 + 1.348 + address _target; 1.349 + 1.350 + protected: 1.351 + // creation 1.352 + AddressLiteral() 1.353 + : _is_lval(false), 1.354 + _target(NULL) 1.355 + {} 1.356 + 1.357 + public: 1.358 + 1.359 + 1.360 + AddressLiteral(address target, relocInfo::relocType rtype); 1.361 + 1.362 + AddressLiteral(address target, RelocationHolder const& rspec) 1.363 + : _rspec(rspec), 1.364 + _is_lval(false), 1.365 + _target(target) 1.366 + {} 1.367 + 1.368 + AddressLiteral addr() { 1.369 + AddressLiteral ret = *this; 1.370 + ret._is_lval = true; 1.371 + return ret; 1.372 + } 1.373 + 1.374 + 1.375 + private: 1.376 + 1.377 + address target() { return _target; } 1.378 + bool is_lval() { return _is_lval; } 1.379 + 1.380 + relocInfo::relocType reloc() const { return _rspec.type(); } 1.381 + const RelocationHolder& rspec() const { return _rspec; } 1.382 + 1.383 + friend class Assembler; 1.384 + friend class MacroAssembler; 1.385 + friend class Address; 1.386 + friend class LIR_Assembler; 1.387 +}; 1.388 + 1.389 +// Convience classes 1.390 +class RuntimeAddress: public AddressLiteral { 1.391 + 1.392 + public: 1.393 + 1.394 + RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {} 1.395 + 1.396 +}; 1.397 + 1.398 +class ExternalAddress: public AddressLiteral { 1.399 + private: 1.400 + static relocInfo::relocType reloc_for_target(address target) { 1.401 + // Sometimes ExternalAddress is used for values which aren't 1.402 + // exactly addresses, like the card table base. 1.403 + // external_word_type can't be used for values in the first page 1.404 + // so just skip the reloc in that case. 1.405 + return external_word_Relocation::can_be_relocated(target) ? 
relocInfo::external_word_type : relocInfo::none; 1.406 + } 1.407 + 1.408 + public: 1.409 + 1.410 + ExternalAddress(address target) : AddressLiteral(target, reloc_for_target(target)) {} 1.411 + 1.412 +}; 1.413 + 1.414 +class InternalAddress: public AddressLiteral { 1.415 + 1.416 + public: 1.417 + 1.418 + InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {} 1.419 + 1.420 +}; 1.421 + 1.422 +// x86 can do array addressing as a single operation since disp can be an absolute 1.423 +// address amd64 can't. We create a class that expresses the concept but does extra 1.424 +// magic on amd64 to get the final result 1.425 + 1.426 +class ArrayAddress VALUE_OBJ_CLASS_SPEC { 1.427 + private: 1.428 + 1.429 + AddressLiteral _base; 1.430 + Address _index; 1.431 + 1.432 + public: 1.433 + 1.434 + ArrayAddress() {}; 1.435 + ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {}; 1.436 + AddressLiteral base() { return _base; } 1.437 + Address index() { return _index; } 1.438 + 1.439 +}; 1.440 + 1.441 +const int FPUStateSizeInWords = NOT_LP64(27) LP64_ONLY( 512 / wordSize); 1.442 + 1.443 +// The Intel x86/Amd64 Assembler: Pure assembler doing NO optimizations on the instruction 1.444 +// level (e.g. mov rax, 0 is not translated into xor rax, rax!); i.e., what you write 1.445 +// is what you get. The Assembler is generating code into a CodeBuffer. 1.446 + 1.447 +class Assembler : public AbstractAssembler { 1.448 + friend class AbstractAssembler; // for the non-virtual hack 1.449 + friend class LIR_Assembler; // as_Address() 1.450 + friend class StubGenerator; 1.451 + 1.452 + public: 1.453 + enum Condition { // The x86 condition codes used for conditional jumps/moves. 
1.454 + zero = 0x4, 1.455 + notZero = 0x5, 1.456 + equal = 0x4, 1.457 + notEqual = 0x5, 1.458 + less = 0xc, 1.459 + lessEqual = 0xe, 1.460 + greater = 0xf, 1.461 + greaterEqual = 0xd, 1.462 + below = 0x2, 1.463 + belowEqual = 0x6, 1.464 + above = 0x7, 1.465 + aboveEqual = 0x3, 1.466 + overflow = 0x0, 1.467 + noOverflow = 0x1, 1.468 + carrySet = 0x2, 1.469 + carryClear = 0x3, 1.470 + negative = 0x8, 1.471 + positive = 0x9, 1.472 + parity = 0xa, 1.473 + noParity = 0xb 1.474 + }; 1.475 + 1.476 + enum Prefix { 1.477 + // segment overrides 1.478 + CS_segment = 0x2e, 1.479 + SS_segment = 0x36, 1.480 + DS_segment = 0x3e, 1.481 + ES_segment = 0x26, 1.482 + FS_segment = 0x64, 1.483 + GS_segment = 0x65, 1.484 + 1.485 + REX = 0x40, 1.486 + 1.487 + REX_B = 0x41, 1.488 + REX_X = 0x42, 1.489 + REX_XB = 0x43, 1.490 + REX_R = 0x44, 1.491 + REX_RB = 0x45, 1.492 + REX_RX = 0x46, 1.493 + REX_RXB = 0x47, 1.494 + 1.495 + REX_W = 0x48, 1.496 + 1.497 + REX_WB = 0x49, 1.498 + REX_WX = 0x4A, 1.499 + REX_WXB = 0x4B, 1.500 + REX_WR = 0x4C, 1.501 + REX_WRB = 0x4D, 1.502 + REX_WRX = 0x4E, 1.503 + REX_WRXB = 0x4F, 1.504 + 1.505 + VEX_3bytes = 0xC4, 1.506 + VEX_2bytes = 0xC5 1.507 + }; 1.508 + 1.509 + enum VexPrefix { 1.510 + VEX_B = 0x20, 1.511 + VEX_X = 0x40, 1.512 + VEX_R = 0x80, 1.513 + VEX_W = 0x80 1.514 + }; 1.515 + 1.516 + enum VexSimdPrefix { 1.517 + VEX_SIMD_NONE = 0x0, 1.518 + VEX_SIMD_66 = 0x1, 1.519 + VEX_SIMD_F3 = 0x2, 1.520 + VEX_SIMD_F2 = 0x3 1.521 + }; 1.522 + 1.523 + enum VexOpcode { 1.524 + VEX_OPCODE_NONE = 0x0, 1.525 + VEX_OPCODE_0F = 0x1, 1.526 + VEX_OPCODE_0F_38 = 0x2, 1.527 + VEX_OPCODE_0F_3A = 0x3 1.528 + }; 1.529 + 1.530 + enum WhichOperand { 1.531 + // input to locate_operand, and format code for relocations 1.532 + imm_operand = 0, // embedded 32-bit|64-bit immediate operand 1.533 + disp32_operand = 1, // embedded 32-bit displacement or address 1.534 + call32_operand = 2, // embedded 32-bit self-relative displacement 1.535 +#ifndef _LP64 1.536 + _WhichOperand_limit = 3 
1.537 +#else 1.538 + narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop 1.539 + _WhichOperand_limit = 4 1.540 +#endif 1.541 + }; 1.542 + 1.543 + 1.544 + 1.545 + // NOTE: The general philopsophy of the declarations here is that 64bit versions 1.546 + // of instructions are freely declared without the need for wrapping them an ifdef. 1.547 + // (Some dangerous instructions are ifdef's out of inappropriate jvm's.) 1.548 + // In the .cpp file the implementations are wrapped so that they are dropped out 1.549 + // of the resulting jvm. This is done mostly to keep the footprint of MINIMAL 1.550 + // to the size it was prior to merging up the 32bit and 64bit assemblers. 1.551 + // 1.552 + // This does mean you'll get a linker/runtime error if you use a 64bit only instruction 1.553 + // in a 32bit vm. This is somewhat unfortunate but keeps the ifdef noise down. 1.554 + 1.555 +private: 1.556 + 1.557 + 1.558 + // 64bit prefixes 1.559 + int prefix_and_encode(int reg_enc, bool byteinst = false); 1.560 + int prefixq_and_encode(int reg_enc); 1.561 + 1.562 + int prefix_and_encode(int dst_enc, int src_enc, bool byteinst = false); 1.563 + int prefixq_and_encode(int dst_enc, int src_enc); 1.564 + 1.565 + void prefix(Register reg); 1.566 + void prefix(Address adr); 1.567 + void prefixq(Address adr); 1.568 + 1.569 + void prefix(Address adr, Register reg, bool byteinst = false); 1.570 + void prefix(Address adr, XMMRegister reg); 1.571 + void prefixq(Address adr, Register reg); 1.572 + void prefixq(Address adr, XMMRegister reg); 1.573 + 1.574 + void prefetch_prefix(Address src); 1.575 + 1.576 + void rex_prefix(Address adr, XMMRegister xreg, 1.577 + VexSimdPrefix pre, VexOpcode opc, bool rex_w); 1.578 + int rex_prefix_and_encode(int dst_enc, int src_enc, 1.579 + VexSimdPrefix pre, VexOpcode opc, bool rex_w); 1.580 + 1.581 + void vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, 1.582 + int nds_enc, VexSimdPrefix pre, VexOpcode opc, 1.583 + bool vector256); 1.584 + 
1.585 + void vex_prefix(Address adr, int nds_enc, int xreg_enc, 1.586 + VexSimdPrefix pre, VexOpcode opc, 1.587 + bool vex_w, bool vector256); 1.588 + 1.589 + void vex_prefix(XMMRegister dst, XMMRegister nds, Address src, 1.590 + VexSimdPrefix pre, bool vector256 = false) { 1.591 + int dst_enc = dst->encoding(); 1.592 + int nds_enc = nds->is_valid() ? nds->encoding() : 0; 1.593 + vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, false, vector256); 1.594 + } 1.595 + 1.596 + void vex_prefix_0F38(Register dst, Register nds, Address src) { 1.597 + bool vex_w = false; 1.598 + bool vector256 = false; 1.599 + vex_prefix(src, nds->encoding(), dst->encoding(), 1.600 + VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256); 1.601 + } 1.602 + 1.603 + void vex_prefix_0F38_q(Register dst, Register nds, Address src) { 1.604 + bool vex_w = true; 1.605 + bool vector256 = false; 1.606 + vex_prefix(src, nds->encoding(), dst->encoding(), 1.607 + VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256); 1.608 + } 1.609 + int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, 1.610 + VexSimdPrefix pre, VexOpcode opc, 1.611 + bool vex_w, bool vector256); 1.612 + 1.613 + int vex_prefix_0F38_and_encode(Register dst, Register nds, Register src) { 1.614 + bool vex_w = false; 1.615 + bool vector256 = false; 1.616 + return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), 1.617 + VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256); 1.618 + } 1.619 + int vex_prefix_0F38_and_encode_q(Register dst, Register nds, Register src) { 1.620 + bool vex_w = true; 1.621 + bool vector256 = false; 1.622 + return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), 1.623 + VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256); 1.624 + } 1.625 + int vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, 1.626 + VexSimdPrefix pre, bool vector256 = false, 1.627 + VexOpcode opc = VEX_OPCODE_0F) { 1.628 + int src_enc = src->encoding(); 1.629 + int 
dst_enc = dst->encoding(); 1.630 + int nds_enc = nds->is_valid() ? nds->encoding() : 0; 1.631 + return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, false, vector256); 1.632 + } 1.633 + 1.634 + void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, 1.635 + VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F, 1.636 + bool rex_w = false, bool vector256 = false); 1.637 + 1.638 + void simd_prefix(XMMRegister dst, Address src, 1.639 + VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) { 1.640 + simd_prefix(dst, xnoreg, src, pre, opc); 1.641 + } 1.642 + 1.643 + void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre) { 1.644 + simd_prefix(src, dst, pre); 1.645 + } 1.646 + void simd_prefix_q(XMMRegister dst, XMMRegister nds, Address src, 1.647 + VexSimdPrefix pre) { 1.648 + bool rex_w = true; 1.649 + simd_prefix(dst, nds, src, pre, VEX_OPCODE_0F, rex_w); 1.650 + } 1.651 + 1.652 + int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, 1.653 + VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F, 1.654 + bool rex_w = false, bool vector256 = false); 1.655 + 1.656 + // Move/convert 32-bit integer value. 1.657 + int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src, 1.658 + VexSimdPrefix pre) { 1.659 + // It is OK to cast from Register to XMMRegister to pass argument here 1.660 + // since only encoding is used in simd_prefix_and_encode() and number of 1.661 + // Gen and Xmm registers are the same. 
1.662 + return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre); 1.663 + } 1.664 + int simd_prefix_and_encode(XMMRegister dst, Register src, VexSimdPrefix pre) { 1.665 + return simd_prefix_and_encode(dst, xnoreg, src, pre); 1.666 + } 1.667 + int simd_prefix_and_encode(Register dst, XMMRegister src, 1.668 + VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) { 1.669 + return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc); 1.670 + } 1.671 + 1.672 + // Move/convert 64-bit integer value. 1.673 + int simd_prefix_and_encode_q(XMMRegister dst, XMMRegister nds, Register src, 1.674 + VexSimdPrefix pre) { 1.675 + bool rex_w = true; 1.676 + return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, VEX_OPCODE_0F, rex_w); 1.677 + } 1.678 + int simd_prefix_and_encode_q(XMMRegister dst, Register src, VexSimdPrefix pre) { 1.679 + return simd_prefix_and_encode_q(dst, xnoreg, src, pre); 1.680 + } 1.681 + int simd_prefix_and_encode_q(Register dst, XMMRegister src, 1.682 + VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) { 1.683 + bool rex_w = true; 1.684 + return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc, rex_w); 1.685 + } 1.686 + 1.687 + // Helper functions for groups of instructions 1.688 + void emit_arith_b(int op1, int op2, Register dst, int imm8); 1.689 + 1.690 + void emit_arith(int op1, int op2, Register dst, int32_t imm32); 1.691 + // Force generation of a 4 byte immediate value even if it fits into 8bit 1.692 + void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32); 1.693 + void emit_arith(int op1, int op2, Register dst, Register src); 1.694 + 1.695 + void emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre); 1.696 + void emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre); 1.697 + void emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre); 1.698 + void 
emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre); 1.699 + void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, 1.700 + Address src, VexSimdPrefix pre, bool vector256); 1.701 + void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, 1.702 + XMMRegister src, VexSimdPrefix pre, bool vector256); 1.703 + 1.704 + void emit_operand(Register reg, 1.705 + Register base, Register index, Address::ScaleFactor scale, 1.706 + int disp, 1.707 + RelocationHolder const& rspec, 1.708 + int rip_relative_correction = 0); 1.709 + 1.710 + void emit_operand(Register reg, Address adr, int rip_relative_correction = 0); 1.711 + 1.712 + // operands that only take the original 32bit registers 1.713 + void emit_operand32(Register reg, Address adr); 1.714 + 1.715 + void emit_operand(XMMRegister reg, 1.716 + Register base, Register index, Address::ScaleFactor scale, 1.717 + int disp, 1.718 + RelocationHolder const& rspec); 1.719 + 1.720 + void emit_operand(XMMRegister reg, Address adr); 1.721 + 1.722 + void emit_operand(MMXRegister reg, Address adr); 1.723 + 1.724 + // workaround gcc (3.2.1-7) bug 1.725 + void emit_operand(Address adr, MMXRegister reg); 1.726 + 1.727 + 1.728 + // Immediate-to-memory forms 1.729 + void emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32); 1.730 + 1.731 + void emit_farith(int b1, int b2, int i); 1.732 + 1.733 + 1.734 + protected: 1.735 + #ifdef ASSERT 1.736 + void check_relocation(RelocationHolder const& rspec, int format); 1.737 + #endif 1.738 + 1.739 + void emit_data(jint data, relocInfo::relocType rtype, int format); 1.740 + void emit_data(jint data, RelocationHolder const& rspec, int format); 1.741 + void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0); 1.742 + void emit_data64(jlong data, RelocationHolder const& rspec, int format = 0); 1.743 + 1.744 + bool reachable(AddressLiteral adr) NOT_LP64({ return true;}); 1.745 + 1.746 + // These are all easily abused and 
hence protected 1.747 + 1.748 + // 32BIT ONLY SECTION 1.749 +#ifndef _LP64 1.750 + // Make these disappear in 64bit mode since they would never be correct 1.751 + void cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY 1.752 + void cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY 1.753 + 1.754 + void mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY 1.755 + void mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY 1.756 + 1.757 + void push_literal32(int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY 1.758 +#else 1.759 + // 64BIT ONLY SECTION 1.760 + void mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec); // 64BIT ONLY 1.761 + 1.762 + void cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec); 1.763 + void cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec); 1.764 + 1.765 + void mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec); 1.766 + void mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec); 1.767 +#endif // _LP64 1.768 + 1.769 + // These are unique in that we are ensured by the caller that the 32bit 1.770 + // relative in these instructions will always be able to reach the potentially 1.771 + // 64bit address described by entry. Since they can take a 64bit address they 1.772 + // don't have the 32 suffix like the other instructions in this class. 1.773 + 1.774 + void call_literal(address entry, RelocationHolder const& rspec); 1.775 + void jmp_literal(address entry, RelocationHolder const& rspec); 1.776 + 1.777 + // Avoid using directly section 1.778 + // Instructions in this section are actually usable by anyone without danger 1.779 + // of failure but have performance issues that are addressed my enhanced 1.780 + // instructions which will do the proper thing base on the particular cpu. 
1.781 + // We protect them because we don't trust you... 1.782 + 1.783 + // Don't use next inc() and dec() methods directly. INC & DEC instructions 1.784 + // could cause a partial flag stall since they don't set CF flag. 1.785 + // Use MacroAssembler::decrement() & MacroAssembler::increment() methods 1.786 + // which call inc() & dec() or add() & sub() in accordance with 1.787 + // the product flag UseIncDec value. 1.788 + 1.789 + void decl(Register dst); 1.790 + void decl(Address dst); 1.791 + void decq(Register dst); 1.792 + void decq(Address dst); 1.793 + 1.794 + void incl(Register dst); 1.795 + void incl(Address dst); 1.796 + void incq(Register dst); 1.797 + void incq(Address dst); 1.798 + 1.799 + // New cpus require use of movsd and movss to avoid partial register stall 1.800 + // when loading from memory. But for old Opteron use movlpd instead of movsd. 1.801 + // The selection is done in MacroAssembler::movdbl() and movflt(). 1.802 + 1.803 + // Move Scalar Single-Precision Floating-Point Values 1.804 + void movss(XMMRegister dst, Address src); 1.805 + void movss(XMMRegister dst, XMMRegister src); 1.806 + void movss(Address dst, XMMRegister src); 1.807 + 1.808 + // Move Scalar Double-Precision Floating-Point Values 1.809 + void movsd(XMMRegister dst, Address src); 1.810 + void movsd(XMMRegister dst, XMMRegister src); 1.811 + void movsd(Address dst, XMMRegister src); 1.812 + void movlpd(XMMRegister dst, Address src); 1.813 + 1.814 + // New cpus require use of movaps and movapd to avoid partial register stall 1.815 + // when moving between registers. 
1.816 + void movaps(XMMRegister dst, XMMRegister src); 1.817 + void movapd(XMMRegister dst, XMMRegister src); 1.818 + 1.819 + // End avoid using directly 1.820 + 1.821 + 1.822 + // Instruction prefixes 1.823 + void prefix(Prefix p); 1.824 + 1.825 + public: 1.826 + 1.827 + // Creation 1.828 + Assembler(CodeBuffer* code) : AbstractAssembler(code) {} 1.829 + 1.830 + // Decoding 1.831 + static address locate_operand(address inst, WhichOperand which); 1.832 + static address locate_next_instruction(address inst); 1.833 + 1.834 + // Utilities 1.835 + static bool is_polling_page_far() NOT_LP64({ return false;}); 1.836 + 1.837 + // Generic instructions 1.838 + // Does 32bit or 64bit as needed for the platform. In some sense these 1.839 + // belong in macro assembler but there is no need for both varieties to exist 1.840 + 1.841 + void lea(Register dst, Address src); 1.842 + 1.843 + void mov(Register dst, Register src); 1.844 + 1.845 + void pusha(); 1.846 + void popa(); 1.847 + 1.848 + void pushf(); 1.849 + void popf(); 1.850 + 1.851 + void push(int32_t imm32); 1.852 + 1.853 + void push(Register src); 1.854 + 1.855 + void pop(Register dst); 1.856 + 1.857 + // These are dummies to prevent surprise implicit conversions to Register 1.858 + void push(void* v); 1.859 + void pop(void* v); 1.860 + 1.861 + // These do register sized moves/scans 1.862 + void rep_mov(); 1.863 + void rep_stos(); 1.864 + void rep_stosb(); 1.865 + void repne_scan(); 1.866 +#ifdef _LP64 1.867 + void repne_scanl(); 1.868 +#endif 1.869 + 1.870 + // Vanilla instructions in lexical order 1.871 + 1.872 + void adcl(Address dst, int32_t imm32); 1.873 + void adcl(Address dst, Register src); 1.874 + void adcl(Register dst, int32_t imm32); 1.875 + void adcl(Register dst, Address src); 1.876 + void adcl(Register dst, Register src); 1.877 + 1.878 + void adcq(Register dst, int32_t imm32); 1.879 + void adcq(Register dst, Address src); 1.880 + void adcq(Register dst, Register src); 1.881 + 1.882 + void addl(Address dst, 
int32_t imm32); 1.883 + void addl(Address dst, Register src); 1.884 + void addl(Register dst, int32_t imm32); 1.885 + void addl(Register dst, Address src); 1.886 + void addl(Register dst, Register src); 1.887 + 1.888 + void addq(Address dst, int32_t imm32); 1.889 + void addq(Address dst, Register src); 1.890 + void addq(Register dst, int32_t imm32); 1.891 + void addq(Register dst, Address src); 1.892 + void addq(Register dst, Register src); 1.893 + 1.894 + void addr_nop_4(); 1.895 + void addr_nop_5(); 1.896 + void addr_nop_7(); 1.897 + void addr_nop_8(); 1.898 + 1.899 + // Add Scalar Double-Precision Floating-Point Values 1.900 + void addsd(XMMRegister dst, Address src); 1.901 + void addsd(XMMRegister dst, XMMRegister src); 1.902 + 1.903 + // Add Scalar Single-Precision Floating-Point Values 1.904 + void addss(XMMRegister dst, Address src); 1.905 + void addss(XMMRegister dst, XMMRegister src); 1.906 + 1.907 + // AES instructions 1.908 + void aesdec(XMMRegister dst, Address src); 1.909 + void aesdec(XMMRegister dst, XMMRegister src); 1.910 + void aesdeclast(XMMRegister dst, Address src); 1.911 + void aesdeclast(XMMRegister dst, XMMRegister src); 1.912 + void aesenc(XMMRegister dst, Address src); 1.913 + void aesenc(XMMRegister dst, XMMRegister src); 1.914 + void aesenclast(XMMRegister dst, Address src); 1.915 + void aesenclast(XMMRegister dst, XMMRegister src); 1.916 + 1.917 + 1.918 + void andl(Address dst, int32_t imm32); 1.919 + void andl(Register dst, int32_t imm32); 1.920 + void andl(Register dst, Address src); 1.921 + void andl(Register dst, Register src); 1.922 + 1.923 + void andq(Address dst, int32_t imm32); 1.924 + void andq(Register dst, int32_t imm32); 1.925 + void andq(Register dst, Address src); 1.926 + void andq(Register dst, Register src); 1.927 + 1.928 + // BMI instructions 1.929 + void andnl(Register dst, Register src1, Register src2); 1.930 + void andnl(Register dst, Register src1, Address src2); 1.931 + void andnq(Register dst, Register src1, 
Register src2); 1.932 + void andnq(Register dst, Register src1, Address src2); 1.933 + 1.934 + void blsil(Register dst, Register src); 1.935 + void blsil(Register dst, Address src); 1.936 + void blsiq(Register dst, Register src); 1.937 + void blsiq(Register dst, Address src); 1.938 + 1.939 + void blsmskl(Register dst, Register src); 1.940 + void blsmskl(Register dst, Address src); 1.941 + void blsmskq(Register dst, Register src); 1.942 + void blsmskq(Register dst, Address src); 1.943 + 1.944 + void blsrl(Register dst, Register src); 1.945 + void blsrl(Register dst, Address src); 1.946 + void blsrq(Register dst, Register src); 1.947 + void blsrq(Register dst, Address src); 1.948 + 1.949 + void bsfl(Register dst, Register src); 1.950 + void bsrl(Register dst, Register src); 1.951 + 1.952 +#ifdef _LP64 1.953 + void bsfq(Register dst, Register src); 1.954 + void bsrq(Register dst, Register src); 1.955 +#endif 1.956 + 1.957 + void bswapl(Register reg); 1.958 + 1.959 + void bswapq(Register reg); 1.960 + 1.961 + void call(Label& L, relocInfo::relocType rtype); 1.962 + void call(Register reg); // push pc; pc <- reg 1.963 + void call(Address adr); // push pc; pc <- adr 1.964 + 1.965 + void cdql(); 1.966 + 1.967 + void cdqq(); 1.968 + 1.969 + void cld(); 1.970 + 1.971 + void clflush(Address adr); 1.972 + 1.973 + void cmovl(Condition cc, Register dst, Register src); 1.974 + void cmovl(Condition cc, Register dst, Address src); 1.975 + 1.976 + void cmovq(Condition cc, Register dst, Register src); 1.977 + void cmovq(Condition cc, Register dst, Address src); 1.978 + 1.979 + 1.980 + void cmpb(Address dst, int imm8); 1.981 + 1.982 + void cmpl(Address dst, int32_t imm32); 1.983 + 1.984 + void cmpl(Register dst, int32_t imm32); 1.985 + void cmpl(Register dst, Register src); 1.986 + void cmpl(Register dst, Address src); 1.987 + 1.988 + void cmpq(Address dst, int32_t imm32); 1.989 + void cmpq(Address dst, Register src); 1.990 + 1.991 + void cmpq(Register dst, int32_t imm32); 1.992 + 
void cmpq(Register dst, Register src); 1.993 + void cmpq(Register dst, Address src); 1.994 + 1.995 + // these are dummies used to catch attempting to convert NULL to Register 1.996 + void cmpl(Register dst, void* junk); // dummy 1.997 + void cmpq(Register dst, void* junk); // dummy 1.998 + 1.999 + void cmpw(Address dst, int imm16); 1.1000 + 1.1001 + void cmpxchg8 (Address adr); 1.1002 + 1.1003 + void cmpxchgl(Register reg, Address adr); 1.1004 + 1.1005 + void cmpxchgq(Register reg, Address adr); 1.1006 + 1.1007 + // Ordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS 1.1008 + void comisd(XMMRegister dst, Address src); 1.1009 + void comisd(XMMRegister dst, XMMRegister src); 1.1010 + 1.1011 + // Ordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS 1.1012 + void comiss(XMMRegister dst, Address src); 1.1013 + void comiss(XMMRegister dst, XMMRegister src); 1.1014 + 1.1015 + // Identify processor type and features 1.1016 + void cpuid(); 1.1017 + 1.1018 + // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value 1.1019 + void cvtsd2ss(XMMRegister dst, XMMRegister src); 1.1020 + void cvtsd2ss(XMMRegister dst, Address src); 1.1021 + 1.1022 + // Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value 1.1023 + void cvtsi2sdl(XMMRegister dst, Register src); 1.1024 + void cvtsi2sdl(XMMRegister dst, Address src); 1.1025 + void cvtsi2sdq(XMMRegister dst, Register src); 1.1026 + void cvtsi2sdq(XMMRegister dst, Address src); 1.1027 + 1.1028 + // Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value 1.1029 + void cvtsi2ssl(XMMRegister dst, Register src); 1.1030 + void cvtsi2ssl(XMMRegister dst, Address src); 1.1031 + void cvtsi2ssq(XMMRegister dst, Register src); 1.1032 + void cvtsi2ssq(XMMRegister dst, Address src); 1.1033 + 1.1034 + // Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Value 1.1035 + void 
cvtdq2pd(XMMRegister dst, XMMRegister src); 1.1036 + 1.1037 + // Convert Packed Signed Doubleword Integers to Packed Single-Precision Floating-Point Value 1.1038 + void cvtdq2ps(XMMRegister dst, XMMRegister src); 1.1039 + 1.1040 + // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value 1.1041 + void cvtss2sd(XMMRegister dst, XMMRegister src); 1.1042 + void cvtss2sd(XMMRegister dst, Address src); 1.1043 + 1.1044 + // Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer 1.1045 + void cvttsd2sil(Register dst, Address src); 1.1046 + void cvttsd2sil(Register dst, XMMRegister src); 1.1047 + void cvttsd2siq(Register dst, XMMRegister src); 1.1048 + 1.1049 + // Convert with Truncation Scalar Single-Precision Floating-Point Value to Doubleword Integer 1.1050 + void cvttss2sil(Register dst, XMMRegister src); 1.1051 + void cvttss2siq(Register dst, XMMRegister src); 1.1052 + 1.1053 + // Divide Scalar Double-Precision Floating-Point Values 1.1054 + void divsd(XMMRegister dst, Address src); 1.1055 + void divsd(XMMRegister dst, XMMRegister src); 1.1056 + 1.1057 + // Divide Scalar Single-Precision Floating-Point Values 1.1058 + void divss(XMMRegister dst, Address src); 1.1059 + void divss(XMMRegister dst, XMMRegister src); 1.1060 + 1.1061 + void emms(); 1.1062 + 1.1063 + void fabs(); 1.1064 + 1.1065 + void fadd(int i); 1.1066 + 1.1067 + void fadd_d(Address src); 1.1068 + void fadd_s(Address src); 1.1069 + 1.1070 + // "Alternate" versions of x87 instructions place result down in FPU 1.1071 + // stack instead of on TOS 1.1072 + 1.1073 + void fadda(int i); // "alternate" fadd 1.1074 + void faddp(int i = 1); 1.1075 + 1.1076 + void fchs(); 1.1077 + 1.1078 + void fcom(int i); 1.1079 + 1.1080 + void fcomp(int i = 1); 1.1081 + void fcomp_d(Address src); 1.1082 + void fcomp_s(Address src); 1.1083 + 1.1084 + void fcompp(); 1.1085 + 1.1086 + void fcos(); 1.1087 + 1.1088 + void fdecstp(); 1.1089 + 1.1090 + 
void fdiv(int i); 1.1091 + void fdiv_d(Address src); 1.1092 + void fdivr_s(Address src); 1.1093 + void fdiva(int i); // "alternate" fdiv 1.1094 + void fdivp(int i = 1); 1.1095 + 1.1096 + void fdivr(int i); 1.1097 + void fdivr_d(Address src); 1.1098 + void fdiv_s(Address src); 1.1099 + 1.1100 + void fdivra(int i); // "alternate" reversed fdiv 1.1101 + 1.1102 + void fdivrp(int i = 1); 1.1103 + 1.1104 + void ffree(int i = 0); 1.1105 + 1.1106 + void fild_d(Address adr); 1.1107 + void fild_s(Address adr); 1.1108 + 1.1109 + void fincstp(); 1.1110 + 1.1111 + void finit(); 1.1112 + 1.1113 + void fist_s (Address adr); 1.1114 + void fistp_d(Address adr); 1.1115 + void fistp_s(Address adr); 1.1116 + 1.1117 + void fld1(); 1.1118 + 1.1119 + void fld_d(Address adr); 1.1120 + void fld_s(Address adr); 1.1121 + void fld_s(int index); 1.1122 + void fld_x(Address adr); // extended-precision (80-bit) format 1.1123 + 1.1124 + void fldcw(Address src); 1.1125 + 1.1126 + void fldenv(Address src); 1.1127 + 1.1128 + void fldlg2(); 1.1129 + 1.1130 + void fldln2(); 1.1131 + 1.1132 + void fldz(); 1.1133 + 1.1134 + void flog(); 1.1135 + void flog10(); 1.1136 + 1.1137 + void fmul(int i); 1.1138 + 1.1139 + void fmul_d(Address src); 1.1140 + void fmul_s(Address src); 1.1141 + 1.1142 + void fmula(int i); // "alternate" fmul 1.1143 + 1.1144 + void fmulp(int i = 1); 1.1145 + 1.1146 + void fnsave(Address dst); 1.1147 + 1.1148 + void fnstcw(Address src); 1.1149 + 1.1150 + void fnstsw_ax(); 1.1151 + 1.1152 + void fprem(); 1.1153 + void fprem1(); 1.1154 + 1.1155 + void frstor(Address src); 1.1156 + 1.1157 + void fsin(); 1.1158 + 1.1159 + void fsqrt(); 1.1160 + 1.1161 + void fst_d(Address adr); 1.1162 + void fst_s(Address adr); 1.1163 + 1.1164 + void fstp_d(Address adr); 1.1165 + void fstp_d(int index); 1.1166 + void fstp_s(Address adr); 1.1167 + void fstp_x(Address adr); // extended-precision (80-bit) format 1.1168 + 1.1169 + void fsub(int i); 1.1170 + void fsub_d(Address src); 1.1171 + void 
fsub_s(Address src); 1.1172 + 1.1173 + void fsuba(int i); // "alternate" fsub 1.1174 + 1.1175 + void fsubp(int i = 1); 1.1176 + 1.1177 + void fsubr(int i); 1.1178 + void fsubr_d(Address src); 1.1179 + void fsubr_s(Address src); 1.1180 + 1.1181 + void fsubra(int i); // "alternate" reversed fsub 1.1182 + 1.1183 + void fsubrp(int i = 1); 1.1184 + 1.1185 + void ftan(); 1.1186 + 1.1187 + void ftst(); 1.1188 + 1.1189 + void fucomi(int i = 1); 1.1190 + void fucomip(int i = 1); 1.1191 + 1.1192 + void fwait(); 1.1193 + 1.1194 + void fxch(int i = 1); 1.1195 + 1.1196 + void fxrstor(Address src); 1.1197 + 1.1198 + void fxsave(Address dst); 1.1199 + 1.1200 + void fyl2x(); 1.1201 + void frndint(); 1.1202 + void f2xm1(); 1.1203 + void fldl2e(); 1.1204 + 1.1205 + void hlt(); 1.1206 + 1.1207 + void idivl(Register src); 1.1208 + void divl(Register src); // Unsigned division 1.1209 + 1.1210 + void idivq(Register src); 1.1211 + 1.1212 + void imull(Register dst, Register src); 1.1213 + void imull(Register dst, Register src, int value); 1.1214 + void imull(Register dst, Address src); 1.1215 + 1.1216 + void imulq(Register dst, Register src); 1.1217 + void imulq(Register dst, Register src, int value); 1.1218 +#ifdef _LP64 1.1219 + void imulq(Register dst, Address src); 1.1220 +#endif 1.1221 + 1.1222 + 1.1223 + // jcc is the generic conditional branch generator to run- 1.1224 + // time routines, jcc is used for branches to labels. jcc 1.1225 + // takes a branch opcode (cc) and a label (L) and generates 1.1226 + // either a backward branch or a forward branch and links it 1.1227 + // to the label fixup chain. 
Usage: 1.1228 + // 1.1229 + // Label L; // unbound label 1.1230 + // jcc(cc, L); // forward branch to unbound label 1.1231 + // bind(L); // bind label to the current pc 1.1232 + // jcc(cc, L); // backward branch to bound label 1.1233 + // bind(L); // illegal: a label may be bound only once 1.1234 + // 1.1235 + // Note: The same Label can be used for forward and backward branches 1.1236 + // but it may be bound only once. 1.1237 + 1.1238 + void jcc(Condition cc, Label& L, bool maybe_short = true); 1.1239 + 1.1240 + // Conditional jump to a 8-bit offset to L. 1.1241 + // WARNING: be very careful using this for forward jumps. If the label is 1.1242 + // not bound within an 8-bit offset of this instruction, a run-time error 1.1243 + // will occur. 1.1244 + void jccb(Condition cc, Label& L); 1.1245 + 1.1246 + void jmp(Address entry); // pc <- entry 1.1247 + 1.1248 + // Label operations & relative jumps (PPUM Appendix D) 1.1249 + void jmp(Label& L, bool maybe_short = true); // unconditional jump to L 1.1250 + 1.1251 + void jmp(Register entry); // pc <- entry 1.1252 + 1.1253 + // Unconditional 8-bit offset jump to L. 1.1254 + // WARNING: be very careful using this for forward jumps. If the label is 1.1255 + // not bound within an 8-bit offset of this instruction, a run-time error 1.1256 + // will occur. 
1.1257 + void jmpb(Label& L); 1.1258 + 1.1259 + void ldmxcsr( Address src ); 1.1260 + 1.1261 + void leal(Register dst, Address src); 1.1262 + 1.1263 + void leaq(Register dst, Address src); 1.1264 + 1.1265 + void lfence(); 1.1266 + 1.1267 + void lock(); 1.1268 + 1.1269 + void lzcntl(Register dst, Register src); 1.1270 + 1.1271 +#ifdef _LP64 1.1272 + void lzcntq(Register dst, Register src); 1.1273 +#endif 1.1274 + 1.1275 + enum Membar_mask_bits { 1.1276 + StoreStore = 1 << 3, 1.1277 + LoadStore = 1 << 2, 1.1278 + StoreLoad = 1 << 1, 1.1279 + LoadLoad = 1 << 0 1.1280 + }; 1.1281 + 1.1282 + // Serializes memory and blows flags 1.1283 + void membar(Membar_mask_bits order_constraint) { 1.1284 + if (os::is_MP()) { 1.1285 + // We only have to handle StoreLoad 1.1286 + if (order_constraint & StoreLoad) { 1.1287 + // All usable chips support "locked" instructions which suffice 1.1288 + // as barriers, and are much faster than the alternative of 1.1289 + // using cpuid instruction. We use here a locked add [esp],0. 1.1290 + // This is conveniently otherwise a no-op except for blowing 1.1291 + // flags. 1.1292 + // Any change to this code may need to revisit other places in 1.1293 + // the code where this idiom is used, in particular the 1.1294 + // orderAccess code. 
1.1295 + lock(); 1.1296 + addl(Address(rsp, 0), 0);// Assert the lock# signal here 1.1297 + } 1.1298 + } 1.1299 + } 1.1300 + 1.1301 + void mfence(); 1.1302 + 1.1303 + // Moves 1.1304 + 1.1305 + void mov64(Register dst, int64_t imm64); 1.1306 + 1.1307 + void movb(Address dst, Register src); 1.1308 + void movb(Address dst, int imm8); 1.1309 + void movb(Register dst, Address src); 1.1310 + 1.1311 + void movdl(XMMRegister dst, Register src); 1.1312 + void movdl(Register dst, XMMRegister src); 1.1313 + void movdl(XMMRegister dst, Address src); 1.1314 + void movdl(Address dst, XMMRegister src); 1.1315 + 1.1316 + // Move Double Quadword 1.1317 + void movdq(XMMRegister dst, Register src); 1.1318 + void movdq(Register dst, XMMRegister src); 1.1319 + 1.1320 + // Move Aligned Double Quadword 1.1321 + void movdqa(XMMRegister dst, XMMRegister src); 1.1322 + void movdqa(XMMRegister dst, Address src); 1.1323 + 1.1324 + // Move Unaligned Double Quadword 1.1325 + void movdqu(Address dst, XMMRegister src); 1.1326 + void movdqu(XMMRegister dst, Address src); 1.1327 + void movdqu(XMMRegister dst, XMMRegister src); 1.1328 + 1.1329 + // Move Unaligned 256bit Vector 1.1330 + void vmovdqu(Address dst, XMMRegister src); 1.1331 + void vmovdqu(XMMRegister dst, Address src); 1.1332 + void vmovdqu(XMMRegister dst, XMMRegister src); 1.1333 + 1.1334 + // Move lower 64bit to high 64bit in 128bit register 1.1335 + void movlhps(XMMRegister dst, XMMRegister src); 1.1336 + 1.1337 + void movl(Register dst, int32_t imm32); 1.1338 + void movl(Address dst, int32_t imm32); 1.1339 + void movl(Register dst, Register src); 1.1340 + void movl(Register dst, Address src); 1.1341 + void movl(Address dst, Register src); 1.1342 + 1.1343 + // These dummies prevent using movl from converting a zero (like NULL) into Register 1.1344 + // by giving the compiler two choices it can't resolve 1.1345 + 1.1346 + void movl(Address dst, void* junk); 1.1347 + void movl(Register dst, void* junk); 1.1348 + 1.1349 +#ifdef _LP64 
1.1350 + void movq(Register dst, Register src); 1.1351 + void movq(Register dst, Address src); 1.1352 + void movq(Address dst, Register src); 1.1353 +#endif 1.1354 + 1.1355 + void movq(Address dst, MMXRegister src ); 1.1356 + void movq(MMXRegister dst, Address src ); 1.1357 + 1.1358 +#ifdef _LP64 1.1359 + // These dummies prevent using movq from converting a zero (like NULL) into Register 1.1360 + // by giving the compiler two choices it can't resolve 1.1361 + 1.1362 + void movq(Address dst, void* dummy); 1.1363 + void movq(Register dst, void* dummy); 1.1364 +#endif 1.1365 + 1.1366 + // Move Quadword 1.1367 + void movq(Address dst, XMMRegister src); 1.1368 + void movq(XMMRegister dst, Address src); 1.1369 + 1.1370 + void movsbl(Register dst, Address src); 1.1371 + void movsbl(Register dst, Register src); 1.1372 + 1.1373 +#ifdef _LP64 1.1374 + void movsbq(Register dst, Address src); 1.1375 + void movsbq(Register dst, Register src); 1.1376 + 1.1377 + // Move signed 32bit immediate to 64bit extending sign 1.1378 + void movslq(Address dst, int32_t imm64); 1.1379 + void movslq(Register dst, int32_t imm64); 1.1380 + 1.1381 + void movslq(Register dst, Address src); 1.1382 + void movslq(Register dst, Register src); 1.1383 + void movslq(Register dst, void* src); // Dummy declaration to cause NULL to be ambiguous 1.1384 +#endif 1.1385 + 1.1386 + void movswl(Register dst, Address src); 1.1387 + void movswl(Register dst, Register src); 1.1388 + 1.1389 +#ifdef _LP64 1.1390 + void movswq(Register dst, Address src); 1.1391 + void movswq(Register dst, Register src); 1.1392 +#endif 1.1393 + 1.1394 + void movw(Address dst, int imm16); 1.1395 + void movw(Register dst, Address src); 1.1396 + void movw(Address dst, Register src); 1.1397 + 1.1398 + void movzbl(Register dst, Address src); 1.1399 + void movzbl(Register dst, Register src); 1.1400 + 1.1401 +#ifdef _LP64 1.1402 + void movzbq(Register dst, Address src); 1.1403 + void movzbq(Register dst, Register src); 1.1404 +#endif 1.1405 + 
1.1406 + void movzwl(Register dst, Address src); 1.1407 + void movzwl(Register dst, Register src); 1.1408 + 1.1409 +#ifdef _LP64 1.1410 + void movzwq(Register dst, Address src); 1.1411 + void movzwq(Register dst, Register src); 1.1412 +#endif 1.1413 + 1.1414 + void mull(Address src); 1.1415 + void mull(Register src); 1.1416 + 1.1417 + // Multiply Scalar Double-Precision Floating-Point Values 1.1418 + void mulsd(XMMRegister dst, Address src); 1.1419 + void mulsd(XMMRegister dst, XMMRegister src); 1.1420 + 1.1421 + // Multiply Scalar Single-Precision Floating-Point Values 1.1422 + void mulss(XMMRegister dst, Address src); 1.1423 + void mulss(XMMRegister dst, XMMRegister src); 1.1424 + 1.1425 + void negl(Register dst); 1.1426 + 1.1427 +#ifdef _LP64 1.1428 + void negq(Register dst); 1.1429 +#endif 1.1430 + 1.1431 + void nop(int i = 1); 1.1432 + 1.1433 + void notl(Register dst); 1.1434 + 1.1435 +#ifdef _LP64 1.1436 + void notq(Register dst); 1.1437 +#endif 1.1438 + 1.1439 + void orl(Address dst, int32_t imm32); 1.1440 + void orl(Register dst, int32_t imm32); 1.1441 + void orl(Register dst, Address src); 1.1442 + void orl(Register dst, Register src); 1.1443 + 1.1444 + void orq(Address dst, int32_t imm32); 1.1445 + void orq(Register dst, int32_t imm32); 1.1446 + void orq(Register dst, Address src); 1.1447 + void orq(Register dst, Register src); 1.1448 + 1.1449 + // Pack with unsigned saturation 1.1450 + void packuswb(XMMRegister dst, XMMRegister src); 1.1451 + void packuswb(XMMRegister dst, Address src); 1.1452 + void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.1453 + 1.1454 + // Permutation of 64bit words 1.1455 + void vpermq(XMMRegister dst, XMMRegister src, int imm8, bool vector256); 1.1456 + 1.1457 + void pause(); 1.1458 + 1.1459 + // SSE4.2 string instructions 1.1460 + void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8); 1.1461 + void pcmpestri(XMMRegister xmm1, Address src, int imm8); 1.1462 + 1.1463 + // SSE 4.1 extract
1.1464 + void pextrd(Register dst, XMMRegister src, int imm8); 1.1465 + void pextrq(Register dst, XMMRegister src, int imm8); 1.1466 + 1.1467 + // SSE 4.1 insert 1.1468 + void pinsrd(XMMRegister dst, Register src, int imm8); 1.1469 + void pinsrq(XMMRegister dst, Register src, int imm8); 1.1470 + 1.1471 + // SSE4.1 packed move 1.1472 + void pmovzxbw(XMMRegister dst, XMMRegister src); 1.1473 + void pmovzxbw(XMMRegister dst, Address src); 1.1474 + 1.1475 +#ifndef _LP64 // no 32bit push/pop on amd64 1.1476 + void popl(Address dst); 1.1477 +#endif 1.1478 + 1.1479 +#ifdef _LP64 1.1480 + void popq(Address dst); 1.1481 +#endif 1.1482 + 1.1483 + void popcntl(Register dst, Address src); 1.1484 + void popcntl(Register dst, Register src); 1.1485 + 1.1486 +#ifdef _LP64 1.1487 + void popcntq(Register dst, Address src); 1.1488 + void popcntq(Register dst, Register src); 1.1489 +#endif 1.1490 + 1.1491 + // Prefetches (SSE, SSE2, 3DNOW only) 1.1492 + 1.1493 + void prefetchnta(Address src); 1.1494 + void prefetchr(Address src); 1.1495 + void prefetcht0(Address src); 1.1496 + void prefetcht1(Address src); 1.1497 + void prefetcht2(Address src); 1.1498 + void prefetchw(Address src); 1.1499 + 1.1500 + // Shuffle Bytes 1.1501 + void pshufb(XMMRegister dst, XMMRegister src); 1.1502 + void pshufb(XMMRegister dst, Address src); 1.1503 + 1.1504 + // Shuffle Packed Doublewords 1.1505 + void pshufd(XMMRegister dst, XMMRegister src, int mode); 1.1506 + void pshufd(XMMRegister dst, Address src, int mode); 1.1507 + 1.1508 + // Shuffle Packed Low Words 1.1509 + void pshuflw(XMMRegister dst, XMMRegister src, int mode); 1.1510 + void pshuflw(XMMRegister dst, Address src, int mode); 1.1511 + 1.1512 + // Shift Right by bytes Logical DoubleQuadword Immediate 1.1513 + void psrldq(XMMRegister dst, int shift); 1.1514 + 1.1515 + // Logical Compare 128bit 1.1516 + void ptest(XMMRegister dst, XMMRegister src); 1.1517 + void ptest(XMMRegister dst, Address src); 1.1518 + // Logical Compare 256bit 1.1519 + void 
vptest(XMMRegister dst, XMMRegister src); 1.1520 + void vptest(XMMRegister dst, Address src); 1.1521 + 1.1522 + // Interleave Low Bytes 1.1523 + void punpcklbw(XMMRegister dst, XMMRegister src); 1.1524 + void punpcklbw(XMMRegister dst, Address src); 1.1525 + 1.1526 + // Interleave Low Doublewords 1.1527 + void punpckldq(XMMRegister dst, XMMRegister src); 1.1528 + void punpckldq(XMMRegister dst, Address src); 1.1529 + 1.1530 + // Interleave Low Quadwords 1.1531 + void punpcklqdq(XMMRegister dst, XMMRegister src); 1.1532 + 1.1533 +#ifndef _LP64 // no 32bit push/pop on amd64 1.1534 + void pushl(Address src); 1.1535 +#endif 1.1536 + 1.1537 + void pushq(Address src); 1.1538 + 1.1539 + void rcll(Register dst, int imm8); 1.1540 + 1.1541 + void rclq(Register dst, int imm8); 1.1542 + 1.1543 + void rdtsc(); 1.1544 + 1.1545 + void ret(int imm16); 1.1546 + 1.1547 + void sahf(); 1.1548 + 1.1549 + void sarl(Register dst, int imm8); 1.1550 + void sarl(Register dst); 1.1551 + 1.1552 + void sarq(Register dst, int imm8); 1.1553 + void sarq(Register dst); 1.1554 + 1.1555 + void sbbl(Address dst, int32_t imm32); 1.1556 + void sbbl(Register dst, int32_t imm32); 1.1557 + void sbbl(Register dst, Address src); 1.1558 + void sbbl(Register dst, Register src); 1.1559 + 1.1560 + void sbbq(Address dst, int32_t imm32); 1.1561 + void sbbq(Register dst, int32_t imm32); 1.1562 + void sbbq(Register dst, Address src); 1.1563 + void sbbq(Register dst, Register src); 1.1564 + 1.1565 + void setb(Condition cc, Register dst); 1.1566 + 1.1567 + void shldl(Register dst, Register src); 1.1568 + 1.1569 + void shll(Register dst, int imm8); 1.1570 + void shll(Register dst); 1.1571 + 1.1572 + void shlq(Register dst, int imm8); 1.1573 + void shlq(Register dst); 1.1574 + 1.1575 + void shrdl(Register dst, Register src); 1.1576 + 1.1577 + void shrl(Register dst, int imm8); 1.1578 + void shrl(Register dst); 1.1579 + 1.1580 + void shrq(Register dst, int imm8); 1.1581 + void shrq(Register dst); 1.1582 + 1.1583 + void 
smovl(); // QQQ generic? 1.1584 + 1.1585 + // Compute Square Root of Scalar Double-Precision Floating-Point Value 1.1586 + void sqrtsd(XMMRegister dst, Address src); 1.1587 + void sqrtsd(XMMRegister dst, XMMRegister src); 1.1588 + 1.1589 + // Compute Square Root of Scalar Single-Precision Floating-Point Value 1.1590 + void sqrtss(XMMRegister dst, Address src); 1.1591 + void sqrtss(XMMRegister dst, XMMRegister src); 1.1592 + 1.1593 + void std(); 1.1594 + 1.1595 + void stmxcsr( Address dst ); 1.1596 + 1.1597 + void subl(Address dst, int32_t imm32); 1.1598 + void subl(Address dst, Register src); 1.1599 + void subl(Register dst, int32_t imm32); 1.1600 + void subl(Register dst, Address src); 1.1601 + void subl(Register dst, Register src); 1.1602 + 1.1603 + void subq(Address dst, int32_t imm32); 1.1604 + void subq(Address dst, Register src); 1.1605 + void subq(Register dst, int32_t imm32); 1.1606 + void subq(Register dst, Address src); 1.1607 + void subq(Register dst, Register src); 1.1608 + 1.1609 + // Force generation of a 4 byte immediate value even if it fits into 8bit 1.1610 + void subl_imm32(Register dst, int32_t imm32); 1.1611 + void subq_imm32(Register dst, int32_t imm32); 1.1612 + 1.1613 + // Subtract Scalar Double-Precision Floating-Point Values 1.1614 + void subsd(XMMRegister dst, Address src); 1.1615 + void subsd(XMMRegister dst, XMMRegister src); 1.1616 + 1.1617 + // Subtract Scalar Single-Precision Floating-Point Values 1.1618 + void subss(XMMRegister dst, Address src); 1.1619 + void subss(XMMRegister dst, XMMRegister src); 1.1620 + 1.1621 + void testb(Register dst, int imm8); 1.1622 + 1.1623 + void testl(Register dst, int32_t imm32); 1.1624 + void testl(Register dst, Register src); 1.1625 + void testl(Register dst, Address src); 1.1626 + 1.1627 + void testq(Register dst, int32_t imm32); 1.1628 + void testq(Register dst, Register src); 1.1629 + 1.1630 + // BMI - count trailing zeros 1.1631 + void tzcntl(Register dst, Register src); 1.1632 + void 
tzcntq(Register dst, Register src); 1.1633 + 1.1634 + // Unordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS 1.1635 + void ucomisd(XMMRegister dst, Address src); 1.1636 + void ucomisd(XMMRegister dst, XMMRegister src); 1.1637 + 1.1638 + // Unordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS 1.1639 + void ucomiss(XMMRegister dst, Address src); 1.1640 + void ucomiss(XMMRegister dst, XMMRegister src); 1.1641 + 1.1642 + void xabort(int8_t imm8); 1.1643 + 1.1644 + void xaddl(Address dst, Register src); 1.1645 + 1.1646 + void xaddq(Address dst, Register src); 1.1647 + 1.1648 + void xbegin(Label& abort, relocInfo::relocType rtype = relocInfo::none); 1.1649 + 1.1650 + void xchgl(Register reg, Address adr); 1.1651 + void xchgl(Register dst, Register src); 1.1652 + 1.1653 + void xchgq(Register reg, Address adr); 1.1654 + void xchgq(Register dst, Register src); 1.1655 + 1.1656 + void xend(); 1.1657 + 1.1658 + // Get Value of Extended Control Register 1.1659 + void xgetbv(); 1.1660 + 1.1661 + void xorl(Register dst, int32_t imm32); 1.1662 + void xorl(Register dst, Address src); 1.1663 + void xorl(Register dst, Register src); 1.1664 + 1.1665 + void xorq(Register dst, Address src); 1.1666 + void xorq(Register dst, Register src); 1.1667 + 1.1668 + void set_byte_if_not_zero(Register dst); // sets dst to 1 if not zero, otherwise 0 1.1669 + 1.1670 + // AVX 3-operand scalar instructions (encoded with VEX prefix) 1.1671 + 1.1672 + void vaddsd(XMMRegister dst, XMMRegister nds, Address src); 1.1673 + void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src); 1.1674 + void vaddss(XMMRegister dst, XMMRegister nds, Address src); 1.1675 + void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src); 1.1676 + void vdivsd(XMMRegister dst, XMMRegister nds, Address src); 1.1677 + void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src); 1.1678 + void vdivss(XMMRegister dst, XMMRegister nds, Address src); 1.1679 + void
vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src); 1.1680 + void vmulsd(XMMRegister dst, XMMRegister nds, Address src); 1.1681 + void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src); 1.1682 + void vmulss(XMMRegister dst, XMMRegister nds, Address src); 1.1683 + void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src); 1.1684 + void vsubsd(XMMRegister dst, XMMRegister nds, Address src); 1.1685 + void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src); 1.1686 + void vsubss(XMMRegister dst, XMMRegister nds, Address src); 1.1687 + void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src); 1.1688 + 1.1689 + 1.1690 + //====================VECTOR ARITHMETIC===================================== 1.1691 + 1.1692 + // Add Packed Floating-Point Values 1.1693 + void addpd(XMMRegister dst, XMMRegister src); 1.1694 + void addps(XMMRegister dst, XMMRegister src); 1.1695 + void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.1696 + void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.1697 + void vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.1698 + void vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.1699 + 1.1700 + // Subtract Packed Floating-Point Values 1.1701 + void subpd(XMMRegister dst, XMMRegister src); 1.1702 + void subps(XMMRegister dst, XMMRegister src); 1.1703 + void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.1704 + void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.1705 + void vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.1706 + void vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.1707 + 1.1708 + // Multiply Packed Floating-Point Values 1.1709 + void mulpd(XMMRegister dst, XMMRegister src); 1.1710 + void mulps(XMMRegister dst, XMMRegister src); 1.1711 + void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister
src, bool vector256); 1.1712 + void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.1713 + void vmulpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.1714 + void vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.1715 + 1.1716 + // Divide Packed Floating-Point Values 1.1717 + void divpd(XMMRegister dst, XMMRegister src); 1.1718 + void divps(XMMRegister dst, XMMRegister src); 1.1719 + void vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.1720 + void vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.1721 + void vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.1722 + void vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.1723 + 1.1724 + // Bitwise Logical AND of Packed Floating-Point Values 1.1725 + void andpd(XMMRegister dst, XMMRegister src); 1.1726 + void andps(XMMRegister dst, XMMRegister src); 1.1727 + void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.1728 + void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.1729 + void vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.1730 + void vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.1731 + 1.1732 + // Bitwise Logical XOR of Packed Floating-Point Values 1.1733 + void xorpd(XMMRegister dst, XMMRegister src); 1.1734 + void xorps(XMMRegister dst, XMMRegister src); 1.1735 + void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.1736 + void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.1737 + void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.1738 + void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.1739 + 1.1740 + // Add packed integers 1.1741 + void paddb(XMMRegister dst, XMMRegister src); 1.1742 + void paddw(XMMRegister dst,
XMMRegister src); 1.1743 + void paddd(XMMRegister dst, XMMRegister src); 1.1744 + void paddq(XMMRegister dst, XMMRegister src); 1.1745 + void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.1746 + void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.1747 + void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.1748 + void vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.1749 + void vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.1750 + void vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.1751 + void vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.1752 + void vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.1753 + 1.1754 + // Subtract packed integers 1.1755 + void psubb(XMMRegister dst, XMMRegister src); 1.1756 + void psubw(XMMRegister dst, XMMRegister src); 1.1757 + void psubd(XMMRegister dst, XMMRegister src); 1.1758 + void psubq(XMMRegister dst, XMMRegister src); 1.1759 + void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.1760 + void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.1761 + void vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.1762 + void vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.1763 + void vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.1764 + void vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.1765 + void vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.1766 + void vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.1767 + 1.1768 + // Multiply packed integers (only shorts and ints) 1.1769 + void pmullw(XMMRegister dst, XMMRegister src); 1.1770 + void pmulld(XMMRegister dst, XMMRegister src); 1.1771 + void vpmullw(XMMRegister
dst, XMMRegister nds, XMMRegister src, bool vector256); 1.1772 + void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.1773 + void vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.1774 + void vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.1775 + 1.1776 + // Shift left packed integers 1.1777 + void psllw(XMMRegister dst, int shift); 1.1778 + void pslld(XMMRegister dst, int shift); 1.1779 + void psllq(XMMRegister dst, int shift); 1.1780 + void psllw(XMMRegister dst, XMMRegister shift); 1.1781 + void pslld(XMMRegister dst, XMMRegister shift); 1.1782 + void psllq(XMMRegister dst, XMMRegister shift); 1.1783 + void vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256); 1.1784 + void vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256); 1.1785 + void vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256); 1.1786 + void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); 1.1787 + void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); 1.1788 + void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); 1.1789 + 1.1790 + // Logical shift right packed integers 1.1791 + void psrlw(XMMRegister dst, int shift); 1.1792 + void psrld(XMMRegister dst, int shift); 1.1793 + void psrlq(XMMRegister dst, int shift); 1.1794 + void psrlw(XMMRegister dst, XMMRegister shift); 1.1795 + void psrld(XMMRegister dst, XMMRegister shift); 1.1796 + void psrlq(XMMRegister dst, XMMRegister shift); 1.1797 + void vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256); 1.1798 + void vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256); 1.1799 + void vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256); 1.1800 + void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); 1.1801 + void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool
vector256); 1.1802 + void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); 1.1803 + 1.1804 + // Arithmetic shift right packed integers (only shorts and ints, no instructions for longs) 1.1805 + void psraw(XMMRegister dst, int shift); 1.1806 + void psrad(XMMRegister dst, int shift); 1.1807 + void psraw(XMMRegister dst, XMMRegister shift); 1.1808 + void psrad(XMMRegister dst, XMMRegister shift); 1.1809 + void vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256); 1.1810 + void vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256); 1.1811 + void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); 1.1812 + void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); 1.1813 + 1.1814 + // And packed integers 1.1815 + void pand(XMMRegister dst, XMMRegister src); 1.1816 + void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.1817 + void vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.1818 + 1.1819 + // Or packed integers 1.1820 + void por(XMMRegister dst, XMMRegister src); 1.1821 + void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.1822 + void vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.1823 + 1.1824 + // Xor packed integers 1.1825 + void pxor(XMMRegister dst, XMMRegister src); 1.1826 + void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.1827 + void vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.1828 + 1.1829 + // Copy low 128 bits into high 128 bits of YMM registers. 1.1830 + void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src); 1.1831 + void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src); 1.1832 + 1.1833 + // Load/store the high 128 bits of YMM registers without destroying the other half.
1.1834 + void vinsertf128h(XMMRegister dst, Address src); 1.1835 + void vinserti128h(XMMRegister dst, Address src); 1.1836 + void vextractf128h(Address dst, XMMRegister src); 1.1837 + void vextracti128h(Address dst, XMMRegister src); 1.1838 + 1.1839 + // Duplicate 4-byte integer data from src into 8 locations in dst 1.1840 + void vpbroadcastd(XMMRegister dst, XMMRegister src); 1.1841 + 1.1842 + // Carry-Less Multiplication Quadword 1.1843 + void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask); 1.1844 + 1.1845 + // AVX instruction which is used to clear upper 128 bits of YMM registers and 1.1846 + // to avoid transition penalty between AVX and SSE states. There is no 1.1847 + // penalty if legacy SSE instructions are encoded using VEX prefix because 1.1848 + // they always clear upper 128 bits. It should be used before calling 1.1849 + // runtime code and native libraries. 1.1850 + void vzeroupper(); 1.1851 + 1.1852 + protected: 1.1853 + // The following instructions require 16-byte address alignment in SSE mode. 1.1854 + // They should be called only from the corresponding MacroAssembler instructions. 1.1855 + void andpd(XMMRegister dst, Address src); 1.1856 + void andps(XMMRegister dst, Address src); 1.1857 + void xorpd(XMMRegister dst, Address src); 1.1858 + void xorps(XMMRegister dst, Address src); 1.1859 + 1.1860 +}; 1.1861 + 1.1862 +#endif // CPU_X86_VM_ASSEMBLER_X86_HPP