1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/cpu/x86/vm/assembler_x86.hpp Wed Aug 27 00:21:55 2008 -0700 1.3 @@ -0,0 +1,2044 @@ 1.4 +/* 1.5 + * Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved. 1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 1.7 + * 1.8 + * This code is free software; you can redistribute it and/or modify it 1.9 + * under the terms of the GNU General Public License version 2 only, as 1.10 + * published by the Free Software Foundation. 1.11 + * 1.12 + * This code is distributed in the hope that it will be useful, but WITHOUT 1.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 1.14 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 1.15 + * version 2 for more details (a copy is included in the LICENSE file that 1.16 + * accompanied this code). 1.17 + * 1.18 + * You should have received a copy of the GNU General Public License version 1.19 + * 2 along with this work; if not, write to the Free Software Foundation, 1.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 1.21 + * 1.22 + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 1.23 + * CA 95054 USA or visit www.sun.com if you need additional information or 1.24 + * have any questions. 1.25 + * 1.26 + */ 1.27 + 1.28 +class BiasedLockingCounters; 1.29 + 1.30 +// Contains all the definitions needed for x86 assembly code generation. 1.31 + 1.32 +// Calling convention 1.33 +class Argument VALUE_OBJ_CLASS_SPEC { 1.34 + public: 1.35 + enum { 1.36 +#ifdef _LP64 1.37 +#ifdef _WIN64 1.38 + n_int_register_parameters_c = 4, // rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...) 1.39 + n_float_register_parameters_c = 4, // xmm0 - xmm3 (c_farg0, c_farg1, ... ) 1.40 +#else 1.41 + n_int_register_parameters_c = 6, // rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...) 1.42 + n_float_register_parameters_c = 8, // xmm0 - xmm7 (c_farg0, c_farg1, ... 
) 1.43 +#endif // _WIN64 1.44 + n_int_register_parameters_j = 6, // j_rarg0, j_rarg1, ... 1.45 + n_float_register_parameters_j = 8 // j_farg0, j_farg1, ... 1.46 +#else 1.47 + n_register_parameters = 0 // 0 registers used to pass arguments 1.48 +#endif // _LP64 1.49 + }; 1.50 +}; 1.51 + 1.52 + 1.53 +#ifdef _LP64 1.54 +// Symbolically name the register arguments used by the c calling convention. 1.55 +// Windows is different from linux/solaris. So much for standards... 1.56 + 1.57 +#ifdef _WIN64 1.58 + 1.59 +REGISTER_DECLARATION(Register, c_rarg0, rcx); 1.60 +REGISTER_DECLARATION(Register, c_rarg1, rdx); 1.61 +REGISTER_DECLARATION(Register, c_rarg2, r8); 1.62 +REGISTER_DECLARATION(Register, c_rarg3, r9); 1.63 + 1.64 +REGISTER_DECLARATION(XMMRegister, c_farg0, xmm0); 1.65 +REGISTER_DECLARATION(XMMRegister, c_farg1, xmm1); 1.66 +REGISTER_DECLARATION(XMMRegister, c_farg2, xmm2); 1.67 +REGISTER_DECLARATION(XMMRegister, c_farg3, xmm3); 1.68 + 1.69 +#else 1.70 + 1.71 +REGISTER_DECLARATION(Register, c_rarg0, rdi); 1.72 +REGISTER_DECLARATION(Register, c_rarg1, rsi); 1.73 +REGISTER_DECLARATION(Register, c_rarg2, rdx); 1.74 +REGISTER_DECLARATION(Register, c_rarg3, rcx); 1.75 +REGISTER_DECLARATION(Register, c_rarg4, r8); 1.76 +REGISTER_DECLARATION(Register, c_rarg5, r9); 1.77 + 1.78 +REGISTER_DECLARATION(XMMRegister, c_farg0, xmm0); 1.79 +REGISTER_DECLARATION(XMMRegister, c_farg1, xmm1); 1.80 +REGISTER_DECLARATION(XMMRegister, c_farg2, xmm2); 1.81 +REGISTER_DECLARATION(XMMRegister, c_farg3, xmm3); 1.82 +REGISTER_DECLARATION(XMMRegister, c_farg4, xmm4); 1.83 +REGISTER_DECLARATION(XMMRegister, c_farg5, xmm5); 1.84 +REGISTER_DECLARATION(XMMRegister, c_farg6, xmm6); 1.85 +REGISTER_DECLARATION(XMMRegister, c_farg7, xmm7); 1.86 + 1.87 +#endif // _WIN64 1.88 + 1.89 +// Symbolically name the register arguments used by the Java calling convention. 1.90 +// We have control over the convention for java so we can do what we please. 
1.91 +// What pleases us is to offset the java calling convention so that when 1.92 +// we call a suitable jni method the arguments are lined up and we don't 1.93 +// have to do little shuffling. A suitable jni method is non-static and a 1.94 +// small number of arguments (two fewer args on windows) 1.95 +// 1.96 +// |-------------------------------------------------------| 1.97 +// | c_rarg0 c_rarg1 c_rarg2 c_rarg3 c_rarg4 c_rarg5 | 1.98 +// |-------------------------------------------------------| 1.99 +// | rcx rdx r8 r9 rdi* rsi* | windows (* not a c_rarg) 1.100 +// | rdi rsi rdx rcx r8 r9 | solaris/linux 1.101 +// |-------------------------------------------------------| 1.102 +// | j_rarg5 j_rarg0 j_rarg1 j_rarg2 j_rarg3 j_rarg4 | 1.103 +// |-------------------------------------------------------| 1.104 + 1.105 +REGISTER_DECLARATION(Register, j_rarg0, c_rarg1); 1.106 +REGISTER_DECLARATION(Register, j_rarg1, c_rarg2); 1.107 +REGISTER_DECLARATION(Register, j_rarg2, c_rarg3); 1.108 +// Windows runs out of register args here 1.109 +#ifdef _WIN64 1.110 +REGISTER_DECLARATION(Register, j_rarg3, rdi); 1.111 +REGISTER_DECLARATION(Register, j_rarg4, rsi); 1.112 +#else 1.113 +REGISTER_DECLARATION(Register, j_rarg3, c_rarg4); 1.114 +REGISTER_DECLARATION(Register, j_rarg4, c_rarg5); 1.115 +#endif /* _WIN64 */ 1.116 +REGISTER_DECLARATION(Register, j_rarg5, c_rarg0); 1.117 + 1.118 +REGISTER_DECLARATION(XMMRegister, j_farg0, xmm0); 1.119 +REGISTER_DECLARATION(XMMRegister, j_farg1, xmm1); 1.120 +REGISTER_DECLARATION(XMMRegister, j_farg2, xmm2); 1.121 +REGISTER_DECLARATION(XMMRegister, j_farg3, xmm3); 1.122 +REGISTER_DECLARATION(XMMRegister, j_farg4, xmm4); 1.123 +REGISTER_DECLARATION(XMMRegister, j_farg5, xmm5); 1.124 +REGISTER_DECLARATION(XMMRegister, j_farg6, xmm6); 1.125 +REGISTER_DECLARATION(XMMRegister, j_farg7, xmm7); 1.126 + 1.127 +REGISTER_DECLARATION(Register, rscratch1, r10); // volatile 1.128 +REGISTER_DECLARATION(Register, rscratch2, r11); // volatile 1.129 + 
1.130 +REGISTER_DECLARATION(Register, r12_heapbase, r12); // callee-saved 1.131 +REGISTER_DECLARATION(Register, r15_thread, r15); // callee-saved 1.132 + 1.133 +#else 1.134 +// rscratch1 will apear in 32bit code that is dead but of course must compile 1.135 +// Using noreg ensures if the dead code is incorrectly live and executed it 1.136 +// will cause an assertion failure 1.137 +#define rscratch1 noreg 1.138 + 1.139 +#endif // _LP64 1.140 + 1.141 +// Address is an abstraction used to represent a memory location 1.142 +// using any of the amd64 addressing modes with one object. 1.143 +// 1.144 +// Note: A register location is represented via a Register, not 1.145 +// via an address for efficiency & simplicity reasons. 1.146 + 1.147 +class ArrayAddress; 1.148 + 1.149 +class Address VALUE_OBJ_CLASS_SPEC { 1.150 + public: 1.151 + enum ScaleFactor { 1.152 + no_scale = -1, 1.153 + times_1 = 0, 1.154 + times_2 = 1, 1.155 + times_4 = 2, 1.156 + times_8 = 3, 1.157 + times_ptr = LP64_ONLY(times_8) NOT_LP64(times_4) 1.158 + }; 1.159 + 1.160 + private: 1.161 + Register _base; 1.162 + Register _index; 1.163 + ScaleFactor _scale; 1.164 + int _disp; 1.165 + RelocationHolder _rspec; 1.166 + 1.167 + // Easily misused constructors make them private 1.168 + // %%% can we make these go away? 1.169 + NOT_LP64(Address(address loc, RelocationHolder spec);) 1.170 + Address(int disp, address loc, relocInfo::relocType rtype); 1.171 + Address(int disp, address loc, RelocationHolder spec); 1.172 + 1.173 + public: 1.174 + 1.175 + int disp() { return _disp; } 1.176 + // creation 1.177 + Address() 1.178 + : _base(noreg), 1.179 + _index(noreg), 1.180 + _scale(no_scale), 1.181 + _disp(0) { 1.182 + } 1.183 + 1.184 + // No default displacement otherwise Register can be implicitly 1.185 + // converted to 0(Register) which is quite a different animal. 
1.186 + 1.187 + Address(Register base, int disp) 1.188 + : _base(base), 1.189 + _index(noreg), 1.190 + _scale(no_scale), 1.191 + _disp(disp) { 1.192 + } 1.193 + 1.194 + Address(Register base, Register index, ScaleFactor scale, int disp = 0) 1.195 + : _base (base), 1.196 + _index(index), 1.197 + _scale(scale), 1.198 + _disp (disp) { 1.199 + assert(!index->is_valid() == (scale == Address::no_scale), 1.200 + "inconsistent address"); 1.201 + } 1.202 + 1.203 + // The following two overloads are used in connection with the 1.204 + // ByteSize type (see sizes.hpp). They simplify the use of 1.205 + // ByteSize'd arguments in assembly code. Note that their equivalent 1.206 + // for the optimized build are the member functions with int disp 1.207 + // argument since ByteSize is mapped to an int type in that case. 1.208 + // 1.209 + // Note: DO NOT introduce similar overloaded functions for WordSize 1.210 + // arguments as in the optimized mode, both ByteSize and WordSize 1.211 + // are mapped to the same type and thus the compiler cannot make a 1.212 + // distinction anymore (=> compiler errors). 1.213 + 1.214 +#ifdef ASSERT 1.215 + Address(Register base, ByteSize disp) 1.216 + : _base(base), 1.217 + _index(noreg), 1.218 + _scale(no_scale), 1.219 + _disp(in_bytes(disp)) { 1.220 + } 1.221 + 1.222 + Address(Register base, Register index, ScaleFactor scale, ByteSize disp) 1.223 + : _base(base), 1.224 + _index(index), 1.225 + _scale(scale), 1.226 + _disp(in_bytes(disp)) { 1.227 + assert(!index->is_valid() == (scale == Address::no_scale), 1.228 + "inconsistent address"); 1.229 + } 1.230 +#endif // ASSERT 1.231 + 1.232 + // accessors 1.233 + bool uses(Register reg) const { 1.234 + return _base == reg || _index == reg; 1.235 + } 1.236 + 1.237 + // Convert the raw encoding form into the form expected by the constructor for 1.238 + // Address. An index of 4 (rsp) corresponds to having no index, so convert 1.239 + // that to noreg for the Address constructor. 
1.240 + static Address make_raw(int base, int index, int scale, int disp); 1.241 + 1.242 + static Address make_array(ArrayAddress); 1.243 + 1.244 + 1.245 + private: 1.246 + bool base_needs_rex() const { 1.247 + return _base != noreg && _base->encoding() >= 8; 1.248 + } 1.249 + 1.250 + bool index_needs_rex() const { 1.251 + return _index != noreg &&_index->encoding() >= 8; 1.252 + } 1.253 + 1.254 + relocInfo::relocType reloc() const { return _rspec.type(); } 1.255 + 1.256 + friend class Assembler; 1.257 + friend class MacroAssembler; 1.258 + friend class LIR_Assembler; // base/index/scale/disp 1.259 +}; 1.260 + 1.261 +// 1.262 +// AddressLiteral has been split out from Address because operands of this type 1.263 +// need to be treated specially on 32bit vs. 64bit platforms. By splitting it out 1.264 +// the few instructions that need to deal with address literals are unique and the 1.265 +// MacroAssembler does not have to implement every instruction in the Assembler 1.266 +// in order to search for address literals that may need special handling depending 1.267 +// on the instruction and the platform. As small step on the way to merging i486/amd64 1.268 +// directories. 1.269 +// 1.270 +class AddressLiteral VALUE_OBJ_CLASS_SPEC { 1.271 + friend class ArrayAddress; 1.272 + RelocationHolder _rspec; 1.273 + // Typically we use AddressLiterals we want to use their rval 1.274 + // However in some situations we want the lval (effect address) of the item. 1.275 + // We provide a special factory for making those lvals. 1.276 + bool _is_lval; 1.277 + 1.278 + // If the target is far we'll need to load the ea of this to 1.279 + // a register to reach it. Otherwise if near we can do rip 1.280 + // relative addressing. 
1.281 + 1.282 + address _target; 1.283 + 1.284 + protected: 1.285 + // creation 1.286 + AddressLiteral() 1.287 + : _is_lval(false), 1.288 + _target(NULL) 1.289 + {} 1.290 + 1.291 + public: 1.292 + 1.293 + 1.294 + AddressLiteral(address target, relocInfo::relocType rtype); 1.295 + 1.296 + AddressLiteral(address target, RelocationHolder const& rspec) 1.297 + : _rspec(rspec), 1.298 + _is_lval(false), 1.299 + _target(target) 1.300 + {} 1.301 + 1.302 + AddressLiteral addr() { 1.303 + AddressLiteral ret = *this; 1.304 + ret._is_lval = true; 1.305 + return ret; 1.306 + } 1.307 + 1.308 + 1.309 + private: 1.310 + 1.311 + address target() { return _target; } 1.312 + bool is_lval() { return _is_lval; } 1.313 + 1.314 + relocInfo::relocType reloc() const { return _rspec.type(); } 1.315 + const RelocationHolder& rspec() const { return _rspec; } 1.316 + 1.317 + friend class Assembler; 1.318 + friend class MacroAssembler; 1.319 + friend class Address; 1.320 + friend class LIR_Assembler; 1.321 +}; 1.322 + 1.323 +// Convience classes 1.324 +class RuntimeAddress: public AddressLiteral { 1.325 + 1.326 + public: 1.327 + 1.328 + RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {} 1.329 + 1.330 +}; 1.331 + 1.332 +class OopAddress: public AddressLiteral { 1.333 + 1.334 + public: 1.335 + 1.336 + OopAddress(address target) : AddressLiteral(target, relocInfo::oop_type){} 1.337 + 1.338 +}; 1.339 + 1.340 +class ExternalAddress: public AddressLiteral { 1.341 + 1.342 + public: 1.343 + 1.344 + ExternalAddress(address target) : AddressLiteral(target, relocInfo::external_word_type){} 1.345 + 1.346 +}; 1.347 + 1.348 +class InternalAddress: public AddressLiteral { 1.349 + 1.350 + public: 1.351 + 1.352 + InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {} 1.353 + 1.354 +}; 1.355 + 1.356 +// x86 can do array addressing as a single operation since disp can be an absolute 1.357 +// address amd64 can't. 
We create a class that expresses the concept but does extra 1.358 +// magic on amd64 to get the final result 1.359 + 1.360 +class ArrayAddress VALUE_OBJ_CLASS_SPEC { 1.361 + private: 1.362 + 1.363 + AddressLiteral _base; 1.364 + Address _index; 1.365 + 1.366 + public: 1.367 + 1.368 + ArrayAddress() {}; 1.369 + ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {}; 1.370 + AddressLiteral base() { return _base; } 1.371 + Address index() { return _index; } 1.372 + 1.373 +}; 1.374 + 1.375 +const int FPUStateSizeInWords = NOT_LP64(27) LP64_ONLY( 512 / wordSize); 1.376 + 1.377 +// The Intel x86/Amd64 Assembler: Pure assembler doing NO optimizations on the instruction 1.378 +// level (e.g. mov rax, 0 is not translated into xor rax, rax!); i.e., what you write 1.379 +// is what you get. The Assembler is generating code into a CodeBuffer. 1.380 + 1.381 +class Assembler : public AbstractAssembler { 1.382 + friend class AbstractAssembler; // for the non-virtual hack 1.383 + friend class LIR_Assembler; // as_Address() 1.384 + friend class StubGenerator; 1.385 + 1.386 + public: 1.387 + enum Condition { // The x86 condition codes used for conditional jumps/moves. 
1.388 + zero = 0x4, 1.389 + notZero = 0x5, 1.390 + equal = 0x4, 1.391 + notEqual = 0x5, 1.392 + less = 0xc, 1.393 + lessEqual = 0xe, 1.394 + greater = 0xf, 1.395 + greaterEqual = 0xd, 1.396 + below = 0x2, 1.397 + belowEqual = 0x6, 1.398 + above = 0x7, 1.399 + aboveEqual = 0x3, 1.400 + overflow = 0x0, 1.401 + noOverflow = 0x1, 1.402 + carrySet = 0x2, 1.403 + carryClear = 0x3, 1.404 + negative = 0x8, 1.405 + positive = 0x9, 1.406 + parity = 0xa, 1.407 + noParity = 0xb 1.408 + }; 1.409 + 1.410 + enum Prefix { 1.411 + // segment overrides 1.412 + CS_segment = 0x2e, 1.413 + SS_segment = 0x36, 1.414 + DS_segment = 0x3e, 1.415 + ES_segment = 0x26, 1.416 + FS_segment = 0x64, 1.417 + GS_segment = 0x65, 1.418 + 1.419 + REX = 0x40, 1.420 + 1.421 + REX_B = 0x41, 1.422 + REX_X = 0x42, 1.423 + REX_XB = 0x43, 1.424 + REX_R = 0x44, 1.425 + REX_RB = 0x45, 1.426 + REX_RX = 0x46, 1.427 + REX_RXB = 0x47, 1.428 + 1.429 + REX_W = 0x48, 1.430 + 1.431 + REX_WB = 0x49, 1.432 + REX_WX = 0x4A, 1.433 + REX_WXB = 0x4B, 1.434 + REX_WR = 0x4C, 1.435 + REX_WRB = 0x4D, 1.436 + REX_WRX = 0x4E, 1.437 + REX_WRXB = 0x4F 1.438 + }; 1.439 + 1.440 + enum WhichOperand { 1.441 + // input to locate_operand, and format code for relocations 1.442 + imm_operand = 0, // embedded 32-bit|64-bit immediate operand 1.443 + disp32_operand = 1, // embedded 32-bit displacement or address 1.444 + call32_operand = 2, // embedded 32-bit self-relative displacement 1.445 +#ifndef _LP64 1.446 + _WhichOperand_limit = 3 1.447 +#else 1.448 + narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop 1.449 + _WhichOperand_limit = 4 1.450 +#endif 1.451 + }; 1.452 + 1.453 + 1.454 + 1.455 + // NOTE: The general philopsophy of the declarations here is that 64bit versions 1.456 + // of instructions are freely declared without the need for wrapping them an ifdef. 1.457 + // (Some dangerous instructions are ifdef's out of inappropriate jvm's.) 
1.458 + // In the .cpp file the implementations are wrapped so that they are dropped out 1.459 + // of the resulting jvm. This is done mostly to keep the footprint of KERNEL 1.460 + // to the size it was prior to merging up the 32bit and 64bit assemblers. 1.461 + // 1.462 + // This does mean you'll get a linker/runtime error if you use a 64bit only instruction 1.463 + // in a 32bit vm. This is somewhat unfortunate but keeps the ifdef noise down. 1.464 + 1.465 +private: 1.466 + 1.467 + 1.468 + // 64bit prefixes 1.469 + int prefix_and_encode(int reg_enc, bool byteinst = false); 1.470 + int prefixq_and_encode(int reg_enc); 1.471 + 1.472 + int prefix_and_encode(int dst_enc, int src_enc, bool byteinst = false); 1.473 + int prefixq_and_encode(int dst_enc, int src_enc); 1.474 + 1.475 + void prefix(Register reg); 1.476 + void prefix(Address adr); 1.477 + void prefixq(Address adr); 1.478 + 1.479 + void prefix(Address adr, Register reg, bool byteinst = false); 1.480 + void prefixq(Address adr, Register reg); 1.481 + 1.482 + void prefix(Address adr, XMMRegister reg); 1.483 + 1.484 + void prefetch_prefix(Address src); 1.485 + 1.486 + // Helper functions for groups of instructions 1.487 + void emit_arith_b(int op1, int op2, Register dst, int imm8); 1.488 + 1.489 + void emit_arith(int op1, int op2, Register dst, int32_t imm32); 1.490 + // only 32bit?? 
1.491 + void emit_arith(int op1, int op2, Register dst, jobject obj); 1.492 + void emit_arith(int op1, int op2, Register dst, Register src); 1.493 + 1.494 + void emit_operand(Register reg, 1.495 + Register base, Register index, Address::ScaleFactor scale, 1.496 + int disp, 1.497 + RelocationHolder const& rspec, 1.498 + int rip_relative_correction = 0); 1.499 + 1.500 + void emit_operand(Register reg, Address adr, int rip_relative_correction = 0); 1.501 + 1.502 + // operands that only take the original 32bit registers 1.503 + void emit_operand32(Register reg, Address adr); 1.504 + 1.505 + void emit_operand(XMMRegister reg, 1.506 + Register base, Register index, Address::ScaleFactor scale, 1.507 + int disp, 1.508 + RelocationHolder const& rspec); 1.509 + 1.510 + void emit_operand(XMMRegister reg, Address adr); 1.511 + 1.512 + void emit_operand(MMXRegister reg, Address adr); 1.513 + 1.514 + // workaround gcc (3.2.1-7) bug 1.515 + void emit_operand(Address adr, MMXRegister reg); 1.516 + 1.517 + 1.518 + // Immediate-to-memory forms 1.519 + void emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32); 1.520 + 1.521 + void emit_farith(int b1, int b2, int i); 1.522 + 1.523 + 1.524 + protected: 1.525 + #ifdef ASSERT 1.526 + void check_relocation(RelocationHolder const& rspec, int format); 1.527 + #endif 1.528 + 1.529 + inline void emit_long64(jlong x); 1.530 + 1.531 + void emit_data(jint data, relocInfo::relocType rtype, int format); 1.532 + void emit_data(jint data, RelocationHolder const& rspec, int format); 1.533 + void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0); 1.534 + void emit_data64(jlong data, RelocationHolder const& rspec, int format = 0); 1.535 + 1.536 + 1.537 + bool reachable(AddressLiteral adr) NOT_LP64({ return true;}); 1.538 + 1.539 + // These are all easily abused and hence protected 1.540 + 1.541 + void mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec, int format = 0); 1.542 + 1.543 + // 32BIT 
ONLY SECTION 1.544 +#ifndef _LP64 1.545 + // Make these disappear in 64bit mode since they would never be correct 1.546 + void cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY 1.547 + void cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY 1.548 + 1.549 + void mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY 1.550 + 1.551 + void push_literal32(int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY 1.552 +#else 1.553 + // 64BIT ONLY SECTION 1.554 + void mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec); // 64BIT ONLY 1.555 +#endif // _LP64 1.556 + 1.557 + // These are unique in that we are ensured by the caller that the 32bit 1.558 + // relative in these instructions will always be able to reach the potentially 1.559 + // 64bit address described by entry. Since they can take a 64bit address they 1.560 + // don't have the 32 suffix like the other instructions in this class. 1.561 + 1.562 + void call_literal(address entry, RelocationHolder const& rspec); 1.563 + void jmp_literal(address entry, RelocationHolder const& rspec); 1.564 + 1.565 + // Avoid using directly section 1.566 + // Instructions in this section are actually usable by anyone without danger 1.567 + // of failure but have performance issues that are addressed my enhanced 1.568 + // instructions which will do the proper thing base on the particular cpu. 1.569 + // We protect them because we don't trust you... 1.570 + 1.571 + // Don't use next inc() and dec() methods directly. INC & DEC instructions 1.572 + // could cause a partial flag stall since they don't set CF flag. 1.573 + // Use MacroAssembler::decrement() & MacroAssembler::increment() methods 1.574 + // which call inc() & dec() or add() & sub() in accordance with 1.575 + // the product flag UseIncDec value. 
1.576 + 1.577 + void decl(Register dst); 1.578 + void decl(Address dst); 1.579 + void decq(Register dst); 1.580 + void decq(Address dst); 1.581 + 1.582 + void incl(Register dst); 1.583 + void incl(Address dst); 1.584 + void incq(Register dst); 1.585 + void incq(Address dst); 1.586 + 1.587 + // New cpus require use of movsd and movss to avoid partial register stall 1.588 + // when loading from memory. But for old Opteron use movlpd instead of movsd. 1.589 + // The selection is done in MacroAssembler::movdbl() and movflt(). 1.590 + 1.591 + // Move Scalar Single-Precision Floating-Point Values 1.592 + void movss(XMMRegister dst, Address src); 1.593 + void movss(XMMRegister dst, XMMRegister src); 1.594 + void movss(Address dst, XMMRegister src); 1.595 + 1.596 + // Move Scalar Double-Precision Floating-Point Values 1.597 + void movsd(XMMRegister dst, Address src); 1.598 + void movsd(XMMRegister dst, XMMRegister src); 1.599 + void movsd(Address dst, XMMRegister src); 1.600 + void movlpd(XMMRegister dst, Address src); 1.601 + 1.602 + // New cpus require use of movaps and movapd to avoid partial register stall 1.603 + // when moving between registers. 
1.604 + void movaps(XMMRegister dst, XMMRegister src); 1.605 + void movapd(XMMRegister dst, XMMRegister src); 1.606 + 1.607 + // End avoid using directly 1.608 + 1.609 + 1.610 + // Instruction prefixes 1.611 + void prefix(Prefix p); 1.612 + 1.613 + public: 1.614 + 1.615 + // Creation 1.616 + Assembler(CodeBuffer* code) : AbstractAssembler(code) {} 1.617 + 1.618 + // Decoding 1.619 + static address locate_operand(address inst, WhichOperand which); 1.620 + static address locate_next_instruction(address inst); 1.621 + 1.622 + // Utilities 1.623 + 1.624 +#ifdef _LP64 1.625 + static bool is_simm(int64_t x, int nbits) { return -( CONST64(1) << (nbits-1) ) <= x && x < ( CONST64(1) << (nbits-1) ); } 1.626 + static bool is_simm32(int64_t x) { return x == (int64_t)(int32_t)x; } 1.627 +#else 1.628 + static bool is_simm(int32_t x, int nbits) { return -( 1 << (nbits-1) ) <= x && x < ( 1 << (nbits-1) ); } 1.629 + static bool is_simm32(int32_t x) { return true; } 1.630 +#endif // LP64 1.631 + 1.632 + // Generic instructions 1.633 + // Does 32bit or 64bit as needed for the platform. 
In some sense these 1.634 + // belong in macro assembler but there is no need for both varieties to exist 1.635 + 1.636 + void lea(Register dst, Address src); 1.637 + 1.638 + void mov(Register dst, Register src); 1.639 + 1.640 + void pusha(); 1.641 + void popa(); 1.642 + 1.643 + void pushf(); 1.644 + void popf(); 1.645 + 1.646 + void push(int32_t imm32); 1.647 + 1.648 + void push(Register src); 1.649 + 1.650 + void pop(Register dst); 1.651 + 1.652 + // These are dummies to prevent surprise implicit conversions to Register 1.653 + void push(void* v); 1.654 + void pop(void* v); 1.655 + 1.656 + 1.657 + // These do register sized moves/scans 1.658 + void rep_mov(); 1.659 + void rep_set(); 1.660 + void repne_scan(); 1.661 +#ifdef _LP64 1.662 + void repne_scanl(); 1.663 +#endif 1.664 + 1.665 + // Vanilla instructions in lexical order 1.666 + 1.667 + void adcl(Register dst, int32_t imm32); 1.668 + void adcl(Register dst, Address src); 1.669 + void adcl(Register dst, Register src); 1.670 + 1.671 + void adcq(Register dst, int32_t imm32); 1.672 + void adcq(Register dst, Address src); 1.673 + void adcq(Register dst, Register src); 1.674 + 1.675 + 1.676 + void addl(Address dst, int32_t imm32); 1.677 + void addl(Address dst, Register src); 1.678 + void addl(Register dst, int32_t imm32); 1.679 + void addl(Register dst, Address src); 1.680 + void addl(Register dst, Register src); 1.681 + 1.682 + void addq(Address dst, int32_t imm32); 1.683 + void addq(Address dst, Register src); 1.684 + void addq(Register dst, int32_t imm32); 1.685 + void addq(Register dst, Address src); 1.686 + void addq(Register dst, Register src); 1.687 + 1.688 + 1.689 + void addr_nop_4(); 1.690 + void addr_nop_5(); 1.691 + void addr_nop_7(); 1.692 + void addr_nop_8(); 1.693 + 1.694 + // Add Scalar Double-Precision Floating-Point Values 1.695 + void addsd(XMMRegister dst, Address src); 1.696 + void addsd(XMMRegister dst, XMMRegister src); 1.697 + 1.698 + // Add Scalar Single-Precision Floating-Point Values 
1.699 + void addss(XMMRegister dst, Address src); 1.700 + void addss(XMMRegister dst, XMMRegister src); 1.701 + 1.702 + void andl(Register dst, int32_t imm32); 1.703 + void andl(Register dst, Address src); 1.704 + void andl(Register dst, Register src); 1.705 + 1.706 + void andq(Register dst, int32_t imm32); 1.707 + void andq(Register dst, Address src); 1.708 + void andq(Register dst, Register src); 1.709 + 1.710 + 1.711 + // Bitwise Logical AND of Packed Double-Precision Floating-Point Values 1.712 + void andpd(XMMRegister dst, Address src); 1.713 + void andpd(XMMRegister dst, XMMRegister src); 1.714 + 1.715 + void bswapl(Register reg); 1.716 + 1.717 + void bswapq(Register reg); 1.718 + 1.719 + void call(Label& L, relocInfo::relocType rtype); 1.720 + void call(Register reg); // push pc; pc <- reg 1.721 + void call(Address adr); // push pc; pc <- adr 1.722 + 1.723 + void cdql(); 1.724 + 1.725 + void cdqq(); 1.726 + 1.727 + void cld() { emit_byte(0xfc); } 1.728 + 1.729 + void clflush(Address adr); 1.730 + 1.731 + void cmovl(Condition cc, Register dst, Register src); 1.732 + void cmovl(Condition cc, Register dst, Address src); 1.733 + 1.734 + void cmovq(Condition cc, Register dst, Register src); 1.735 + void cmovq(Condition cc, Register dst, Address src); 1.736 + 1.737 + 1.738 + void cmpb(Address dst, int imm8); 1.739 + 1.740 + void cmpl(Address dst, int32_t imm32); 1.741 + 1.742 + void cmpl(Register dst, int32_t imm32); 1.743 + void cmpl(Register dst, Register src); 1.744 + void cmpl(Register dst, Address src); 1.745 + 1.746 + void cmpq(Address dst, int32_t imm32); 1.747 + void cmpq(Address dst, Register src); 1.748 + 1.749 + void cmpq(Register dst, int32_t imm32); 1.750 + void cmpq(Register dst, Register src); 1.751 + void cmpq(Register dst, Address src); 1.752 + 1.753 + // these are dummies used to catch attempting to convert NULL to Register 1.754 + void cmpl(Register dst, void* junk); // dummy 1.755 + void cmpq(Register dst, void* junk); // dummy 1.756 + 1.757 + 
void cmpw(Address dst, int imm16); 1.758 + 1.759 + void cmpxchg8 (Address adr); 1.760 + 1.761 + void cmpxchgl(Register reg, Address adr); 1.762 + 1.763 + void cmpxchgq(Register reg, Address adr); 1.764 + 1.765 + // Ordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS 1.766 + void comisd(XMMRegister dst, Address src); 1.767 + 1.768 + // Ordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS 1.769 + void comiss(XMMRegister dst, Address src); 1.770 + 1.771 + // Identify processor type and features 1.772 + void cpuid() { 1.773 + emit_byte(0x0F); 1.774 + emit_byte(0xA2); 1.775 + } 1.776 + 1.777 + // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value 1.778 + void cvtsd2ss(XMMRegister dst, XMMRegister src); 1.779 + 1.780 + // Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value 1.781 + void cvtsi2sdl(XMMRegister dst, Register src); 1.782 + void cvtsi2sdq(XMMRegister dst, Register src); 1.783 + 1.784 + // Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value 1.785 + void cvtsi2ssl(XMMRegister dst, Register src); 1.786 + void cvtsi2ssq(XMMRegister dst, Register src); 1.787 + 1.788 + // Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Value 1.789 + void cvtdq2pd(XMMRegister dst, XMMRegister src); 1.790 + 1.791 + // Convert Packed Signed Doubleword Integers to Packed Single-Precision Floating-Point Value 1.792 + void cvtdq2ps(XMMRegister dst, XMMRegister src); 1.793 + 1.794 + // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value 1.795 + void cvtss2sd(XMMRegister dst, XMMRegister src); 1.796 + 1.797 + // Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer 1.798 + void cvttsd2sil(Register dst, Address src); 1.799 + void cvttsd2sil(Register dst, XMMRegister src); 1.800 + void cvttsd2siq(Register dst, XMMRegister src); 
1.801 + 1.802 + // Convert with Truncation Scalar Single-Precision Floating-Point Value to Doubleword Integer 1.803 + void cvttss2sil(Register dst, XMMRegister src); 1.804 + void cvttss2siq(Register dst, XMMRegister src); 1.805 + 1.806 + // Divide Scalar Double-Precision Floating-Point Values 1.807 + void divsd(XMMRegister dst, Address src); 1.808 + void divsd(XMMRegister dst, XMMRegister src); 1.809 + 1.810 + // Divide Scalar Single-Precision Floating-Point Values 1.811 + void divss(XMMRegister dst, Address src); 1.812 + void divss(XMMRegister dst, XMMRegister src); 1.813 + 1.814 + void emms(); 1.815 + 1.816 + void fabs(); 1.817 + 1.818 + void fadd(int i); 1.819 + 1.820 + void fadd_d(Address src); 1.821 + void fadd_s(Address src); 1.822 + 1.823 + // "Alternate" versions of x87 instructions place result down in FPU 1.824 + // stack instead of on TOS 1.825 + 1.826 + void fadda(int i); // "alternate" fadd 1.827 + void faddp(int i = 1); 1.828 + 1.829 + void fchs(); 1.830 + 1.831 + void fcom(int i); 1.832 + 1.833 + void fcomp(int i = 1); 1.834 + void fcomp_d(Address src); 1.835 + void fcomp_s(Address src); 1.836 + 1.837 + void fcompp(); 1.838 + 1.839 + void fcos(); 1.840 + 1.841 + void fdecstp(); 1.842 + 1.843 + void fdiv(int i); 1.844 + void fdiv_d(Address src); 1.845 + void fdivr_s(Address src); 1.846 + void fdiva(int i); // "alternate" fdiv 1.847 + void fdivp(int i = 1); 1.848 + 1.849 + void fdivr(int i); 1.850 + void fdivr_d(Address src); 1.851 + void fdiv_s(Address src); 1.852 + 1.853 + void fdivra(int i); // "alternate" reversed fdiv 1.854 + 1.855 + void fdivrp(int i = 1); 1.856 + 1.857 + void ffree(int i = 0); 1.858 + 1.859 + void fild_d(Address adr); 1.860 + void fild_s(Address adr); 1.861 + 1.862 + void fincstp(); 1.863 + 1.864 + void finit(); 1.865 + 1.866 + void fist_s (Address adr); 1.867 + void fistp_d(Address adr); 1.868 + void fistp_s(Address adr); 1.869 + 1.870 + void fld1(); 1.871 + 1.872 + void fld_d(Address adr); 1.873 + void fld_s(Address adr); 
1.874 + void fld_s(int index); 1.875 + void fld_x(Address adr); // extended-precision (80-bit) format 1.876 + 1.877 + void fldcw(Address src); 1.878 + 1.879 + void fldenv(Address src); 1.880 + 1.881 + void fldlg2(); 1.882 + 1.883 + void fldln2(); 1.884 + 1.885 + void fldz(); 1.886 + 1.887 + void flog(); 1.888 + void flog10(); 1.889 + 1.890 + void fmul(int i); 1.891 + 1.892 + void fmul_d(Address src); 1.893 + void fmul_s(Address src); 1.894 + 1.895 + void fmula(int i); // "alternate" fmul 1.896 + 1.897 + void fmulp(int i = 1); 1.898 + 1.899 + void fnsave(Address dst); 1.900 + 1.901 + void fnstcw(Address src); 1.902 + 1.903 + void fnstsw_ax(); 1.904 + 1.905 + void fprem(); 1.906 + void fprem1(); 1.907 + 1.908 + void frstor(Address src); 1.909 + 1.910 + void fsin(); 1.911 + 1.912 + void fsqrt(); 1.913 + 1.914 + void fst_d(Address adr); 1.915 + void fst_s(Address adr); 1.916 + 1.917 + void fstp_d(Address adr); 1.918 + void fstp_d(int index); 1.919 + void fstp_s(Address adr); 1.920 + void fstp_x(Address adr); // extended-precision (80-bit) format 1.921 + 1.922 + void fsub(int i); 1.923 + void fsub_d(Address src); 1.924 + void fsub_s(Address src); 1.925 + 1.926 + void fsuba(int i); // "alternate" fsub 1.927 + 1.928 + void fsubp(int i = 1); 1.929 + 1.930 + void fsubr(int i); 1.931 + void fsubr_d(Address src); 1.932 + void fsubr_s(Address src); 1.933 + 1.934 + void fsubra(int i); // "alternate" reversed fsub 1.935 + 1.936 + void fsubrp(int i = 1); 1.937 + 1.938 + void ftan(); 1.939 + 1.940 + void ftst(); 1.941 + 1.942 + void fucomi(int i = 1); 1.943 + void fucomip(int i = 1); 1.944 + 1.945 + void fwait(); 1.946 + 1.947 + void fxch(int i = 1); 1.948 + 1.949 + void fxrstor(Address src); 1.950 + 1.951 + void fxsave(Address dst); 1.952 + 1.953 + void fyl2x(); 1.954 + 1.955 + void hlt(); 1.956 + 1.957 + void idivl(Register src); 1.958 + 1.959 + void idivq(Register src); 1.960 + 1.961 + void imull(Register dst, Register src); 1.962 + void imull(Register dst, Register src, int 
value); 1.963 + 1.964 + void imulq(Register dst, Register src); 1.965 + void imulq(Register dst, Register src, int value); 1.966 + 1.967 + 1.968 + // jcc is the generic conditional branch generator to run- 1.969 + // time routines, jcc is used for branches to labels. jcc 1.970 + // takes a branch opcode (cc) and a label (L) and generates 1.971 + // either a backward branch or a forward branch and links it 1.972 + // to the label fixup chain. Usage: 1.973 + // 1.974 + // Label L; // unbound label 1.975 + // jcc(cc, L); // forward branch to unbound label 1.976 + // bind(L); // bind label to the current pc 1.977 + // jcc(cc, L); // backward branch to bound label 1.978 + // bind(L); // illegal: a label may be bound only once 1.979 + // 1.980 + // Note: The same Label can be used for forward and backward branches 1.981 + // but it may be bound only once. 1.982 + 1.983 + void jcc(Condition cc, Label& L, 1.984 + relocInfo::relocType rtype = relocInfo::none); 1.985 + 1.986 + // Conditional jump to a 8-bit offset to L. 1.987 + // WARNING: be very careful using this for forward jumps. If the label is 1.988 + // not bound within an 8-bit offset of this instruction, a run-time error 1.989 + // will occur. 1.990 + void jccb(Condition cc, Label& L); 1.991 + 1.992 + void jmp(Address entry); // pc <- entry 1.993 + 1.994 + // Label operations & relative jumps (PPUM Appendix D) 1.995 + void jmp(Label& L, relocInfo::relocType rtype = relocInfo::none); // unconditional jump to L 1.996 + 1.997 + void jmp(Register entry); // pc <- entry 1.998 + 1.999 + // Unconditional 8-bit offset jump to L. 1.1000 + // WARNING: be very careful using this for forward jumps. If the label is 1.1001 + // not bound within an 8-bit offset of this instruction, a run-time error 1.1002 + // will occur. 
1.1003 + void jmpb(Label& L); 1.1004 + 1.1005 + void ldmxcsr( Address src ); 1.1006 + 1.1007 + void leal(Register dst, Address src); 1.1008 + 1.1009 + void leaq(Register dst, Address src); 1.1010 + 1.1011 + void lfence() { 1.1012 + emit_byte(0x0F); 1.1013 + emit_byte(0xAE); 1.1014 + emit_byte(0xE8); 1.1015 + } 1.1016 + 1.1017 + void lock(); 1.1018 + 1.1019 + enum Membar_mask_bits { 1.1020 + StoreStore = 1 << 3, 1.1021 + LoadStore = 1 << 2, 1.1022 + StoreLoad = 1 << 1, 1.1023 + LoadLoad = 1 << 0 1.1024 + }; 1.1025 + 1.1026 + // Serializes memory. 1.1027 + void membar(Membar_mask_bits order_constraint) { 1.1028 + // We only have to handle StoreLoad and LoadLoad 1.1029 + if (order_constraint & StoreLoad) { 1.1030 + // MFENCE subsumes LFENCE 1.1031 + mfence(); 1.1032 + } /* [jk] not needed currently: else if (order_constraint & LoadLoad) { 1.1033 + lfence(); 1.1034 + } */ 1.1035 + } 1.1036 + 1.1037 + void mfence(); 1.1038 + 1.1039 + // Moves 1.1040 + 1.1041 + void mov64(Register dst, int64_t imm64); 1.1042 + 1.1043 + void movb(Address dst, Register src); 1.1044 + void movb(Address dst, int imm8); 1.1045 + void movb(Register dst, Address src); 1.1046 + 1.1047 + void movdl(XMMRegister dst, Register src); 1.1048 + void movdl(Register dst, XMMRegister src); 1.1049 + 1.1050 + // Move Double Quadword 1.1051 + void movdq(XMMRegister dst, Register src); 1.1052 + void movdq(Register dst, XMMRegister src); 1.1053 + 1.1054 + // Move Aligned Double Quadword 1.1055 + void movdqa(Address dst, XMMRegister src); 1.1056 + void movdqa(XMMRegister dst, Address src); 1.1057 + void movdqa(XMMRegister dst, XMMRegister src); 1.1058 + 1.1059 + void movl(Register dst, int32_t imm32); 1.1060 + void movl(Address dst, int32_t imm32); 1.1061 + void movl(Register dst, Register src); 1.1062 + void movl(Register dst, Address src); 1.1063 + void movl(Address dst, Register src); 1.1064 + 1.1065 + // These dummies prevent using movl from converting a zero (like NULL) into Register 1.1066 + // by giving 
the compiler two choices it can't resolve 1.1067 + 1.1068 + void movl(Address dst, void* junk); 1.1069 + void movl(Register dst, void* junk); 1.1070 + 1.1071 +#ifdef _LP64 1.1072 + void movq(Register dst, Register src); 1.1073 + void movq(Register dst, Address src); 1.1074 + void movq(Address dst, Register src); 1.1075 +#endif 1.1076 + 1.1077 + void movq(Address dst, MMXRegister src ); 1.1078 + void movq(MMXRegister dst, Address src ); 1.1079 + 1.1080 +#ifdef _LP64 1.1081 + // These dummies prevent using movq from converting a zero (like NULL) into Register 1.1082 + // by giving the compiler two choices it can't resolve 1.1083 + 1.1084 + void movq(Address dst, void* dummy); 1.1085 + void movq(Register dst, void* dummy); 1.1086 +#endif 1.1087 + 1.1088 + // Move Quadword 1.1089 + void movq(Address dst, XMMRegister src); 1.1090 + void movq(XMMRegister dst, Address src); 1.1091 + 1.1092 + void movsbl(Register dst, Address src); 1.1093 + void movsbl(Register dst, Register src); 1.1094 + 1.1095 +#ifdef _LP64 1.1096 + // Move signed 32bit immediate to 64bit extending sign 1.1097 + void movslq(Address dst, int32_t imm64); 1.1098 + void movslq(Register dst, int32_t imm64); 1.1099 + 1.1100 + void movslq(Register dst, Address src); 1.1101 + void movslq(Register dst, Register src); 1.1102 + void movslq(Register dst, void* src); // Dummy declaration to cause NULL to be ambiguous 1.1103 +#endif 1.1104 + 1.1105 + void movswl(Register dst, Address src); 1.1106 + void movswl(Register dst, Register src); 1.1107 + 1.1108 + void movw(Address dst, int imm16); 1.1109 + void movw(Register dst, Address src); 1.1110 + void movw(Address dst, Register src); 1.1111 + 1.1112 + void movzbl(Register dst, Address src); 1.1113 + void movzbl(Register dst, Register src); 1.1114 + 1.1115 + void movzwl(Register dst, Address src); 1.1116 + void movzwl(Register dst, Register src); 1.1117 + 1.1118 + void mull(Address src); 1.1119 + void mull(Register src); 1.1120 + 1.1121 + // Multiply Scalar 
Double-Precision Floating-Point Values 1.1122 + void mulsd(XMMRegister dst, Address src); 1.1123 + void mulsd(XMMRegister dst, XMMRegister src); 1.1124 + 1.1125 + // Multiply Scalar Single-Precision Floating-Point Values 1.1126 + void mulss(XMMRegister dst, Address src); 1.1127 + void mulss(XMMRegister dst, XMMRegister src); 1.1128 + 1.1129 + void negl(Register dst); 1.1130 + 1.1131 +#ifdef _LP64 1.1132 + void negq(Register dst); 1.1133 +#endif 1.1134 + 1.1135 + void nop(int i = 1); 1.1136 + 1.1137 + void notl(Register dst); 1.1138 + 1.1139 +#ifdef _LP64 1.1140 + void notq(Register dst); 1.1141 +#endif 1.1142 + 1.1143 + void orl(Address dst, int32_t imm32); 1.1144 + void orl(Register dst, int32_t imm32); 1.1145 + void orl(Register dst, Address src); 1.1146 + void orl(Register dst, Register src); 1.1147 + 1.1148 + void orq(Address dst, int32_t imm32); 1.1149 + void orq(Register dst, int32_t imm32); 1.1150 + void orq(Register dst, Address src); 1.1151 + void orq(Register dst, Register src); 1.1152 + 1.1153 + void popl(Address dst); 1.1154 + 1.1155 +#ifdef _LP64 1.1156 + void popq(Address dst); 1.1157 +#endif 1.1158 + 1.1159 + // Prefetches (SSE, SSE2, 3DNOW only) 1.1160 + 1.1161 + void prefetchnta(Address src); 1.1162 + void prefetchr(Address src); 1.1163 + void prefetcht0(Address src); 1.1164 + void prefetcht1(Address src); 1.1165 + void prefetcht2(Address src); 1.1166 + void prefetchw(Address src); 1.1167 + 1.1168 + // Shuffle Packed Doublewords 1.1169 + void pshufd(XMMRegister dst, XMMRegister src, int mode); 1.1170 + void pshufd(XMMRegister dst, Address src, int mode); 1.1171 + 1.1172 + // Shuffle Packed Low Words 1.1173 + void pshuflw(XMMRegister dst, XMMRegister src, int mode); 1.1174 + void pshuflw(XMMRegister dst, Address src, int mode); 1.1175 + 1.1176 + // Shift Right Logical Quadword Immediate 1.1177 + void psrlq(XMMRegister dst, int shift); 1.1178 + 1.1179 + // Interleave Low Bytes 1.1180 + void punpcklbw(XMMRegister dst, XMMRegister src); 1.1181 + 1.1182 
+ void pushl(Address src); 1.1183 + 1.1184 + void pushq(Address src); 1.1185 + 1.1186 + // Xor Packed Byte Integer Values 1.1187 + void pxor(XMMRegister dst, Address src); 1.1188 + void pxor(XMMRegister dst, XMMRegister src); 1.1189 + 1.1190 + void rcll(Register dst, int imm8); 1.1191 + 1.1192 + void rclq(Register dst, int imm8); 1.1193 + 1.1194 + void ret(int imm16); 1.1195 + 1.1196 + void sahf(); 1.1197 + 1.1198 + void sarl(Register dst, int imm8); 1.1199 + void sarl(Register dst); 1.1200 + 1.1201 + void sarq(Register dst, int imm8); 1.1202 + void sarq(Register dst); 1.1203 + 1.1204 + void sbbl(Address dst, int32_t imm32); 1.1205 + void sbbl(Register dst, int32_t imm32); 1.1206 + void sbbl(Register dst, Address src); 1.1207 + void sbbl(Register dst, Register src); 1.1208 + 1.1209 + void sbbq(Address dst, int32_t imm32); 1.1210 + void sbbq(Register dst, int32_t imm32); 1.1211 + void sbbq(Register dst, Address src); 1.1212 + void sbbq(Register dst, Register src); 1.1213 + 1.1214 + void setb(Condition cc, Register dst); 1.1215 + 1.1216 + void shldl(Register dst, Register src); 1.1217 + 1.1218 + void shll(Register dst, int imm8); 1.1219 + void shll(Register dst); 1.1220 + 1.1221 + void shlq(Register dst, int imm8); 1.1222 + void shlq(Register dst); 1.1223 + 1.1224 + void shrdl(Register dst, Register src); 1.1225 + 1.1226 + void shrl(Register dst, int imm8); 1.1227 + void shrl(Register dst); 1.1228 + 1.1229 + void shrq(Register dst, int imm8); 1.1230 + void shrq(Register dst); 1.1231 + 1.1232 + void smovl(); // QQQ generic? 
1.1233 + 1.1234 + // Compute Square Root of Scalar Double-Precision Floating-Point Value 1.1235 + void sqrtsd(XMMRegister dst, Address src); 1.1236 + void sqrtsd(XMMRegister dst, XMMRegister src); 1.1237 + 1.1238 + void std() { emit_byte(0xfd); } 1.1239 + 1.1240 + void stmxcsr( Address dst ); 1.1241 + 1.1242 + void subl(Address dst, int32_t imm32); 1.1243 + void subl(Address dst, Register src); 1.1244 + void subl(Register dst, int32_t imm32); 1.1245 + void subl(Register dst, Address src); 1.1246 + void subl(Register dst, Register src); 1.1247 + 1.1248 + void subq(Address dst, int32_t imm32); 1.1249 + void subq(Address dst, Register src); 1.1250 + void subq(Register dst, int32_t imm32); 1.1251 + void subq(Register dst, Address src); 1.1252 + void subq(Register dst, Register src); 1.1253 + 1.1254 + 1.1255 + // Subtract Scalar Double-Precision Floating-Point Values 1.1256 + void subsd(XMMRegister dst, Address src); 1.1257 + void subsd(XMMRegister dst, XMMRegister src); 1.1258 + 1.1259 + // Subtract Scalar Single-Precision Floating-Point Values 1.1260 + void subss(XMMRegister dst, Address src); 1.1261 + void subss(XMMRegister dst, XMMRegister src); 1.1262 + 1.1263 + void testb(Register dst, int imm8); 1.1264 + 1.1265 + void testl(Register dst, int32_t imm32); 1.1266 + void testl(Register dst, Register src); 1.1267 + void testl(Register dst, Address src); 1.1268 + 1.1269 + void testq(Register dst, int32_t imm32); 1.1270 + void testq(Register dst, Register src); 1.1271 + 1.1272 + 1.1273 + // Unordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS 1.1274 + void ucomisd(XMMRegister dst, Address src); 1.1275 + void ucomisd(XMMRegister dst, XMMRegister src); 1.1276 + 1.1277 + // Unordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS 1.1278 + void ucomiss(XMMRegister dst, Address src); 1.1279 + void ucomiss(XMMRegister dst, XMMRegister src); 1.1280 + 1.1281 + void xaddl(Address dst, Register src); 1.1282 + 1.1283 + void 
xaddq(Address dst, Register src); 1.1284 + 1.1285 + void xchgl(Register reg, Address adr); 1.1286 + void xchgl(Register dst, Register src); 1.1287 + 1.1288 + void xchgq(Register reg, Address adr); 1.1289 + void xchgq(Register dst, Register src); 1.1290 + 1.1291 + void xorl(Register dst, int32_t imm32); 1.1292 + void xorl(Register dst, Address src); 1.1293 + void xorl(Register dst, Register src); 1.1294 + 1.1295 + void xorq(Register dst, Address src); 1.1296 + void xorq(Register dst, Register src); 1.1297 + 1.1298 + // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values 1.1299 + void xorpd(XMMRegister dst, Address src); 1.1300 + void xorpd(XMMRegister dst, XMMRegister src); 1.1301 + 1.1302 + // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values 1.1303 + void xorps(XMMRegister dst, Address src); 1.1304 + void xorps(XMMRegister dst, XMMRegister src); 1.1305 + 1.1306 + void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0 1.1307 +}; 1.1308 + 1.1309 + 1.1310 +// MacroAssembler extends Assembler by frequently used macros. 1.1311 +// 1.1312 +// Instructions for which a 'better' code sequence exists depending 1.1313 +// on arguments should also go in here. 1.1314 + 1.1315 +class MacroAssembler: public Assembler { 1.1316 + friend class LIR_Assembler; 1.1317 + protected: 1.1318 + 1.1319 + Address as_Address(AddressLiteral adr); 1.1320 + Address as_Address(ArrayAddress adr); 1.1321 + 1.1322 + // Support for VM calls 1.1323 + // 1.1324 + // This is the base routine called by the different versions of call_VM_leaf. The interpreter 1.1325 + // may customize this version by overriding it for its purposes (e.g., to save/restore 1.1326 + // additional registers when doing a VM call). 
1.1327 +#ifdef CC_INTERP 1.1328 + // c++ interpreter never wants to use interp_masm version of call_VM 1.1329 + #define VIRTUAL 1.1330 +#else 1.1331 + #define VIRTUAL virtual 1.1332 +#endif 1.1333 + 1.1334 + VIRTUAL void call_VM_leaf_base( 1.1335 + address entry_point, // the entry point 1.1336 + int number_of_arguments // the number of arguments to pop after the call 1.1337 + ); 1.1338 + 1.1339 + // This is the base routine called by the different versions of call_VM. The interpreter 1.1340 + // may customize this version by overriding it for its purposes (e.g., to save/restore 1.1341 + // additional registers when doing a VM call). 1.1342 + // 1.1343 + // If no java_thread register is specified (noreg) than rdi will be used instead. call_VM_base 1.1344 + // returns the register which contains the thread upon return. If a thread register has been 1.1345 + // specified, the return value will correspond to that register. If no last_java_sp is specified 1.1346 + // (noreg) than rsp will be used instead. 1.1347 + VIRTUAL void call_VM_base( // returns the register containing the thread upon return 1.1348 + Register oop_result, // where an oop-result ends up if any; use noreg otherwise 1.1349 + Register java_thread, // the thread if computed before ; use noreg otherwise 1.1350 + Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise 1.1351 + address entry_point, // the entry point 1.1352 + int number_of_arguments, // the number of arguments (w/o thread) to pop after the call 1.1353 + bool check_exceptions // whether to check for pending exceptions after return 1.1354 + ); 1.1355 + 1.1356 + // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code. 1.1357 + // The implementation is only non-empty for the InterpreterMacroAssembler, 1.1358 + // as only the interpreter handles PopFrame and ForceEarlyReturn requests. 
1.1359 + virtual void check_and_handle_popframe(Register java_thread); 1.1360 + virtual void check_and_handle_earlyret(Register java_thread); 1.1361 + 1.1362 + void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true); 1.1363 + 1.1364 + // helpers for FPU flag access 1.1365 + // tmp is a temporary register, if none is available use noreg 1.1366 + void save_rax (Register tmp); 1.1367 + void restore_rax(Register tmp); 1.1368 + 1.1369 + public: 1.1370 + MacroAssembler(CodeBuffer* code) : Assembler(code) {} 1.1371 + 1.1372 + // Support for NULL-checks 1.1373 + // 1.1374 + // Generates code that causes a NULL OS exception if the content of reg is NULL. 1.1375 + // If the accessed location is M[reg + offset] and the offset is known, provide the 1.1376 + // offset. No explicit code generation is needed if the offset is within a certain 1.1377 + // range (0 <= offset <= page_size). 1.1378 + 1.1379 + void null_check(Register reg, int offset = -1); 1.1380 + static bool needs_explicit_null_check(intptr_t offset); 1.1381 + 1.1382 + // Required platform-specific helpers for Label::patch_instructions. 1.1383 + // They _shadow_ the declarations in AbstractAssembler, which are undefined. 
1.1384 + void pd_patch_instruction(address branch, address target); 1.1385 +#ifndef PRODUCT 1.1386 + static void pd_print_patched_instruction(address branch); 1.1387 +#endif 1.1388 + 1.1389 + // The following 4 methods return the offset of the appropriate move instruction 1.1390 + 1.1391 + // Support for fast byte/word loading with zero extension (depending on particular CPU) 1.1392 + int load_unsigned_byte(Register dst, Address src); 1.1393 + int load_unsigned_word(Register dst, Address src); 1.1394 + 1.1395 + // Support for fast byte/word loading with sign extension (depending on particular CPU) 1.1396 + int load_signed_byte(Register dst, Address src); 1.1397 + int load_signed_word(Register dst, Address src); 1.1398 + 1.1399 + // Support for sign-extension (hi:lo = extend_sign(lo)) 1.1400 + void extend_sign(Register hi, Register lo); 1.1401 + 1.1402 + // Support for inc/dec with optimal instruction selection depending on value 1.1403 + 1.1404 + void increment(Register reg, int value = 1) { LP64_ONLY(incrementq(reg, value)) NOT_LP64(incrementl(reg, value)) ; } 1.1405 + void decrement(Register reg, int value = 1) { LP64_ONLY(decrementq(reg, value)) NOT_LP64(decrementl(reg, value)) ; } 1.1406 + 1.1407 + void decrementl(Address dst, int value = 1); 1.1408 + void decrementl(Register reg, int value = 1); 1.1409 + 1.1410 + void decrementq(Register reg, int value = 1); 1.1411 + void decrementq(Address dst, int value = 1); 1.1412 + 1.1413 + void incrementl(Address dst, int value = 1); 1.1414 + void incrementl(Register reg, int value = 1); 1.1415 + 1.1416 + void incrementq(Register reg, int value = 1); 1.1417 + void incrementq(Address dst, int value = 1); 1.1418 + 1.1419 + 1.1420 + // Support optimal SSE move instructions. 
1.1421 + void movflt(XMMRegister dst, XMMRegister src) { 1.1422 + if (UseXmmRegToRegMoveAll) { movaps(dst, src); return; } 1.1423 + else { movss (dst, src); return; } 1.1424 + } 1.1425 + void movflt(XMMRegister dst, Address src) { movss(dst, src); } 1.1426 + void movflt(XMMRegister dst, AddressLiteral src); 1.1427 + void movflt(Address dst, XMMRegister src) { movss(dst, src); } 1.1428 + 1.1429 + void movdbl(XMMRegister dst, XMMRegister src) { 1.1430 + if (UseXmmRegToRegMoveAll) { movapd(dst, src); return; } 1.1431 + else { movsd (dst, src); return; } 1.1432 + } 1.1433 + 1.1434 + void movdbl(XMMRegister dst, AddressLiteral src); 1.1435 + 1.1436 + void movdbl(XMMRegister dst, Address src) { 1.1437 + if (UseXmmLoadAndClearUpper) { movsd (dst, src); return; } 1.1438 + else { movlpd(dst, src); return; } 1.1439 + } 1.1440 + void movdbl(Address dst, XMMRegister src) { movsd(dst, src); } 1.1441 + 1.1442 + void incrementl(AddressLiteral dst); 1.1443 + void incrementl(ArrayAddress dst); 1.1444 + 1.1445 + // Alignment 1.1446 + void align(int modulus); 1.1447 + 1.1448 + // Misc 1.1449 + void fat_nop(); // 5 byte nop 1.1450 + 1.1451 + // Stack frame creation/removal 1.1452 + void enter(); 1.1453 + void leave(); 1.1454 + 1.1455 + // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) 1.1456 + // The pointer will be loaded into the thread register. 1.1457 + void get_thread(Register thread); 1.1458 + 1.1459 + // Support for VM calls 1.1460 + // 1.1461 + // It is imperative that all calls into the VM are handled via the call_VM macros. 1.1462 + // They make sure that the stack linkage is setup correctly. call_VM's correspond 1.1463 + // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. 
1.1464 + 1.1465 + 1.1466 + void call_VM(Register oop_result, 1.1467 + address entry_point, 1.1468 + bool check_exceptions = true); 1.1469 + void call_VM(Register oop_result, 1.1470 + address entry_point, 1.1471 + Register arg_1, 1.1472 + bool check_exceptions = true); 1.1473 + void call_VM(Register oop_result, 1.1474 + address entry_point, 1.1475 + Register arg_1, Register arg_2, 1.1476 + bool check_exceptions = true); 1.1477 + void call_VM(Register oop_result, 1.1478 + address entry_point, 1.1479 + Register arg_1, Register arg_2, Register arg_3, 1.1480 + bool check_exceptions = true); 1.1481 + 1.1482 + // Overloadings with last_Java_sp 1.1483 + void call_VM(Register oop_result, 1.1484 + Register last_java_sp, 1.1485 + address entry_point, 1.1486 + int number_of_arguments = 0, 1.1487 + bool check_exceptions = true); 1.1488 + void call_VM(Register oop_result, 1.1489 + Register last_java_sp, 1.1490 + address entry_point, 1.1491 + Register arg_1, bool 1.1492 + check_exceptions = true); 1.1493 + void call_VM(Register oop_result, 1.1494 + Register last_java_sp, 1.1495 + address entry_point, 1.1496 + Register arg_1, Register arg_2, 1.1497 + bool check_exceptions = true); 1.1498 + void call_VM(Register oop_result, 1.1499 + Register last_java_sp, 1.1500 + address entry_point, 1.1501 + Register arg_1, Register arg_2, Register arg_3, 1.1502 + bool check_exceptions = true); 1.1503 + 1.1504 + void call_VM_leaf(address entry_point, 1.1505 + int number_of_arguments = 0); 1.1506 + void call_VM_leaf(address entry_point, 1.1507 + Register arg_1); 1.1508 + void call_VM_leaf(address entry_point, 1.1509 + Register arg_1, Register arg_2); 1.1510 + void call_VM_leaf(address entry_point, 1.1511 + Register arg_1, Register arg_2, Register arg_3); 1.1512 + 1.1513 + // last Java Frame (fills frame anchor) 1.1514 + void set_last_Java_frame(Register thread, 1.1515 + Register last_java_sp, 1.1516 + Register last_java_fp, 1.1517 + address last_java_pc); 1.1518 + 1.1519 + // thread in the default 
location (r15_thread on 64bit) 1.1520 + void set_last_Java_frame(Register last_java_sp, 1.1521 + Register last_java_fp, 1.1522 + address last_java_pc); 1.1523 + 1.1524 + void reset_last_Java_frame(Register thread, bool clear_fp, bool clear_pc); 1.1525 + 1.1526 + // thread in the default location (r15_thread on 64bit) 1.1527 + void reset_last_Java_frame(bool clear_fp, bool clear_pc); 1.1528 + 1.1529 + // Stores 1.1530 + void store_check(Register obj); // store check for obj - register is destroyed afterwards 1.1531 + void store_check(Register obj, Address dst); // same as above, dst is exact store location (reg. is destroyed) 1.1532 + 1.1533 + // split store_check(Register obj) to enhance instruction interleaving 1.1534 + void store_check_part_1(Register obj); 1.1535 + void store_check_part_2(Register obj); 1.1536 + 1.1537 + // C 'boolean' to Java boolean: x == 0 ? 0 : 1 1.1538 + void c2bool(Register x); 1.1539 + 1.1540 + // C++ bool manipulation 1.1541 + 1.1542 + void movbool(Register dst, Address src); 1.1543 + void movbool(Address dst, bool boolconst); 1.1544 + void movbool(Address dst, Register src); 1.1545 + void testbool(Register dst); 1.1546 + 1.1547 + // oop manipulations 1.1548 + void load_klass(Register dst, Register src); 1.1549 + void store_klass(Register dst, Register src); 1.1550 + 1.1551 + void load_prototype_header(Register dst, Register src); 1.1552 + 1.1553 +#ifdef _LP64 1.1554 + void store_klass_gap(Register dst, Register src); 1.1555 + 1.1556 + void load_heap_oop(Register dst, Address src); 1.1557 + void store_heap_oop(Address dst, Register src); 1.1558 + void encode_heap_oop(Register r); 1.1559 + void decode_heap_oop(Register r); 1.1560 + void encode_heap_oop_not_null(Register r); 1.1561 + void decode_heap_oop_not_null(Register r); 1.1562 + void encode_heap_oop_not_null(Register dst, Register src); 1.1563 + void decode_heap_oop_not_null(Register dst, Register src); 1.1564 + 1.1565 + void set_narrow_oop(Register dst, jobject obj); 1.1566 + 1.1567 
+ // if heap base register is used - reinit it with the correct value 1.1568 + void reinit_heapbase(); 1.1569 +#endif // _LP64 1.1570 + 1.1571 + // Int division/remainder for Java 1.1572 + // (as idivl, but checks for special case as described in JVM spec.) 1.1573 + // returns idivl instruction offset for implicit exception handling 1.1574 + int corrected_idivl(Register reg); 1.1575 + 1.1576 + // Long division/remainder for Java 1.1577 + // (as idivq, but checks for special case as described in JVM spec.) 1.1578 + // returns idivq instruction offset for implicit exception handling 1.1579 + int corrected_idivq(Register reg); 1.1580 + 1.1581 + void int3(); 1.1582 + 1.1583 + // Long operation macros for a 32bit cpu 1.1584 + // Long negation for Java 1.1585 + void lneg(Register hi, Register lo); 1.1586 + 1.1587 + // Long multiplication for Java 1.1588 + // (destroys contents of eax, ebx, ecx and edx) 1.1589 + void lmul(int x_rsp_offset, int y_rsp_offset); // rdx:rax = x * y 1.1590 + 1.1591 + // Long shifts for Java 1.1592 + // (semantics as described in JVM spec.) 1.1593 + void lshl(Register hi, Register lo); // hi:lo << (rcx & 0x3f) 1.1594 + void lshr(Register hi, Register lo, bool sign_extension = false); // hi:lo >> (rcx & 0x3f) 1.1595 + 1.1596 + // Long compare for Java 1.1597 + // (semantics as described in JVM spec.) 
1.1598 + void lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo); // x_hi = lcmp(x, y) 1.1599 + 1.1600 + 1.1601 + // misc 1.1602 + 1.1603 + // Sign extension 1.1604 + void sign_extend_short(Register reg); 1.1605 + void sign_extend_byte(Register reg); 1.1606 + 1.1607 + // Division by power of 2, rounding towards 0 1.1608 + void division_with_shift(Register reg, int shift_value); 1.1609 + 1.1610 + // Compares the top-most stack entries on the FPU stack and sets the eflags as follows: 1.1611 + // 1.1612 + // CF (corresponds to C0) if x < y 1.1613 + // PF (corresponds to C2) if unordered 1.1614 + // ZF (corresponds to C3) if x = y 1.1615 + // 1.1616 + // The arguments are in reversed order on the stack (i.e., top of stack is first argument). 1.1617 + // tmp is a temporary register, if none is available use noreg (only matters for non-P6 code) 1.1618 + void fcmp(Register tmp); 1.1619 + // Variant of the above which allows y to be further down the stack 1.1620 + // and which only pops x and y if specified. If pop_right is 1.1621 + // specified then pop_left must also be specified. 1.1622 + void fcmp(Register tmp, int index, bool pop_left, bool pop_right); 1.1623 + 1.1624 + // Floating-point comparison for Java 1.1625 + // Compares the top-most stack entries on the FPU stack and stores the result in dst. 1.1626 + // The arguments are in reversed order on the stack (i.e., top of stack is first argument). 1.1627 + // (semantics as described in JVM spec.) 1.1628 + void fcmp2int(Register dst, bool unordered_is_less); 1.1629 + // Variant of the above which allows y to be further down the stack 1.1630 + // and which only pops x and y if specified. If pop_right is 1.1631 + // specified then pop_left must also be specified. 
1.1632 + void fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right); 1.1633 + 1.1634 + // Floating-point remainder for Java (ST0 = ST0 fremr ST1, ST1 is empty afterwards) 1.1635 + // tmp is a temporary register, if none is available use noreg 1.1636 + void fremr(Register tmp); 1.1637 + 1.1638 + 1.1639 + // same as fcmp2int, but using SSE2 1.1640 + void cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less); 1.1641 + void cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less); 1.1642 + 1.1643 + // Inlined sin/cos generator for Java; must not use CPU instruction 1.1644 + // directly on Intel as it does not have high enough precision 1.1645 + // outside of the range [-pi/4, pi/4]. Extra argument indicate the 1.1646 + // number of FPU stack slots in use; all but the topmost will 1.1647 + // require saving if a slow case is necessary. Assumes argument is 1.1648 + // on FP TOS; result is on FP TOS. No cpu registers are changed by 1.1649 + // this code. 
1.1650 + void trigfunc(char trig, int num_fpu_regs_in_use = 1); 1.1651 + 1.1652 + // branch to L if FPU flag C2 is set/not set 1.1653 + // tmp is a temporary register, if none is available use noreg 1.1654 + void jC2 (Register tmp, Label& L); 1.1655 + void jnC2(Register tmp, Label& L); 1.1656 + 1.1657 + // Pop ST (ffree & fincstp combined) 1.1658 + void fpop(); 1.1659 + 1.1660 + // pushes double TOS element of FPU stack on CPU stack; pops from FPU stack 1.1661 + void push_fTOS(); 1.1662 + 1.1663 + // pops double TOS element from CPU stack and pushes on FPU stack 1.1664 + void pop_fTOS(); 1.1665 + 1.1666 + void empty_FPU_stack(); 1.1667 + 1.1668 + void push_IU_state(); 1.1669 + void pop_IU_state(); 1.1670 + 1.1671 + void push_FPU_state(); 1.1672 + void pop_FPU_state(); 1.1673 + 1.1674 + void push_CPU_state(); 1.1675 + void pop_CPU_state(); 1.1676 + 1.1677 + // Round up to a power of two 1.1678 + void round_to(Register reg, int modulus); 1.1679 + 1.1680 + // Callee saved registers handling 1.1681 + void push_callee_saved_registers(); 1.1682 + void pop_callee_saved_registers(); 1.1683 + 1.1684 + // allocation 1.1685 + void eden_allocate( 1.1686 + Register obj, // result: pointer to object after successful allocation 1.1687 + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise 1.1688 + int con_size_in_bytes, // object size in bytes if known at compile time 1.1689 + Register t1, // temp register 1.1690 + Label& slow_case // continuation point if fast allocation fails 1.1691 + ); 1.1692 + void tlab_allocate( 1.1693 + Register obj, // result: pointer to object after successful allocation 1.1694 + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise 1.1695 + int con_size_in_bytes, // object size in bytes if known at compile time 1.1696 + Register t1, // temp register 1.1697 + Register t2, // temp register 1.1698 + Label& slow_case // continuation point if fast allocation fails 1.1699 
+ ); 1.1700 + void tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case); 1.1701 + 1.1702 + //---- 1.1703 + void set_word_if_not_zero(Register reg); // sets reg to 1 if not zero, otherwise 0 1.1704 + 1.1705 + // Debugging 1.1706 + 1.1707 + // only if +VerifyOops 1.1708 + void verify_oop(Register reg, const char* s = "broken oop"); 1.1709 + void verify_oop_addr(Address addr, const char * s = "broken oop addr"); 1.1710 + 1.1711 + // only if +VerifyFPU 1.1712 + void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); 1.1713 + 1.1714 + // prints msg, dumps registers and stops execution 1.1715 + void stop(const char* msg); 1.1716 + 1.1717 + // prints msg and continues 1.1718 + void warn(const char* msg); 1.1719 + 1.1720 + static void debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg); 1.1721 + static void debug64(char* msg, int64_t pc, int64_t regs[]); 1.1722 + 1.1723 + void os_breakpoint(); 1.1724 + 1.1725 + void untested() { stop("untested"); } 1.1726 + 1.1727 + void unimplemented(const char* what = "") { char* b = new char[1024]; jio_snprintf(b, 1024 /* buffer size; sizeof(b) is only sizeof(char*) and truncated the message */, "unimplemented: %s", what); stop(b); } 1.1728 + 1.1729 + void should_not_reach_here() { stop("should not reach here"); } 1.1730 + 1.1731 + void print_CPU_state(); 1.1732 + 1.1733 + // Stack overflow checking 1.1734 + void bang_stack_with_offset(int offset) { 1.1735 + // stack grows down, caller passes positive offset 1.1736 + assert(offset > 0, "must bang with negative offset"); 1.1737 + movl(Address(rsp, (-offset)), rax); 1.1738 + } 1.1739 + 1.1740 + // Writes to stack successive pages until offset reached to check for 1.1741 + // stack overflow + shadow pages.
Also, clobbers tmp 1.1742 + void bang_stack_size(Register size, Register tmp); 1.1743 + 1.1744 + // Support for serializing memory accesses between threads 1.1745 + void serialize_memory(Register thread, Register tmp); 1.1746 + 1.1747 + void verify_tlab(); 1.1748 + 1.1749 + // Biased locking support 1.1750 + // lock_reg and obj_reg must be loaded up with the appropriate values. 1.1751 + // swap_reg must be rax, and is killed. 1.1752 + // tmp_reg is optional. If it is supplied (i.e., != noreg) it will 1.1753 + // be killed; if not supplied, push/pop will be used internally to 1.1754 + // allocate a temporary (inefficient, avoid if possible). 1.1755 + // Optional slow case is for implementations (interpreter and C1) which branch to 1.1756 + // slow case directly. Leaves condition codes set for C2's Fast_Lock node. 1.1757 + // Returns offset of first potentially-faulting instruction for null 1.1758 + // check info (currently consumed only by C1). If 1.1759 + // swap_reg_contains_mark is true then returns -1 as it is assumed 1.1760 + // the calling code has already passed any potential faults. 1.1761 + int biased_locking_enter(Register lock_reg, Register obj_reg, Register swap_reg, Register tmp_reg, 1.1762 + bool swap_reg_contains_mark, 1.1763 + Label& done, Label* slow_case = NULL, 1.1764 + BiasedLockingCounters* counters = NULL); 1.1765 + void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done); 1.1766 + 1.1767 + 1.1768 + Condition negate_condition(Condition cond); 1.1769 + 1.1770 + // Instructions that use AddressLiteral operands. These instruction can handle 32bit/64bit 1.1771 + // operands. In general the names are modified to avoid hiding the instruction in Assembler 1.1772 + // so that we don't need to implement all the varieties in the Assembler with trivial wrappers 1.1773 + // here in MacroAssembler. 
The major exception to this rule is call 1.1774 + 1.1775 + // Arithmetics 1.1776 + 1.1777 + 1.1778 + void addptr(Address dst, int32_t src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)) ; } 1.1779 + void addptr(Address dst, Register src); 1.1780 + 1.1781 + void addptr(Register dst, Address src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); } 1.1782 + void addptr(Register dst, int32_t src); 1.1783 + void addptr(Register dst, Register src); 1.1784 + 1.1785 + void andptr(Register dst, int32_t src); 1.1786 + void andptr(Register src1, Register src2) { LP64_ONLY(andq(src1, src2)) NOT_LP64(andl(src1, src2)) ; } 1.1787 + 1.1788 + void cmp8(AddressLiteral src1, int imm); 1.1789 + 1.1790 + // renamed to drag out the casting of address to int32_t/intptr_t 1.1791 + void cmp32(Register src1, int32_t imm); 1.1792 + 1.1793 + void cmp32(AddressLiteral src1, int32_t imm); 1.1794 + // compare reg - mem, or reg - &mem 1.1795 + void cmp32(Register src1, AddressLiteral src2); 1.1796 + 1.1797 + void cmp32(Register src1, Address src2); 1.1798 + 1.1799 +#ifndef _LP64 1.1800 + void cmpoop(Address dst, jobject obj); 1.1801 + void cmpoop(Register dst, jobject obj); 1.1802 +#endif // _LP64 1.1803 + 1.1804 + // NOTE src2 must be the lval. 
This is NOT a mem-mem compare 1.1805 + void cmpptr(Address src1, AddressLiteral src2); 1.1806 + 1.1807 + void cmpptr(Register src1, AddressLiteral src2); 1.1808 + 1.1809 + void cmpptr(Register src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; } 1.1810 + void cmpptr(Register src1, Address src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; } 1.1811 + // void cmpptr(Address src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; } 1.1812 + 1.1813 + void cmpptr(Register src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; } 1.1814 + void cmpptr(Address src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; } 1.1815 + 1.1816 + // cmp64 to avoid hiding cmpq 1.1817 + void cmp64(Register src1, AddressLiteral src); 1.1818 + 1.1819 + void cmpxchgptr(Register reg, Address adr); 1.1820 + 1.1821 + void locked_cmpxchgptr(Register reg, AddressLiteral adr); 1.1822 + 1.1823 + 1.1824 + void imulptr(Register dst, Register src) { LP64_ONLY(imulq(dst, src)) NOT_LP64(imull(dst, src)); } 1.1825 + 1.1826 + 1.1827 + void negptr(Register dst) { LP64_ONLY(negq(dst)) NOT_LP64(negl(dst)); } 1.1828 + 1.1829 + void notptr(Register dst) { LP64_ONLY(notq(dst)) NOT_LP64(notl(dst)); } 1.1830 + 1.1831 + void shlptr(Register dst, int32_t shift); 1.1832 + void shlptr(Register dst) { LP64_ONLY(shlq(dst)) NOT_LP64(shll(dst)); } 1.1833 + 1.1834 + void shrptr(Register dst, int32_t shift); 1.1835 + void shrptr(Register dst) { LP64_ONLY(shrq(dst)) NOT_LP64(shrl(dst)); } 1.1836 + 1.1837 + void sarptr(Register dst) { LP64_ONLY(sarq(dst)) NOT_LP64(sarl(dst)); } 1.1838 + void sarptr(Register dst, int32_t src) { LP64_ONLY(sarq(dst, src)) NOT_LP64(sarl(dst, src)); } 1.1839 + 1.1840 + void subptr(Address dst, int32_t src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); } 1.1841 + 1.1842 + void subptr(Register dst, Address src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); } 1.1843 +
void subptr(Register dst, int32_t src); 1.1844 + void subptr(Register dst, Register src); 1.1845 + 1.1846 + 1.1847 + void sbbptr(Address dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); } 1.1848 + void sbbptr(Register dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); } 1.1849 + 1.1850 + void xchgptr(Register src1, Register src2) { LP64_ONLY(xchgq(src1, src2)) NOT_LP64(xchgl(src1, src2)) ; } 1.1851 + void xchgptr(Register src1, Address src2) { LP64_ONLY(xchgq(src1, src2)) NOT_LP64(xchgl(src1, src2)) ; } 1.1852 + 1.1853 + void xaddptr(Address src1, Register src2) { LP64_ONLY(xaddq(src1, src2)) NOT_LP64(xaddl(src1, src2)) ; } 1.1854 + 1.1855 + 1.1856 + 1.1857 + // Helper functions for statistics gathering. 1.1858 + // Conditionally (atomically, on MPs) increments passed counter address, preserving condition codes. 1.1859 + void cond_inc32(Condition cond, AddressLiteral counter_addr); 1.1860 + // Unconditional atomic increment. 1.1861 + void atomic_incl(AddressLiteral counter_addr); 1.1862 + 1.1863 + void lea(Register dst, AddressLiteral adr); 1.1864 + void lea(Address dst, AddressLiteral adr); 1.1865 + void lea(Register dst, Address adr) { Assembler::lea(dst, adr); } 1.1866 + 1.1867 + void leal32(Register dst, Address src) { leal(dst, src); } 1.1868 + 1.1869 + void test32(Register src1, AddressLiteral src2); 1.1870 + 1.1871 + void orptr(Register dst, Address src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); } 1.1872 + void orptr(Register dst, Register src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); } 1.1873 + void orptr(Register dst, int32_t src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); } 1.1874 + 1.1875 + void testptr(Register src, int32_t imm32) { LP64_ONLY(testq(src, imm32)) NOT_LP64(testl(src, imm32)); } 1.1876 + void testptr(Register src1, Register src2); 1.1877 + 1.1878 + void xorptr(Register dst, Register src) { LP64_ONLY(xorq(dst, src)) NOT_LP64(xorl(dst, src)); } 1.1879 + void 
xorptr(Register dst, Address src) { LP64_ONLY(xorq(dst, src)) NOT_LP64(xorl(dst, src)); } 1.1880 + 1.1881 + // Calls 1.1882 + 1.1883 + void call(Label& L, relocInfo::relocType rtype); 1.1884 + void call(Register entry); 1.1885 + 1.1886 + // NOTE: this call transfers to the effective address of entry NOT 1.1887 + // the address contained by entry. This is because this is more natural 1.1888 + // for jumps/calls. 1.1889 + void call(AddressLiteral entry); 1.1890 + 1.1891 + // Jumps 1.1892 + 1.1893 + // NOTE: these jumps transfer to the effective address of dst NOT 1.1894 + // the address contained by dst. This is because this is more natural 1.1895 + // for jumps/calls. 1.1896 + void jump(AddressLiteral dst); 1.1897 + void jump_cc(Condition cc, AddressLiteral dst); 1.1898 + 1.1899 + // 32bit can do a case table jump in one instruction but we no longer allow the base 1.1900 + // to be installed in the Address class. This jump will transfer to the address 1.1901 + // contained in the location described by entry (not the address of entry) 1.1902 + void jump(ArrayAddress entry); 1.1903 + 1.1904 + // Floating 1.1905 + 1.1906 + void andpd(XMMRegister dst, Address src) { Assembler::andpd(dst, src); } 1.1907 + void andpd(XMMRegister dst, AddressLiteral src); 1.1908 + 1.1909 + void comiss(XMMRegister dst, Address src) { Assembler::comiss(dst, src); } 1.1910 + void comiss(XMMRegister dst, AddressLiteral src); 1.1911 + 1.1912 + void comisd(XMMRegister dst, Address src) { Assembler::comisd(dst, src); } 1.1913 + void comisd(XMMRegister dst, AddressLiteral src); 1.1914 + 1.1915 + void fldcw(Address src) { Assembler::fldcw(src); } 1.1916 + void fldcw(AddressLiteral src); 1.1917 + 1.1918 + void fld_s(int index) { Assembler::fld_s(index); } 1.1919 + void fld_s(Address src) { Assembler::fld_s(src); } 1.1920 + void fld_s(AddressLiteral src); 1.1921 + 1.1922 + void fld_d(Address src) { Assembler::fld_d(src); } 1.1923 + void fld_d(AddressLiteral src); 1.1924 + 1.1925 + void fld_x(Address
src) { Assembler::fld_x(src); } 1.1926 + void fld_x(AddressLiteral src); 1.1927 + 1.1928 + void ldmxcsr(Address src) { Assembler::ldmxcsr(src); } 1.1929 + void ldmxcsr(AddressLiteral src); 1.1930 + 1.1931 +private: 1.1932 + // these are private because users should be doing movflt/movdbl 1.1933 + 1.1934 + void movss(Address dst, XMMRegister src) { Assembler::movss(dst, src); } 1.1935 + void movss(XMMRegister dst, XMMRegister src) { Assembler::movss(dst, src); } 1.1936 + void movss(XMMRegister dst, Address src) { Assembler::movss(dst, src); } 1.1937 + void movss(XMMRegister dst, AddressLiteral src); 1.1938 + 1.1939 + void movlpd(XMMRegister dst, Address src) {Assembler::movlpd(dst, src); } 1.1940 + void movlpd(XMMRegister dst, AddressLiteral src); 1.1941 + 1.1942 +public: 1.1943 + 1.1944 + void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); } 1.1945 + void movsd(Address dst, XMMRegister src) { Assembler::movsd(dst, src); } 1.1946 + void movsd(XMMRegister dst, Address src) { Assembler::movsd(dst, src); } 1.1947 + void movsd(XMMRegister dst, AddressLiteral src); 1.1948 + 1.1949 + void ucomiss(XMMRegister dst, XMMRegister src) { Assembler::ucomiss(dst, src); } 1.1950 + void ucomiss(XMMRegister dst, Address src) { Assembler::ucomiss(dst, src); } 1.1951 + void ucomiss(XMMRegister dst, AddressLiteral src); 1.1952 + 1.1953 + void ucomisd(XMMRegister dst, XMMRegister src) { Assembler::ucomisd(dst, src); } 1.1954 + void ucomisd(XMMRegister dst, Address src) { Assembler::ucomisd(dst, src); } 1.1955 + void ucomisd(XMMRegister dst, AddressLiteral src); 1.1956 + 1.1957 + // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values 1.1958 + void xorpd(XMMRegister dst, XMMRegister src) { Assembler::xorpd(dst, src); } 1.1959 + void xorpd(XMMRegister dst, Address src) { Assembler::xorpd(dst, src); } 1.1960 + void xorpd(XMMRegister dst, AddressLiteral src); 1.1961 + 1.1962 + // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values 
1.1963 + void xorps(XMMRegister dst, XMMRegister src) { Assembler::xorps(dst, src); } 1.1964 + void xorps(XMMRegister dst, Address src) { Assembler::xorps(dst, src); } 1.1965 + void xorps(XMMRegister dst, AddressLiteral src); 1.1966 + 1.1967 + // Data 1.1968 + 1.1969 + void cmov(Condition cc, Register dst, Register src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmovl(cc, dst, src)); } 1.1970 + 1.1971 + void cmovptr(Condition cc, Register dst, Address src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmovl(cc, dst, src)); } 1.1972 + void cmovptr(Condition cc, Register dst, Register src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmovl(cc, dst, src)); } 1.1973 + 1.1974 + void movoop(Register dst, jobject obj); 1.1975 + void movoop(Address dst, jobject obj); 1.1976 + 1.1977 + void movptr(ArrayAddress dst, Register src); 1.1978 + // can this do an lea? 1.1979 + void movptr(Register dst, ArrayAddress src); 1.1980 + 1.1981 + void movptr(Register dst, Address src); 1.1982 + 1.1983 + void movptr(Register dst, AddressLiteral src); 1.1984 + 1.1985 + void movptr(Register dst, intptr_t src); 1.1986 + void movptr(Register dst, Register src); 1.1987 + void movptr(Address dst, intptr_t src); 1.1988 + 1.1989 + void movptr(Address dst, Register src); 1.1990 + 1.1991 +#ifdef _LP64 1.1992 + // Generally the next two are only used for moving NULL 1.1993 + // Although there are situations in initializing the mark word where 1.1994 + // they could be used. They are dangerous. 1.1995 + 1.1996 + // They only exist on LP64 so that int32_t and intptr_t are not the same 1.1997 + // and we have ambiguous declarations. 
1.1998 + 1.1999 + void movptr(Address dst, int32_t imm32); 1.2000 + void movptr(Register dst, int32_t imm32); 1.2001 +#endif // _LP64 1.2002 + 1.2003 + // to avoid hiding movl 1.2004 + void mov32(AddressLiteral dst, Register src); 1.2005 + void mov32(Register dst, AddressLiteral src); 1.2006 + 1.2007 + // to avoid hiding movb 1.2008 + void movbyte(ArrayAddress dst, int src); 1.2009 + 1.2010 + // Can push value or effective address 1.2011 + void pushptr(AddressLiteral src); 1.2012 + 1.2013 + void pushptr(Address src) { LP64_ONLY(pushq(src)) NOT_LP64(pushl(src)); } 1.2014 + void popptr(Address src) { LP64_ONLY(popq(src)) NOT_LP64(popl(src)); } 1.2015 + 1.2016 + void pushoop(jobject obj); 1.2017 + 1.2018 + // sign extend as needed, from a 32-bit (l) value to a ptr sized element 1.2019 + void movl2ptr(Register dst, Address src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(movl(dst, src)); } 1.2020 + void movl2ptr(Register dst, Register src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(if (dst != src) movl(dst, src)); } 1.2021 + 1.2022 + 1.2023 +#undef VIRTUAL 1.2024 + 1.2025 +}; 1.2026 + 1.2027 +/** 1.2028 + * class SkipIfEqual: 1.2029 + * 1.2030 + * Instantiating this class will result in assembly code being output that will 1.2031 + * jump around any code emitted between the creation of the instance and its 1.2032 + * automatic destruction at the end of a scope block, depending on the value of 1.2033 + * the flag passed to the constructor, which will be checked at run-time. 1.2034 + */ 1.2035 +class SkipIfEqual { 1.2036 + private: 1.2037 + MacroAssembler* _masm; 1.2038 + Label _label; 1.2039 + 1.2040 + public: 1.2041 + SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value); 1.2042 + ~SkipIfEqual(); 1.2043 +}; 1.2044 + 1.2045 +#ifdef ASSERT 1.2046 +inline bool AbstractAssembler::pd_check_instruction_mark() { return true; } 1.2047 +#endif