aoqi@0: /* aoqi@0: * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. aoqi@0: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. aoqi@0: * aoqi@0: * This code is free software; you can redistribute it and/or modify it aoqi@0: * under the terms of the GNU General Public License version 2 only, as aoqi@0: * published by the Free Software Foundation. aoqi@0: * aoqi@0: * This code is distributed in the hope that it will be useful, but WITHOUT aoqi@0: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or aoqi@0: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License aoqi@0: * version 2 for more details (a copy is included in the LICENSE file that aoqi@0: * accompanied this code). aoqi@0: * aoqi@0: * You should have received a copy of the GNU General Public License version aoqi@0: * 2 along with this work; if not, write to the Free Software Foundation, aoqi@0: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. aoqi@0: * aoqi@0: * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA aoqi@0: * or visit www.oracle.com if you need additional information or have any aoqi@0: * questions. aoqi@0: * aoqi@0: */ aoqi@0: aoqi@0: #ifndef CPU_X86_VM_ASSEMBLER_X86_HPP aoqi@0: #define CPU_X86_VM_ASSEMBLER_X86_HPP aoqi@0: aoqi@0: #include "asm/register.hpp" aoqi@0: aoqi@0: class BiasedLockingCounters; aoqi@0: aoqi@0: // Contains all the definitions needed for x86 assembly code generation. aoqi@0: aoqi@0: // Calling convention aoqi@0: class Argument VALUE_OBJ_CLASS_SPEC { aoqi@0: public: aoqi@0: enum { aoqi@0: #ifdef _LP64 aoqi@0: #ifdef _WIN64 aoqi@0: n_int_register_parameters_c = 4, // rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...) aoqi@0: n_float_register_parameters_c = 4, // xmm0 - xmm3 (c_farg0, c_farg1, ... ) aoqi@0: #else aoqi@0: n_int_register_parameters_c = 6, // rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...) 
aoqi@0: n_float_register_parameters_c = 8, // xmm0 - xmm7 (c_farg0, c_farg1, ... ) aoqi@0: #endif // _WIN64 aoqi@0: n_int_register_parameters_j = 6, // j_rarg0, j_rarg1, ... aoqi@0: n_float_register_parameters_j = 8 // j_farg0, j_farg1, ... aoqi@0: #else aoqi@0: n_register_parameters = 0 // 0 registers used to pass arguments aoqi@0: #endif // _LP64 aoqi@0: }; aoqi@0: }; aoqi@0: aoqi@0: aoqi@0: #ifdef _LP64 aoqi@0: // Symbolically name the register arguments used by the c calling convention. aoqi@0: // Windows is different from linux/solaris. So much for standards... aoqi@0: aoqi@0: #ifdef _WIN64 aoqi@0: aoqi@0: REGISTER_DECLARATION(Register, c_rarg0, rcx); aoqi@0: REGISTER_DECLARATION(Register, c_rarg1, rdx); aoqi@0: REGISTER_DECLARATION(Register, c_rarg2, r8); aoqi@0: REGISTER_DECLARATION(Register, c_rarg3, r9); aoqi@0: aoqi@0: REGISTER_DECLARATION(XMMRegister, c_farg0, xmm0); aoqi@0: REGISTER_DECLARATION(XMMRegister, c_farg1, xmm1); aoqi@0: REGISTER_DECLARATION(XMMRegister, c_farg2, xmm2); aoqi@0: REGISTER_DECLARATION(XMMRegister, c_farg3, xmm3); aoqi@0: aoqi@0: #else aoqi@0: aoqi@0: REGISTER_DECLARATION(Register, c_rarg0, rdi); aoqi@0: REGISTER_DECLARATION(Register, c_rarg1, rsi); aoqi@0: REGISTER_DECLARATION(Register, c_rarg2, rdx); aoqi@0: REGISTER_DECLARATION(Register, c_rarg3, rcx); aoqi@0: REGISTER_DECLARATION(Register, c_rarg4, r8); aoqi@0: REGISTER_DECLARATION(Register, c_rarg5, r9); aoqi@0: aoqi@0: REGISTER_DECLARATION(XMMRegister, c_farg0, xmm0); aoqi@0: REGISTER_DECLARATION(XMMRegister, c_farg1, xmm1); aoqi@0: REGISTER_DECLARATION(XMMRegister, c_farg2, xmm2); aoqi@0: REGISTER_DECLARATION(XMMRegister, c_farg3, xmm3); aoqi@0: REGISTER_DECLARATION(XMMRegister, c_farg4, xmm4); aoqi@0: REGISTER_DECLARATION(XMMRegister, c_farg5, xmm5); aoqi@0: REGISTER_DECLARATION(XMMRegister, c_farg6, xmm6); aoqi@0: REGISTER_DECLARATION(XMMRegister, c_farg7, xmm7); aoqi@0: aoqi@0: #endif // _WIN64 aoqi@0: aoqi@0: // Symbolically name the register arguments used by the Java 
calling convention. aoqi@0: // We have control over the convention for Java so we can do what we please. aoqi@0: // What pleases us is to offset the Java calling convention so that when aoqi@0: // we call a suitable JNI method the arguments are lined up and we aoqi@0: // have to do little shuffling. A suitable JNI method is non-static and has a aoqi@0: // small number of arguments (two fewer args on windows) aoqi@0: // aoqi@0: // |-------------------------------------------------------| aoqi@0: // | c_rarg0 c_rarg1 c_rarg2 c_rarg3 c_rarg4 c_rarg5 | aoqi@0: // |-------------------------------------------------------| aoqi@0: // | rcx rdx r8 r9 rdi* rsi* | windows (* not a c_rarg) aoqi@0: // | rdi rsi rdx rcx r8 r9 | solaris/linux aoqi@0: // |-------------------------------------------------------| aoqi@0: // | j_rarg5 j_rarg0 j_rarg1 j_rarg2 j_rarg3 j_rarg4 | aoqi@0: // |-------------------------------------------------------| aoqi@0: aoqi@0: REGISTER_DECLARATION(Register, j_rarg0, c_rarg1); aoqi@0: REGISTER_DECLARATION(Register, j_rarg1, c_rarg2); aoqi@0: REGISTER_DECLARATION(Register, j_rarg2, c_rarg3); aoqi@0: // Windows runs out of register args here aoqi@0: #ifdef _WIN64 aoqi@0: REGISTER_DECLARATION(Register, j_rarg3, rdi); aoqi@0: REGISTER_DECLARATION(Register, j_rarg4, rsi); aoqi@0: #else aoqi@0: REGISTER_DECLARATION(Register, j_rarg3, c_rarg4); aoqi@0: REGISTER_DECLARATION(Register, j_rarg4, c_rarg5); aoqi@0: #endif /* _WIN64 */ aoqi@0: REGISTER_DECLARATION(Register, j_rarg5, c_rarg0); aoqi@0: aoqi@0: REGISTER_DECLARATION(XMMRegister, j_farg0, xmm0); aoqi@0: REGISTER_DECLARATION(XMMRegister, j_farg1, xmm1); aoqi@0: REGISTER_DECLARATION(XMMRegister, j_farg2, xmm2); aoqi@0: REGISTER_DECLARATION(XMMRegister, j_farg3, xmm3); aoqi@0: REGISTER_DECLARATION(XMMRegister, j_farg4, xmm4); aoqi@0: REGISTER_DECLARATION(XMMRegister, j_farg5, xmm5); aoqi@0: REGISTER_DECLARATION(XMMRegister, j_farg6, xmm6); aoqi@0: REGISTER_DECLARATION(XMMRegister, j_farg7, xmm7); aoqi@0: 
aoqi@0: REGISTER_DECLARATION(Register, rscratch1, r10); // volatile aoqi@0: REGISTER_DECLARATION(Register, rscratch2, r11); // volatile aoqi@0: aoqi@0: REGISTER_DECLARATION(Register, r12_heapbase, r12); // callee-saved aoqi@0: REGISTER_DECLARATION(Register, r15_thread, r15); // callee-saved aoqi@0: aoqi@0: #else aoqi@0: // rscratch1 will appear in 32bit code that is dead but of course must compile. aoqi@0: // Using noreg ensures that if the dead code is incorrectly live and executed it aoqi@0: // will cause an assertion failure. aoqi@0: #define rscratch1 noreg aoqi@0: #define rscratch2 noreg aoqi@0: aoqi@0: #endif // _LP64 aoqi@0: aoqi@0: // JSR 292 fixed register usages: aoqi@0: REGISTER_DECLARATION(Register, rbp_mh_SP_save, rbp); aoqi@0: aoqi@0: // Address is an abstraction used to represent a memory location aoqi@0: // using any of the amd64 addressing modes with one object. aoqi@0: // aoqi@0: // Note: A register location is represented via a Register, not aoqi@0: // via an address for efficiency & simplicity reasons. 
aoqi@0: aoqi@0: class ArrayAddress; aoqi@0: aoqi@0: class Address VALUE_OBJ_CLASS_SPEC { aoqi@0: public: aoqi@0: enum ScaleFactor { aoqi@0: no_scale = -1, aoqi@0: times_1 = 0, aoqi@0: times_2 = 1, aoqi@0: times_4 = 2, aoqi@0: times_8 = 3, aoqi@0: times_ptr = LP64_ONLY(times_8) NOT_LP64(times_4) aoqi@0: }; aoqi@0: static ScaleFactor times(int size) { aoqi@0: assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size"); aoqi@0: if (size == 8) return times_8; aoqi@0: if (size == 4) return times_4; aoqi@0: if (size == 2) return times_2; aoqi@0: return times_1; aoqi@0: } aoqi@0: static int scale_size(ScaleFactor scale) { aoqi@0: assert(scale != no_scale, ""); aoqi@0: assert(((1 << (int)times_1) == 1 && aoqi@0: (1 << (int)times_2) == 2 && aoqi@0: (1 << (int)times_4) == 4 && aoqi@0: (1 << (int)times_8) == 8), ""); aoqi@0: return (1 << (int)scale); aoqi@0: } aoqi@0: aoqi@0: private: aoqi@0: Register _base; aoqi@0: Register _index; aoqi@0: ScaleFactor _scale; aoqi@0: int _disp; aoqi@0: RelocationHolder _rspec; aoqi@0: aoqi@0: // These constructors are easily misused, so make them private. aoqi@0: // %%% can we make these go away? aoqi@0: NOT_LP64(Address(address loc, RelocationHolder spec);) aoqi@0: Address(int disp, address loc, relocInfo::relocType rtype); aoqi@0: Address(int disp, address loc, RelocationHolder spec); aoqi@0: aoqi@0: public: aoqi@0: aoqi@0: int disp() { return _disp; } aoqi@0: // creation: the default Address has no base, no index, no scale and a zero displacement aoqi@0: Address() aoqi@0: : _base(noreg), aoqi@0: _index(noreg), aoqi@0: _scale(no_scale), aoqi@0: _disp(0) { aoqi@0: } aoqi@0: aoqi@0: // No default displacement; otherwise a Register could be implicitly aoqi@0: // converted to 0(Register), which is quite a different animal. 
aoqi@0: aoqi@0: Address(Register base, int disp) aoqi@0: : _base(base), aoqi@0: _index(noreg), aoqi@0: _scale(no_scale), aoqi@0: _disp(disp) { aoqi@0: } aoqi@0: aoqi@0: Address(Register base, Register index, ScaleFactor scale, int disp = 0) aoqi@0: : _base (base), aoqi@0: _index(index), aoqi@0: _scale(scale), aoqi@0: _disp (disp) { aoqi@0: assert(!index->is_valid() == (scale == Address::no_scale), aoqi@0: "inconsistent address"); aoqi@0: } aoqi@0: aoqi@0: Address(Register base, RegisterOrConstant index, ScaleFactor scale = times_1, int disp = 0) aoqi@0: : _base (base), aoqi@0: _index(index.register_or_noreg()), aoqi@0: _scale(scale), aoqi@0: _disp (disp + (index.constant_or_zero() * scale_size(scale))) { aoqi@0: if (!index.is_register()) scale = Address::no_scale; aoqi@0: assert(!_index->is_valid() == (scale == Address::no_scale), aoqi@0: "inconsistent address"); aoqi@0: } aoqi@0: aoqi@0: Address plus_disp(int disp) const { aoqi@0: Address a = (*this); aoqi@0: a._disp += disp; aoqi@0: return a; aoqi@0: } aoqi@0: Address plus_disp(RegisterOrConstant disp, ScaleFactor scale = times_1) const { aoqi@0: Address a = (*this); aoqi@0: a._disp += disp.constant_or_zero() * scale_size(scale); aoqi@0: if (disp.is_register()) { aoqi@0: assert(!a.index()->is_valid(), "competing indexes"); aoqi@0: a._index = disp.as_register(); aoqi@0: a._scale = scale; aoqi@0: } aoqi@0: return a; aoqi@0: } aoqi@0: bool is_same_address(Address a) const { aoqi@0: // disregard _rspec aoqi@0: return _base == a._base && _disp == a._disp && _index == a._index && _scale == a._scale; aoqi@0: } aoqi@0: aoqi@0: // The following two overloads are used in connection with the aoqi@0: // ByteSize type (see sizes.hpp). They simplify the use of aoqi@0: // ByteSize'd arguments in assembly code. Note that their equivalent aoqi@0: // for the optimized build are the member functions with int disp aoqi@0: // argument since ByteSize is mapped to an int type in that case. 
aoqi@0: // aoqi@0: // Note: DO NOT introduce similar overloaded functions for WordSize aoqi@0: // arguments as in the optimized mode, both ByteSize and WordSize aoqi@0: // are mapped to the same type and thus the compiler cannot make a aoqi@0: // distinction anymore (=> compiler errors). aoqi@0: aoqi@0: #ifdef ASSERT aoqi@0: Address(Register base, ByteSize disp) aoqi@0: : _base(base), aoqi@0: _index(noreg), aoqi@0: _scale(no_scale), aoqi@0: _disp(in_bytes(disp)) { aoqi@0: } aoqi@0: aoqi@0: Address(Register base, Register index, ScaleFactor scale, ByteSize disp) aoqi@0: : _base(base), aoqi@0: _index(index), aoqi@0: _scale(scale), aoqi@0: _disp(in_bytes(disp)) { aoqi@0: assert(!index->is_valid() == (scale == Address::no_scale), aoqi@0: "inconsistent address"); aoqi@0: } aoqi@0: aoqi@0: Address(Register base, RegisterOrConstant index, ScaleFactor scale, ByteSize disp) aoqi@0: : _base (base), aoqi@0: _index(index.register_or_noreg()), aoqi@0: _scale(scale), aoqi@0: _disp (in_bytes(disp) + (index.constant_or_zero() * scale_size(scale))) { aoqi@0: if (!index.is_register()) scale = Address::no_scale; aoqi@0: assert(!_index->is_valid() == (scale == Address::no_scale), aoqi@0: "inconsistent address"); aoqi@0: } aoqi@0: aoqi@0: #endif // ASSERT aoqi@0: aoqi@0: // accessors aoqi@0: bool uses(Register reg) const { return _base == reg || _index == reg; } aoqi@0: Register base() const { return _base; } aoqi@0: Register index() const { return _index; } aoqi@0: ScaleFactor scale() const { return _scale; } aoqi@0: int disp() const { return _disp; } aoqi@0: aoqi@0: // Convert the raw encoding form into the form expected by the constructor for aoqi@0: // Address. An index of 4 (rsp) corresponds to having no index, so convert aoqi@0: // that to noreg for the Address constructor. 
aoqi@0: static Address make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc); aoqi@0: aoqi@0: static Address make_array(ArrayAddress); aoqi@0: aoqi@0: private: aoqi@0: bool base_needs_rex() const { aoqi@0: return _base != noreg && _base->encoding() >= 8; aoqi@0: } aoqi@0: aoqi@0: bool index_needs_rex() const { aoqi@0: return _index != noreg &&_index->encoding() >= 8; aoqi@0: } aoqi@0: aoqi@0: relocInfo::relocType reloc() const { return _rspec.type(); } aoqi@0: aoqi@0: friend class Assembler; aoqi@0: friend class MacroAssembler; aoqi@0: friend class LIR_Assembler; // base/index/scale/disp aoqi@0: }; aoqi@0: aoqi@0: // aoqi@0: // AddressLiteral has been split out from Address because operands of this type aoqi@0: // need to be treated specially on 32bit vs. 64bit platforms. By splitting it out aoqi@0: // the few instructions that need to deal with address literals are unique and the aoqi@0: // MacroAssembler does not have to implement every instruction in the Assembler aoqi@0: // in order to search for address literals that may need special handling depending aoqi@0: // on the instruction and the platform. A small step on the way to merging i486/amd64 aoqi@0: // directories. aoqi@0: // aoqi@0: class AddressLiteral VALUE_OBJ_CLASS_SPEC { aoqi@0: friend class ArrayAddress; aoqi@0: RelocationHolder _rspec; aoqi@0: // Typically when we use AddressLiterals we want to use their rval. aoqi@0: // However in some situations we want the lval (effective address) of the item. aoqi@0: // We provide a special factory for making those lvals. aoqi@0: bool _is_lval; aoqi@0: aoqi@0: // If the target is far we'll need to load the ea of this to aoqi@0: // a register to reach it. Otherwise if near we can do rip aoqi@0: // relative addressing. 
aoqi@0: aoqi@0: address _target; aoqi@0: aoqi@0: protected: aoqi@0: // creation aoqi@0: AddressLiteral() aoqi@0: : _is_lval(false), aoqi@0: _target(NULL) aoqi@0: {} aoqi@0: aoqi@0: public: aoqi@0: aoqi@0: aoqi@0: AddressLiteral(address target, relocInfo::relocType rtype); aoqi@0: aoqi@0: AddressLiteral(address target, RelocationHolder const& rspec) aoqi@0: : _rspec(rspec), aoqi@0: _is_lval(false), aoqi@0: _target(target) aoqi@0: {} aoqi@0: aoqi@0: AddressLiteral addr() { aoqi@0: AddressLiteral ret = *this; aoqi@0: ret._is_lval = true; aoqi@0: return ret; aoqi@0: } aoqi@0: aoqi@0: aoqi@0: private: aoqi@0: aoqi@0: address target() { return _target; } aoqi@0: bool is_lval() { return _is_lval; } aoqi@0: aoqi@0: relocInfo::relocType reloc() const { return _rspec.type(); } aoqi@0: const RelocationHolder& rspec() const { return _rspec; } aoqi@0: aoqi@0: friend class Assembler; aoqi@0: friend class MacroAssembler; aoqi@0: friend class Address; aoqi@0: friend class LIR_Assembler; aoqi@0: }; aoqi@0: aoqi@0: // Convenience classes aoqi@0: class RuntimeAddress: public AddressLiteral { aoqi@0: aoqi@0: public: aoqi@0: aoqi@0: RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {} aoqi@0: aoqi@0: }; aoqi@0: aoqi@0: class ExternalAddress: public AddressLiteral { aoqi@0: private: aoqi@0: static relocInfo::relocType reloc_for_target(address target) { aoqi@0: // Sometimes ExternalAddress is used for values which aren't aoqi@0: // exactly addresses, like the card table base. aoqi@0: // external_word_type can't be used for values in the first page aoqi@0: // so just skip the reloc in that case. aoqi@0: return external_word_Relocation::can_be_relocated(target) ? 
relocInfo::external_word_type : relocInfo::none; aoqi@0: } aoqi@0: aoqi@0: public: aoqi@0: aoqi@0: ExternalAddress(address target) : AddressLiteral(target, reloc_for_target(target)) {} aoqi@0: aoqi@0: }; aoqi@0: aoqi@0: class InternalAddress: public AddressLiteral { aoqi@0: aoqi@0: public: aoqi@0: aoqi@0: InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {} aoqi@0: aoqi@0: }; aoqi@0: aoqi@0: // x86 can do array addressing as a single operation since disp can be an absolute aoqi@0: // address; amd64 can't. We create a class that expresses the concept but does extra aoqi@0: // magic on amd64 to get the final result. aoqi@0: aoqi@0: class ArrayAddress VALUE_OBJ_CLASS_SPEC { aoqi@0: private: aoqi@0: aoqi@0: AddressLiteral _base; aoqi@0: Address _index; aoqi@0: aoqi@0: public: aoqi@0: aoqi@0: ArrayAddress() {}; aoqi@0: ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {}; aoqi@0: AddressLiteral base() { return _base; } aoqi@0: Address index() { return _index; } aoqi@0: aoqi@0: }; aoqi@0: aoqi@0: const int FPUStateSizeInWords = NOT_LP64(27) LP64_ONLY( 512 / wordSize); aoqi@0: aoqi@0: // The Intel x86/Amd64 Assembler: Pure assembler doing NO optimizations on the instruction aoqi@0: // level (e.g. mov rax, 0 is not translated into xor rax, rax!); i.e., what you write aoqi@0: // is what you get. The Assembler is generating code into a CodeBuffer. aoqi@0: aoqi@0: class Assembler : public AbstractAssembler { aoqi@0: friend class AbstractAssembler; // for the non-virtual hack aoqi@0: friend class LIR_Assembler; // as_Address() aoqi@0: friend class StubGenerator; aoqi@0: aoqi@0: public: aoqi@0: enum Condition { // The x86 condition codes used for conditional jumps/moves. 
aoqi@0: zero = 0x4, aoqi@0: notZero = 0x5, aoqi@0: equal = 0x4, aoqi@0: notEqual = 0x5, aoqi@0: less = 0xc, aoqi@0: lessEqual = 0xe, aoqi@0: greater = 0xf, aoqi@0: greaterEqual = 0xd, aoqi@0: below = 0x2, aoqi@0: belowEqual = 0x6, aoqi@0: above = 0x7, aoqi@0: aboveEqual = 0x3, aoqi@0: overflow = 0x0, aoqi@0: noOverflow = 0x1, aoqi@0: carrySet = 0x2, aoqi@0: carryClear = 0x3, aoqi@0: negative = 0x8, aoqi@0: positive = 0x9, aoqi@0: parity = 0xa, aoqi@0: noParity = 0xb aoqi@0: }; aoqi@0: aoqi@0: enum Prefix { aoqi@0: // segment overrides aoqi@0: CS_segment = 0x2e, aoqi@0: SS_segment = 0x36, aoqi@0: DS_segment = 0x3e, aoqi@0: ES_segment = 0x26, aoqi@0: FS_segment = 0x64, aoqi@0: GS_segment = 0x65, aoqi@0: aoqi@0: REX = 0x40, aoqi@0: aoqi@0: REX_B = 0x41, aoqi@0: REX_X = 0x42, aoqi@0: REX_XB = 0x43, aoqi@0: REX_R = 0x44, aoqi@0: REX_RB = 0x45, aoqi@0: REX_RX = 0x46, aoqi@0: REX_RXB = 0x47, aoqi@0: aoqi@0: REX_W = 0x48, aoqi@0: aoqi@0: REX_WB = 0x49, aoqi@0: REX_WX = 0x4A, aoqi@0: REX_WXB = 0x4B, aoqi@0: REX_WR = 0x4C, aoqi@0: REX_WRB = 0x4D, aoqi@0: REX_WRX = 0x4E, aoqi@0: REX_WRXB = 0x4F, aoqi@0: aoqi@0: VEX_3bytes = 0xC4, aoqi@0: VEX_2bytes = 0xC5 aoqi@0: }; aoqi@0: aoqi@0: enum VexPrefix { aoqi@0: VEX_B = 0x20, aoqi@0: VEX_X = 0x40, aoqi@0: VEX_R = 0x80, aoqi@0: VEX_W = 0x80 aoqi@0: }; aoqi@0: aoqi@0: enum VexSimdPrefix { aoqi@0: VEX_SIMD_NONE = 0x0, aoqi@0: VEX_SIMD_66 = 0x1, aoqi@0: VEX_SIMD_F3 = 0x2, aoqi@0: VEX_SIMD_F2 = 0x3 aoqi@0: }; aoqi@0: aoqi@0: enum VexOpcode { aoqi@0: VEX_OPCODE_NONE = 0x0, aoqi@0: VEX_OPCODE_0F = 0x1, aoqi@0: VEX_OPCODE_0F_38 = 0x2, aoqi@0: VEX_OPCODE_0F_3A = 0x3 aoqi@0: }; aoqi@0: aoqi@0: enum WhichOperand { aoqi@0: // input to locate_operand, and format code for relocations aoqi@0: imm_operand = 0, // embedded 32-bit|64-bit immediate operand aoqi@0: disp32_operand = 1, // embedded 32-bit displacement or address aoqi@0: call32_operand = 2, // embedded 32-bit self-relative displacement aoqi@0: #ifndef _LP64 aoqi@0: _WhichOperand_limit = 
3 aoqi@0: #else aoqi@0: narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop aoqi@0: _WhichOperand_limit = 4 aoqi@0: #endif aoqi@0: }; aoqi@0: aoqi@0: aoqi@0: aoqi@0: // NOTE: The general philosophy of the declarations here is that 64bit versions aoqi@0: // of instructions are freely declared without the need for wrapping them in an ifdef. aoqi@0: // (Some dangerous instructions are ifdef'd out of inappropriate JVMs.) aoqi@0: // In the .cpp file the implementations are wrapped so that they are dropped out aoqi@0: // of the resulting jvm. This is done mostly to keep the footprint of MINIMAL aoqi@0: // to the size it was prior to merging up the 32bit and 64bit assemblers. aoqi@0: // aoqi@0: // This does mean you'll get a linker/runtime error if you use a 64bit only instruction aoqi@0: // in a 32bit vm. This is somewhat unfortunate but keeps the ifdef noise down. aoqi@0: aoqi@0: private: aoqi@0: aoqi@0: aoqi@0: // 64bit prefixes aoqi@0: int prefix_and_encode(int reg_enc, bool byteinst = false); aoqi@0: int prefixq_and_encode(int reg_enc); aoqi@0: aoqi@0: int prefix_and_encode(int dst_enc, int src_enc, bool byteinst = false); aoqi@0: int prefixq_and_encode(int dst_enc, int src_enc); aoqi@0: aoqi@0: void prefix(Register reg); aoqi@0: void prefix(Address adr); aoqi@0: void prefixq(Address adr); aoqi@0: aoqi@0: void prefix(Address adr, Register reg, bool byteinst = false); aoqi@0: void prefix(Address adr, XMMRegister reg); aoqi@0: void prefixq(Address adr, Register reg); aoqi@0: void prefixq(Address adr, XMMRegister reg); aoqi@0: aoqi@0: void prefetch_prefix(Address src); aoqi@0: aoqi@0: void rex_prefix(Address adr, XMMRegister xreg, aoqi@0: VexSimdPrefix pre, VexOpcode opc, bool rex_w); aoqi@0: int rex_prefix_and_encode(int dst_enc, int src_enc, aoqi@0: VexSimdPrefix pre, VexOpcode opc, bool rex_w); aoqi@0: aoqi@0: void vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, aoqi@0: int nds_enc, VexSimdPrefix pre, VexOpcode opc, aoqi@0: bool vector256); 
aoqi@0: aoqi@0: void vex_prefix(Address adr, int nds_enc, int xreg_enc, aoqi@0: VexSimdPrefix pre, VexOpcode opc, aoqi@0: bool vex_w, bool vector256); aoqi@0: aoqi@0: void vex_prefix(XMMRegister dst, XMMRegister nds, Address src, aoqi@0: VexSimdPrefix pre, bool vector256 = false) { aoqi@0: int dst_enc = dst->encoding(); aoqi@0: int nds_enc = nds->is_valid() ? nds->encoding() : 0; aoqi@0: vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, false, vector256); aoqi@0: } aoqi@0: aoqi@0: void vex_prefix_0F38(Register dst, Register nds, Address src) { aoqi@0: bool vex_w = false; aoqi@0: bool vector256 = false; aoqi@0: vex_prefix(src, nds->encoding(), dst->encoding(), aoqi@0: VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256); aoqi@0: } aoqi@0: aoqi@0: void vex_prefix_0F38_q(Register dst, Register nds, Address src) { aoqi@0: bool vex_w = true; aoqi@0: bool vector256 = false; aoqi@0: vex_prefix(src, nds->encoding(), dst->encoding(), aoqi@0: VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256); aoqi@0: } aoqi@0: int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, aoqi@0: VexSimdPrefix pre, VexOpcode opc, aoqi@0: bool vex_w, bool vector256); aoqi@0: aoqi@0: int vex_prefix_0F38_and_encode(Register dst, Register nds, Register src) { aoqi@0: bool vex_w = false; aoqi@0: bool vector256 = false; aoqi@0: return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), aoqi@0: VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256); aoqi@0: } aoqi@0: int vex_prefix_0F38_and_encode_q(Register dst, Register nds, Register src) { aoqi@0: bool vex_w = true; aoqi@0: bool vector256 = false; aoqi@0: return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), aoqi@0: VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256); aoqi@0: } aoqi@0: int vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, aoqi@0: VexSimdPrefix pre, bool vector256 = false, aoqi@0: VexOpcode opc = VEX_OPCODE_0F) { aoqi@0: int src_enc = src->encoding(); aoqi@0: 
int dst_enc = dst->encoding(); aoqi@0: int nds_enc = nds->is_valid() ? nds->encoding() : 0; aoqi@0: return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, false, vector256); aoqi@0: } aoqi@0: aoqi@0: void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, aoqi@0: VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F, aoqi@0: bool rex_w = false, bool vector256 = false); aoqi@0: aoqi@0: void simd_prefix(XMMRegister dst, Address src, aoqi@0: VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) { aoqi@0: simd_prefix(dst, xnoreg, src, pre, opc); aoqi@0: } aoqi@0: aoqi@0: void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre) { aoqi@0: simd_prefix(src, dst, pre); aoqi@0: } aoqi@0: void simd_prefix_q(XMMRegister dst, XMMRegister nds, Address src, aoqi@0: VexSimdPrefix pre) { aoqi@0: bool rex_w = true; aoqi@0: simd_prefix(dst, nds, src, pre, VEX_OPCODE_0F, rex_w); aoqi@0: } aoqi@0: aoqi@0: int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, aoqi@0: VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F, aoqi@0: bool rex_w = false, bool vector256 = false); aoqi@0: aoqi@0: // Move/convert 32-bit integer value. aoqi@0: int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src, aoqi@0: VexSimdPrefix pre) { aoqi@0: // It is OK to cast from Register to XMMRegister to pass argument here aoqi@0: // since only encoding is used in simd_prefix_and_encode() and number of aoqi@0: // Gen and Xmm registers are the same. 
aoqi@0: return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre); aoqi@0: } aoqi@0: int simd_prefix_and_encode(XMMRegister dst, Register src, VexSimdPrefix pre) { aoqi@0: return simd_prefix_and_encode(dst, xnoreg, src, pre); aoqi@0: } aoqi@0: int simd_prefix_and_encode(Register dst, XMMRegister src, aoqi@0: VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) { aoqi@0: return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc); aoqi@0: } aoqi@0: aoqi@0: // Move/convert 64-bit integer value. aoqi@0: int simd_prefix_and_encode_q(XMMRegister dst, XMMRegister nds, Register src, aoqi@0: VexSimdPrefix pre) { aoqi@0: bool rex_w = true; aoqi@0: return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, VEX_OPCODE_0F, rex_w); aoqi@0: } aoqi@0: int simd_prefix_and_encode_q(XMMRegister dst, Register src, VexSimdPrefix pre) { aoqi@0: return simd_prefix_and_encode_q(dst, xnoreg, src, pre); aoqi@0: } aoqi@0: int simd_prefix_and_encode_q(Register dst, XMMRegister src, aoqi@0: VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) { aoqi@0: bool rex_w = true; aoqi@0: return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc, rex_w); aoqi@0: } aoqi@0: aoqi@0: // Helper functions for groups of instructions aoqi@0: void emit_arith_b(int op1, int op2, Register dst, int imm8); aoqi@0: aoqi@0: void emit_arith(int op1, int op2, Register dst, int32_t imm32); aoqi@0: // Force generation of a 4 byte immediate value even if it fits into 8bit aoqi@0: void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32); aoqi@0: void emit_arith(int op1, int op2, Register dst, Register src); aoqi@0: aoqi@0: void emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre); aoqi@0: void emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre); aoqi@0: void emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre); aoqi@0: void 
emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre); aoqi@0: void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, aoqi@0: Address src, VexSimdPrefix pre, bool vector256); aoqi@0: void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, aoqi@0: XMMRegister src, VexSimdPrefix pre, bool vector256); aoqi@0: aoqi@0: void emit_operand(Register reg, aoqi@0: Register base, Register index, Address::ScaleFactor scale, aoqi@0: int disp, aoqi@0: RelocationHolder const& rspec, aoqi@0: int rip_relative_correction = 0); aoqi@0: aoqi@0: void emit_operand(Register reg, Address adr, int rip_relative_correction = 0); aoqi@0: aoqi@0: // operands that only take the original 32bit registers aoqi@0: void emit_operand32(Register reg, Address adr); aoqi@0: aoqi@0: void emit_operand(XMMRegister reg, aoqi@0: Register base, Register index, Address::ScaleFactor scale, aoqi@0: int disp, aoqi@0: RelocationHolder const& rspec); aoqi@0: aoqi@0: void emit_operand(XMMRegister reg, Address adr); aoqi@0: aoqi@0: void emit_operand(MMXRegister reg, Address adr); aoqi@0: aoqi@0: // workaround gcc (3.2.1-7) bug aoqi@0: void emit_operand(Address adr, MMXRegister reg); aoqi@0: aoqi@0: aoqi@0: // Immediate-to-memory forms aoqi@0: void emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32); aoqi@0: aoqi@0: void emit_farith(int b1, int b2, int i); aoqi@0: aoqi@0: aoqi@0: protected: aoqi@0: #ifdef ASSERT aoqi@0: void check_relocation(RelocationHolder const& rspec, int format); aoqi@0: #endif aoqi@0: aoqi@0: void emit_data(jint data, relocInfo::relocType rtype, int format); aoqi@0: void emit_data(jint data, RelocationHolder const& rspec, int format); aoqi@0: void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0); aoqi@0: void emit_data64(jlong data, RelocationHolder const& rspec, int format = 0); aoqi@0: aoqi@0: bool reachable(AddressLiteral adr) NOT_LP64({ return true;}); aoqi@0: aoqi@0: // These are all easily abused and 
hence protected aoqi@0: aoqi@0: // 32BIT ONLY SECTION aoqi@0: #ifndef _LP64 aoqi@0: // Make these disappear in 64bit mode since they would never be correct aoqi@0: void cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY aoqi@0: void cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY aoqi@0: aoqi@0: void mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY aoqi@0: void mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY aoqi@0: aoqi@0: void push_literal32(int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY aoqi@0: #else aoqi@0: // 64BIT ONLY SECTION aoqi@0: void mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec); // 64BIT ONLY aoqi@0: aoqi@0: void cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec); aoqi@0: void cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec); aoqi@0: aoqi@0: void mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec); aoqi@0: void mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec); aoqi@0: #endif // _LP64 aoqi@0: aoqi@0: // These are unique in that we are ensured by the caller that the 32bit aoqi@0: // relative in these instructions will always be able to reach the potentially aoqi@0: // 64bit address described by entry. Since they can take a 64bit address they aoqi@0: // don't have the 32 suffix like the other instructions in this class. 
aoqi@0: aoqi@0: void call_literal(address entry, RelocationHolder const& rspec); aoqi@0: void jmp_literal(address entry, RelocationHolder const& rspec); aoqi@0: aoqi@0: // "Avoid using directly" section aoqi@0: // Instructions in this section are actually usable by anyone without danger aoqi@0: // of failure but have performance issues that are addressed by enhanced aoqi@0: // instructions which will do the proper thing based on the particular cpu. aoqi@0: // We protect them because we don't trust you... aoqi@0: aoqi@0: // Don't use the following inc() and dec() methods directly. INC & DEC instructions aoqi@0: // could cause a partial flag stall since they don't set CF flag. aoqi@0: // Use MacroAssembler::decrement() & MacroAssembler::increment() methods aoqi@0: // which call inc() & dec() or add() & sub() in accordance with aoqi@0: // the product flag UseIncDec value. aoqi@0: aoqi@0: void decl(Register dst); aoqi@0: void decl(Address dst); aoqi@0: void decq(Register dst); aoqi@0: void decq(Address dst); aoqi@0: aoqi@0: void incl(Register dst); aoqi@0: void incl(Address dst); aoqi@0: void incq(Register dst); aoqi@0: void incq(Address dst); aoqi@0: aoqi@0: // New cpus require use of movsd and movss to avoid partial register stall aoqi@0: // when loading from memory. But for old Opteron use movlpd instead of movsd. aoqi@0: // The selection is done in MacroAssembler::movdbl() and movflt(). 
aoqi@0: aoqi@0: // Move Scalar Single-Precision Floating-Point Values aoqi@0: void movss(XMMRegister dst, Address src); aoqi@0: void movss(XMMRegister dst, XMMRegister src); aoqi@0: void movss(Address dst, XMMRegister src); aoqi@0: aoqi@0: // Move Scalar Double-Precision Floating-Point Values aoqi@0: void movsd(XMMRegister dst, Address src); aoqi@0: void movsd(XMMRegister dst, XMMRegister src); aoqi@0: void movsd(Address dst, XMMRegister src); aoqi@0: void movlpd(XMMRegister dst, Address src); aoqi@0: aoqi@0: // New cpus require use of movaps and movapd to avoid partial register stall aoqi@0: // when moving between registers. aoqi@0: void movaps(XMMRegister dst, XMMRegister src); aoqi@0: void movapd(XMMRegister dst, XMMRegister src); aoqi@0: aoqi@0: // End avoid using directly aoqi@0: aoqi@0: aoqi@0: // Instruction prefixes aoqi@0: void prefix(Prefix p); aoqi@0: aoqi@0: public: aoqi@0: aoqi@0: // Creation aoqi@0: Assembler(CodeBuffer* code) : AbstractAssembler(code) {} aoqi@0: aoqi@0: // Decoding aoqi@0: static address locate_operand(address inst, WhichOperand which); aoqi@0: static address locate_next_instruction(address inst); aoqi@0: aoqi@0: // Utilities aoqi@0: static bool is_polling_page_far() NOT_LP64({ return false;}); aoqi@0: aoqi@0: // Generic instructions aoqi@0: // Does 32bit or 64bit as needed for the platform. 
In some sense these aoqi@0: // belong in macro assembler but there is no need for both varieties to exist aoqi@0: aoqi@0: void lea(Register dst, Address src); aoqi@0: aoqi@0: void mov(Register dst, Register src); aoqi@0: aoqi@0: void pusha(); aoqi@0: void popa(); aoqi@0: aoqi@0: void pushf(); aoqi@0: void popf(); aoqi@0: aoqi@0: void push(int32_t imm32); aoqi@0: aoqi@0: void push(Register src); aoqi@0: aoqi@0: void pop(Register dst); aoqi@0: aoqi@0: // These are dummies to prevent surprise implicit conversions to Register aoqi@0: void push(void* v); aoqi@0: void pop(void* v); aoqi@0: aoqi@0: // These do register sized moves/scans aoqi@0: void rep_mov(); aoqi@0: void rep_stos(); aoqi@0: void rep_stosb(); aoqi@0: void repne_scan(); aoqi@0: #ifdef _LP64 aoqi@0: void repne_scanl(); aoqi@0: #endif aoqi@0: aoqi@0: // Vanilla instructions in lexical order aoqi@0: aoqi@0: void adcl(Address dst, int32_t imm32); aoqi@0: void adcl(Address dst, Register src); aoqi@0: void adcl(Register dst, int32_t imm32); aoqi@0: void adcl(Register dst, Address src); aoqi@0: void adcl(Register dst, Register src); aoqi@0: aoqi@0: void adcq(Register dst, int32_t imm32); aoqi@0: void adcq(Register dst, Address src); aoqi@0: void adcq(Register dst, Register src); aoqi@0: aoqi@0: void addl(Address dst, int32_t imm32); aoqi@0: void addl(Address dst, Register src); aoqi@0: void addl(Register dst, int32_t imm32); aoqi@0: void addl(Register dst, Address src); aoqi@0: void addl(Register dst, Register src); aoqi@0: aoqi@0: void addq(Address dst, int32_t imm32); aoqi@0: void addq(Address dst, Register src); aoqi@0: void addq(Register dst, int32_t imm32); aoqi@0: void addq(Register dst, Address src); aoqi@0: void addq(Register dst, Register src); aoqi@0: aoqi@0: void addr_nop_4(); aoqi@0: void addr_nop_5(); aoqi@0: void addr_nop_7(); aoqi@0: void addr_nop_8(); aoqi@0: aoqi@0: // Add Scalar Double-Precision Floating-Point Values aoqi@0: void addsd(XMMRegister dst, Address src); aoqi@0: void 
addsd(XMMRegister dst, XMMRegister src); aoqi@0: aoqi@0: // Add Scalar Single-Precision Floating-Point Values aoqi@0: void addss(XMMRegister dst, Address src); aoqi@0: void addss(XMMRegister dst, XMMRegister src); aoqi@0: aoqi@0: // AES instructions aoqi@0: void aesdec(XMMRegister dst, Address src); aoqi@0: void aesdec(XMMRegister dst, XMMRegister src); aoqi@0: void aesdeclast(XMMRegister dst, Address src); aoqi@0: void aesdeclast(XMMRegister dst, XMMRegister src); aoqi@0: void aesenc(XMMRegister dst, Address src); aoqi@0: void aesenc(XMMRegister dst, XMMRegister src); aoqi@0: void aesenclast(XMMRegister dst, Address src); aoqi@0: void aesenclast(XMMRegister dst, XMMRegister src); aoqi@0: aoqi@0: aoqi@0: void andl(Address dst, int32_t imm32); aoqi@0: void andl(Register dst, int32_t imm32); aoqi@0: void andl(Register dst, Address src); aoqi@0: void andl(Register dst, Register src); aoqi@0: aoqi@0: void andq(Address dst, int32_t imm32); aoqi@0: void andq(Register dst, int32_t imm32); aoqi@0: void andq(Register dst, Address src); aoqi@0: void andq(Register dst, Register src); aoqi@0: aoqi@0: // BMI instructions aoqi@0: void andnl(Register dst, Register src1, Register src2); aoqi@0: void andnl(Register dst, Register src1, Address src2); aoqi@0: void andnq(Register dst, Register src1, Register src2); aoqi@0: void andnq(Register dst, Register src1, Address src2); aoqi@0: aoqi@0: void blsil(Register dst, Register src); aoqi@0: void blsil(Register dst, Address src); aoqi@0: void blsiq(Register dst, Register src); aoqi@0: void blsiq(Register dst, Address src); aoqi@0: aoqi@0: void blsmskl(Register dst, Register src); aoqi@0: void blsmskl(Register dst, Address src); aoqi@0: void blsmskq(Register dst, Register src); aoqi@0: void blsmskq(Register dst, Address src); aoqi@0: aoqi@0: void blsrl(Register dst, Register src); aoqi@0: void blsrl(Register dst, Address src); aoqi@0: void blsrq(Register dst, Register src); aoqi@0: void blsrq(Register dst, Address src); aoqi@0: aoqi@0: 
void bsfl(Register dst, Register src); aoqi@0: void bsrl(Register dst, Register src); aoqi@0: aoqi@0: #ifdef _LP64 aoqi@0: void bsfq(Register dst, Register src); aoqi@0: void bsrq(Register dst, Register src); aoqi@0: #endif aoqi@0: aoqi@0: void bswapl(Register reg); aoqi@0: aoqi@0: void bswapq(Register reg); aoqi@0: aoqi@0: void call(Label& L, relocInfo::relocType rtype); aoqi@0: void call(Register reg); // push pc; pc <- reg aoqi@0: void call(Address adr); // push pc; pc <- adr aoqi@0: aoqi@0: void cdql(); aoqi@0: aoqi@0: void cdqq(); aoqi@0: aoqi@0: void cld(); aoqi@0: aoqi@0: void clflush(Address adr); aoqi@0: aoqi@0: void cmovl(Condition cc, Register dst, Register src); aoqi@0: void cmovl(Condition cc, Register dst, Address src); aoqi@0: aoqi@0: void cmovq(Condition cc, Register dst, Register src); aoqi@0: void cmovq(Condition cc, Register dst, Address src); aoqi@0: aoqi@0: aoqi@0: void cmpb(Address dst, int imm8); aoqi@0: aoqi@0: void cmpl(Address dst, int32_t imm32); aoqi@0: aoqi@0: void cmpl(Register dst, int32_t imm32); aoqi@0: void cmpl(Register dst, Register src); aoqi@0: void cmpl(Register dst, Address src); aoqi@0: aoqi@0: void cmpq(Address dst, int32_t imm32); aoqi@0: void cmpq(Address dst, Register src); aoqi@0: aoqi@0: void cmpq(Register dst, int32_t imm32); aoqi@0: void cmpq(Register dst, Register src); aoqi@0: void cmpq(Register dst, Address src); aoqi@0: aoqi@0: // these are dummies used to catch attempting to convert NULL to Register aoqi@0: void cmpl(Register dst, void* junk); // dummy aoqi@0: void cmpq(Register dst, void* junk); // dummy aoqi@0: aoqi@0: void cmpw(Address dst, int imm16); aoqi@0: aoqi@0: void cmpxchg8 (Address adr); aoqi@0: aoqi@0: void cmpxchgl(Register reg, Address adr); aoqi@0: aoqi@0: void cmpxchgq(Register reg, Address adr); aoqi@0: aoqi@0: // Ordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS aoqi@0: void comisd(XMMRegister dst, Address src); aoqi@0: void comisd(XMMRegister dst, XMMRegister src); 
aoqi@0: aoqi@0: // Ordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS aoqi@0: void comiss(XMMRegister dst, Address src); aoqi@0: void comiss(XMMRegister dst, XMMRegister src); aoqi@0: aoqi@0: // Identify processor type and features aoqi@0: void cpuid(); aoqi@0: aoqi@0: // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value aoqi@0: void cvtsd2ss(XMMRegister dst, XMMRegister src); aoqi@0: void cvtsd2ss(XMMRegister dst, Address src); aoqi@0: aoqi@0: // Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value aoqi@0: void cvtsi2sdl(XMMRegister dst, Register src); aoqi@0: void cvtsi2sdl(XMMRegister dst, Address src); aoqi@0: void cvtsi2sdq(XMMRegister dst, Register src); aoqi@0: void cvtsi2sdq(XMMRegister dst, Address src); aoqi@0: aoqi@0: // Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value aoqi@0: void cvtsi2ssl(XMMRegister dst, Register src); aoqi@0: void cvtsi2ssl(XMMRegister dst, Address src); aoqi@0: void cvtsi2ssq(XMMRegister dst, Register src); aoqi@0: void cvtsi2ssq(XMMRegister dst, Address src); aoqi@0: aoqi@0: // Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Value aoqi@0: void cvtdq2pd(XMMRegister dst, XMMRegister src); aoqi@0: aoqi@0: // Convert Packed Signed Doubleword Integers to Packed Single-Precision Floating-Point Value aoqi@0: void cvtdq2ps(XMMRegister dst, XMMRegister src); aoqi@0: aoqi@0: // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value aoqi@0: void cvtss2sd(XMMRegister dst, XMMRegister src); aoqi@0: void cvtss2sd(XMMRegister dst, Address src); aoqi@0: aoqi@0: // Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer aoqi@0: void cvttsd2sil(Register dst, Address src); aoqi@0: void cvttsd2sil(Register dst, XMMRegister src); aoqi@0: void cvttsd2siq(Register dst, XMMRegister src); aoqi@0: aoqi@0: // Convert 
with Truncation Scalar Single-Precision Floating-Point Value to Doubleword Integer aoqi@0: void cvttss2sil(Register dst, XMMRegister src); aoqi@0: void cvttss2siq(Register dst, XMMRegister src); aoqi@0: aoqi@0: // Divide Scalar Double-Precision Floating-Point Values aoqi@0: void divsd(XMMRegister dst, Address src); aoqi@0: void divsd(XMMRegister dst, XMMRegister src); aoqi@0: aoqi@0: // Divide Scalar Single-Precision Floating-Point Values aoqi@0: void divss(XMMRegister dst, Address src); aoqi@0: void divss(XMMRegister dst, XMMRegister src); aoqi@0: aoqi@0: void emms(); aoqi@0: aoqi@0: void fabs(); aoqi@0: aoqi@0: void fadd(int i); aoqi@0: aoqi@0: void fadd_d(Address src); aoqi@0: void fadd_s(Address src); aoqi@0: aoqi@0: // "Alternate" versions of x87 instructions place result down in FPU aoqi@0: // stack instead of on TOS aoqi@0: aoqi@0: void fadda(int i); // "alternate" fadd aoqi@0: void faddp(int i = 1); aoqi@0: aoqi@0: void fchs(); aoqi@0: aoqi@0: void fcom(int i); aoqi@0: aoqi@0: void fcomp(int i = 1); aoqi@0: void fcomp_d(Address src); aoqi@0: void fcomp_s(Address src); aoqi@0: aoqi@0: void fcompp(); aoqi@0: aoqi@0: void fcos(); aoqi@0: aoqi@0: void fdecstp(); aoqi@0: aoqi@0: void fdiv(int i); aoqi@0: void fdiv_d(Address src); aoqi@0: void fdivr_s(Address src); aoqi@0: void fdiva(int i); // "alternate" fdiv aoqi@0: void fdivp(int i = 1); aoqi@0: aoqi@0: void fdivr(int i); aoqi@0: void fdivr_d(Address src); aoqi@0: void fdiv_s(Address src); aoqi@0: aoqi@0: void fdivra(int i); // "alternate" reversed fdiv aoqi@0: aoqi@0: void fdivrp(int i = 1); aoqi@0: aoqi@0: void ffree(int i = 0); aoqi@0: aoqi@0: void fild_d(Address adr); aoqi@0: void fild_s(Address adr); aoqi@0: aoqi@0: void fincstp(); aoqi@0: aoqi@0: void finit(); aoqi@0: aoqi@0: void fist_s (Address adr); aoqi@0: void fistp_d(Address adr); aoqi@0: void fistp_s(Address adr); aoqi@0: aoqi@0: void fld1(); aoqi@0: aoqi@0: void fld_d(Address adr); aoqi@0: void fld_s(Address adr); aoqi@0: void fld_s(int index); 
aoqi@0: void fld_x(Address adr); // extended-precision (80-bit) format aoqi@0: aoqi@0: void fldcw(Address src); aoqi@0: aoqi@0: void fldenv(Address src); aoqi@0: aoqi@0: void fldlg2(); aoqi@0: aoqi@0: void fldln2(); aoqi@0: aoqi@0: void fldz(); aoqi@0: aoqi@0: void flog(); aoqi@0: void flog10(); aoqi@0: aoqi@0: void fmul(int i); aoqi@0: aoqi@0: void fmul_d(Address src); aoqi@0: void fmul_s(Address src); aoqi@0: aoqi@0: void fmula(int i); // "alternate" fmul aoqi@0: aoqi@0: void fmulp(int i = 1); aoqi@0: aoqi@0: void fnsave(Address dst); aoqi@0: aoqi@0: void fnstcw(Address src); aoqi@0: aoqi@0: void fnstsw_ax(); aoqi@0: aoqi@0: void fprem(); aoqi@0: void fprem1(); aoqi@0: aoqi@0: void frstor(Address src); aoqi@0: aoqi@0: void fsin(); aoqi@0: aoqi@0: void fsqrt(); aoqi@0: aoqi@0: void fst_d(Address adr); aoqi@0: void fst_s(Address adr); aoqi@0: aoqi@0: void fstp_d(Address adr); aoqi@0: void fstp_d(int index); aoqi@0: void fstp_s(Address adr); aoqi@0: void fstp_x(Address adr); // extended-precision (80-bit) format aoqi@0: aoqi@0: void fsub(int i); aoqi@0: void fsub_d(Address src); aoqi@0: void fsub_s(Address src); aoqi@0: aoqi@0: void fsuba(int i); // "alternate" fsub aoqi@0: aoqi@0: void fsubp(int i = 1); aoqi@0: aoqi@0: void fsubr(int i); aoqi@0: void fsubr_d(Address src); aoqi@0: void fsubr_s(Address src); aoqi@0: aoqi@0: void fsubra(int i); // "alternate" reversed fsub aoqi@0: aoqi@0: void fsubrp(int i = 1); aoqi@0: aoqi@0: void ftan(); aoqi@0: aoqi@0: void ftst(); aoqi@0: aoqi@0: void fucomi(int i = 1); aoqi@0: void fucomip(int i = 1); aoqi@0: aoqi@0: void fwait(); aoqi@0: aoqi@0: void fxch(int i = 1); aoqi@0: aoqi@0: void fxrstor(Address src); aoqi@0: aoqi@0: void fxsave(Address dst); aoqi@0: aoqi@0: void fyl2x(); aoqi@0: void frndint(); aoqi@0: void f2xm1(); aoqi@0: void fldl2e(); aoqi@0: aoqi@0: void hlt(); aoqi@0: aoqi@0: void idivl(Register src); aoqi@0: void divl(Register src); // Unsigned division aoqi@0: aoqi@0: void idivq(Register src); aoqi@0: aoqi@0: 
void imull(Register dst, Register src); aoqi@0: void imull(Register dst, Register src, int value); aoqi@0: void imull(Register dst, Address src); aoqi@0: aoqi@0: void imulq(Register dst, Register src); aoqi@0: void imulq(Register dst, Register src, int value); aoqi@0: #ifdef _LP64 aoqi@0: void imulq(Register dst, Address src); aoqi@0: #endif aoqi@0: aoqi@0: aoqi@0: // jcc is the generic conditional branch generator to run- aoqi@0: // time routines, jcc is used for branches to labels. jcc aoqi@0: // takes a branch opcode (cc) and a label (L) and generates aoqi@0: // either a backward branch or a forward branch and links it aoqi@0: // to the label fixup chain. Usage: aoqi@0: // aoqi@0: // Label L; // unbound label aoqi@0: // jcc(cc, L); // forward branch to unbound label aoqi@0: // bind(L); // bind label to the current pc aoqi@0: // jcc(cc, L); // backward branch to bound label aoqi@0: // bind(L); // illegal: a label may be bound only once aoqi@0: // aoqi@0: // Note: The same Label can be used for forward and backward branches aoqi@0: // but it may be bound only once. aoqi@0: aoqi@0: void jcc(Condition cc, Label& L, bool maybe_short = true); aoqi@0: aoqi@0: // Conditional jump to a 8-bit offset to L. aoqi@0: // WARNING: be very careful using this for forward jumps. If the label is aoqi@0: // not bound within an 8-bit offset of this instruction, a run-time error aoqi@0: // will occur. aoqi@0: void jccb(Condition cc, Label& L); aoqi@0: aoqi@0: void jmp(Address entry); // pc <- entry aoqi@0: aoqi@0: // Label operations & relative jumps (PPUM Appendix D) aoqi@0: void jmp(Label& L, bool maybe_short = true); // unconditional jump to L aoqi@0: aoqi@0: void jmp(Register entry); // pc <- entry aoqi@0: aoqi@0: // Unconditional 8-bit offset jump to L. aoqi@0: // WARNING: be very careful using this for forward jumps. If the label is aoqi@0: // not bound within an 8-bit offset of this instruction, a run-time error aoqi@0: // will occur. 
aoqi@0: void jmpb(Label& L); aoqi@0: aoqi@0: void ldmxcsr( Address src ); aoqi@0: aoqi@0: void leal(Register dst, Address src); aoqi@0: aoqi@0: void leaq(Register dst, Address src); aoqi@0: aoqi@0: void lfence(); aoqi@0: aoqi@0: void lock(); aoqi@0: aoqi@0: void lzcntl(Register dst, Register src); aoqi@0: aoqi@0: #ifdef _LP64 aoqi@0: void lzcntq(Register dst, Register src); aoqi@0: #endif aoqi@0: aoqi@0: enum Membar_mask_bits { aoqi@0: StoreStore = 1 << 3, aoqi@0: LoadStore = 1 << 2, aoqi@0: StoreLoad = 1 << 1, aoqi@0: LoadLoad = 1 << 0 aoqi@0: }; aoqi@0: aoqi@0: // Serializes memory and blows flags aoqi@0: void membar(Membar_mask_bits order_constraint) { aoqi@0: if (os::is_MP()) { aoqi@0: // We only have to handle StoreLoad aoqi@0: if (order_constraint & StoreLoad) { aoqi@0: // All usable chips support "locked" instructions which suffice aoqi@0: // as barriers, and are much faster than the alternative of aoqi@0: // using cpuid instruction. We use here a locked add [esp],0. aoqi@0: // This is conveniently otherwise a no-op except for blowing aoqi@0: // flags. aoqi@0: // Any change to this code may need to revisit other places in aoqi@0: // the code where this idiom is used, in particular the aoqi@0: // orderAccess code. 
aoqi@0: lock(); aoqi@0: addl(Address(rsp, 0), 0);// Assert the lock# signal here aoqi@0: } aoqi@0: } aoqi@0: } aoqi@0: aoqi@0: void mfence(); aoqi@0: aoqi@0: // Moves aoqi@0: aoqi@0: void mov64(Register dst, int64_t imm64); aoqi@0: aoqi@0: void movb(Address dst, Register src); aoqi@0: void movb(Address dst, int imm8); aoqi@0: void movb(Register dst, Address src); aoqi@0: aoqi@0: void movdl(XMMRegister dst, Register src); aoqi@0: void movdl(Register dst, XMMRegister src); aoqi@0: void movdl(XMMRegister dst, Address src); aoqi@0: void movdl(Address dst, XMMRegister src); aoqi@0: aoqi@0: // Move Double Quadword aoqi@0: void movdq(XMMRegister dst, Register src); aoqi@0: void movdq(Register dst, XMMRegister src); aoqi@0: aoqi@0: // Move Aligned Double Quadword aoqi@0: void movdqa(XMMRegister dst, XMMRegister src); aoqi@0: void movdqa(XMMRegister dst, Address src); aoqi@0: aoqi@0: // Move Unaligned Double Quadword aoqi@0: void movdqu(Address dst, XMMRegister src); aoqi@0: void movdqu(XMMRegister dst, Address src); aoqi@0: void movdqu(XMMRegister dst, XMMRegister src); aoqi@0: aoqi@0: // Move Unaligned 256bit Vector aoqi@0: void vmovdqu(Address dst, XMMRegister src); aoqi@0: void vmovdqu(XMMRegister dst, Address src); aoqi@0: void vmovdqu(XMMRegister dst, XMMRegister src); aoqi@0: aoqi@0: // Move lower 64bit to high 64bit in 128bit register aoqi@0: void movlhps(XMMRegister dst, XMMRegister src); aoqi@0: aoqi@0: void movl(Register dst, int32_t imm32); aoqi@0: void movl(Address dst, int32_t imm32); aoqi@0: void movl(Register dst, Register src); aoqi@0: void movl(Register dst, Address src); aoqi@0: void movl(Address dst, Register src); aoqi@0: aoqi@0: // These dummies prevent using movl from converting a zero (like NULL) into Register aoqi@0: // by giving the compiler two choices it can't resolve aoqi@0: aoqi@0: void movl(Address dst, void* junk); aoqi@0: void movl(Register dst, void* junk); aoqi@0: aoqi@0: #ifdef _LP64 aoqi@0: void movq(Register dst, Register src); aoqi@0: 
void movq(Register dst, Address src); aoqi@0: void movq(Address dst, Register src); aoqi@0: #endif aoqi@0: aoqi@0: void movq(Address dst, MMXRegister src ); aoqi@0: void movq(MMXRegister dst, Address src ); aoqi@0: aoqi@0: #ifdef _LP64 aoqi@0: // These dummies prevent using movq from converting a zero (like NULL) into Register aoqi@0: // by giving the compiler two choices it can't resolve aoqi@0: aoqi@0: void movq(Address dst, void* dummy); aoqi@0: void movq(Register dst, void* dummy); aoqi@0: #endif aoqi@0: aoqi@0: // Move Quadword aoqi@0: void movq(Address dst, XMMRegister src); aoqi@0: void movq(XMMRegister dst, Address src); aoqi@0: aoqi@0: void movsbl(Register dst, Address src); aoqi@0: void movsbl(Register dst, Register src); aoqi@0: aoqi@0: #ifdef _LP64 aoqi@0: void movsbq(Register dst, Address src); aoqi@0: void movsbq(Register dst, Register src); aoqi@0: aoqi@0: // Move signed 32bit immediate to 64bit extending sign aoqi@0: void movslq(Address dst, int32_t imm64); aoqi@0: void movslq(Register dst, int32_t imm64); aoqi@0: aoqi@0: void movslq(Register dst, Address src); aoqi@0: void movslq(Register dst, Register src); aoqi@0: void movslq(Register dst, void* src); // Dummy declaration to cause NULL to be ambiguous aoqi@0: #endif aoqi@0: aoqi@0: void movswl(Register dst, Address src); aoqi@0: void movswl(Register dst, Register src); aoqi@0: aoqi@0: #ifdef _LP64 aoqi@0: void movswq(Register dst, Address src); aoqi@0: void movswq(Register dst, Register src); aoqi@0: #endif aoqi@0: aoqi@0: void movw(Address dst, int imm16); aoqi@0: void movw(Register dst, Address src); aoqi@0: void movw(Address dst, Register src); aoqi@0: aoqi@0: void movzbl(Register dst, Address src); aoqi@0: void movzbl(Register dst, Register src); aoqi@0: aoqi@0: #ifdef _LP64 aoqi@0: void movzbq(Register dst, Address src); aoqi@0: void movzbq(Register dst, Register src); aoqi@0: #endif aoqi@0: aoqi@0: void movzwl(Register dst, Address src); aoqi@0: void movzwl(Register dst, Register src); 
aoqi@0: aoqi@0: #ifdef _LP64 aoqi@0: void movzwq(Register dst, Address src); aoqi@0: void movzwq(Register dst, Register src); aoqi@0: #endif aoqi@0: aoqi@0: void mull(Address src); aoqi@0: void mull(Register src); aoqi@0: aoqi@0: // Multiply Scalar Double-Precision Floating-Point Values aoqi@0: void mulsd(XMMRegister dst, Address src); aoqi@0: void mulsd(XMMRegister dst, XMMRegister src); aoqi@0: aoqi@0: // Multiply Scalar Single-Precision Floating-Point Values aoqi@0: void mulss(XMMRegister dst, Address src); aoqi@0: void mulss(XMMRegister dst, XMMRegister src); aoqi@0: aoqi@0: void negl(Register dst); aoqi@0: aoqi@0: #ifdef _LP64 aoqi@0: void negq(Register dst); aoqi@0: #endif aoqi@0: aoqi@0: void nop(int i = 1); aoqi@0: aoqi@0: void notl(Register dst); aoqi@0: aoqi@0: #ifdef _LP64 aoqi@0: void notq(Register dst); aoqi@0: #endif aoqi@0: aoqi@0: void orl(Address dst, int32_t imm32); aoqi@0: void orl(Register dst, int32_t imm32); aoqi@0: void orl(Register dst, Address src); aoqi@0: void orl(Register dst, Register src); aoqi@0: aoqi@0: void orq(Address dst, int32_t imm32); aoqi@0: void orq(Register dst, int32_t imm32); aoqi@0: void orq(Register dst, Address src); aoqi@0: void orq(Register dst, Register src); aoqi@0: aoqi@0: // Pack with unsigned saturation aoqi@0: void packuswb(XMMRegister dst, XMMRegister src); aoqi@0: void packuswb(XMMRegister dst, Address src); aoqi@0: void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); aoqi@0: aoqi@0: // Pemutation of 64bit words aoqi@0: void vpermq(XMMRegister dst, XMMRegister src, int imm8, bool vector256); aoqi@0: aoqi@0: void pause(); aoqi@0: aoqi@0: // SSE4.2 string instructions aoqi@0: void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8); aoqi@0: void pcmpestri(XMMRegister xmm1, Address src, int imm8); aoqi@0: aoqi@0: // SSE 4.1 extract aoqi@0: void pextrd(Register dst, XMMRegister src, int imm8); aoqi@0: void pextrq(Register dst, XMMRegister src, int imm8); aoqi@0: aoqi@0: // SSE 4.1 
insert aoqi@0: void pinsrd(XMMRegister dst, Register src, int imm8); aoqi@0: void pinsrq(XMMRegister dst, Register src, int imm8); aoqi@0: aoqi@0: // SSE4.1 packed move aoqi@0: void pmovzxbw(XMMRegister dst, XMMRegister src); aoqi@0: void pmovzxbw(XMMRegister dst, Address src); aoqi@0: aoqi@0: #ifndef _LP64 // no 32bit push/pop on amd64 aoqi@0: void popl(Address dst); aoqi@0: #endif aoqi@0: aoqi@0: #ifdef _LP64 aoqi@0: void popq(Address dst); aoqi@0: #endif aoqi@0: aoqi@0: void popcntl(Register dst, Address src); aoqi@0: void popcntl(Register dst, Register src); aoqi@0: aoqi@0: #ifdef _LP64 aoqi@0: void popcntq(Register dst, Address src); aoqi@0: void popcntq(Register dst, Register src); aoqi@0: #endif aoqi@0: aoqi@0: // Prefetches (SSE, SSE2, 3DNOW only) aoqi@0: aoqi@0: void prefetchnta(Address src); aoqi@0: void prefetchr(Address src); aoqi@0: void prefetcht0(Address src); aoqi@0: void prefetcht1(Address src); aoqi@0: void prefetcht2(Address src); aoqi@0: void prefetchw(Address src); aoqi@0: aoqi@0: // Shuffle Bytes aoqi@0: void pshufb(XMMRegister dst, XMMRegister src); aoqi@0: void pshufb(XMMRegister dst, Address src); aoqi@0: aoqi@0: // Shuffle Packed Doublewords aoqi@0: void pshufd(XMMRegister dst, XMMRegister src, int mode); aoqi@0: void pshufd(XMMRegister dst, Address src, int mode); aoqi@0: aoqi@0: // Shuffle Packed Low Words aoqi@0: void pshuflw(XMMRegister dst, XMMRegister src, int mode); aoqi@0: void pshuflw(XMMRegister dst, Address src, int mode); aoqi@0: aoqi@0: // Shift Right by bytes Logical DoubleQuadword Immediate aoqi@0: void psrldq(XMMRegister dst, int shift); aoqi@0: aoqi@0: // Logical Compare 128bit aoqi@0: void ptest(XMMRegister dst, XMMRegister src); aoqi@0: void ptest(XMMRegister dst, Address src); aoqi@0: // Logical Compare 256bit aoqi@0: void vptest(XMMRegister dst, XMMRegister src); aoqi@0: void vptest(XMMRegister dst, Address src); aoqi@0: aoqi@0: // Interleave Low Bytes aoqi@0: void punpcklbw(XMMRegister dst, XMMRegister src); aoqi@0: 
void punpcklbw(XMMRegister dst, Address src); aoqi@0: aoqi@0: // Interleave Low Doublewords aoqi@0: void punpckldq(XMMRegister dst, XMMRegister src); aoqi@0: void punpckldq(XMMRegister dst, Address src); aoqi@0: aoqi@0: // Interleave Low Quadwords aoqi@0: void punpcklqdq(XMMRegister dst, XMMRegister src); aoqi@0: aoqi@0: #ifndef _LP64 // no 32bit push/pop on amd64 aoqi@0: void pushl(Address src); aoqi@0: #endif aoqi@0: aoqi@0: void pushq(Address src); aoqi@0: aoqi@0: void rcll(Register dst, int imm8); aoqi@0: aoqi@0: void rclq(Register dst, int imm8); aoqi@0: aoqi@0: void rdtsc(); aoqi@0: aoqi@0: void ret(int imm16); aoqi@0: aoqi@0: void sahf(); aoqi@0: aoqi@0: void sarl(Register dst, int imm8); aoqi@0: void sarl(Register dst); aoqi@0: aoqi@0: void sarq(Register dst, int imm8); aoqi@0: void sarq(Register dst); aoqi@0: aoqi@0: void sbbl(Address dst, int32_t imm32); aoqi@0: void sbbl(Register dst, int32_t imm32); aoqi@0: void sbbl(Register dst, Address src); aoqi@0: void sbbl(Register dst, Register src); aoqi@0: aoqi@0: void sbbq(Address dst, int32_t imm32); aoqi@0: void sbbq(Register dst, int32_t imm32); aoqi@0: void sbbq(Register dst, Address src); aoqi@0: void sbbq(Register dst, Register src); aoqi@0: aoqi@0: void setb(Condition cc, Register dst); aoqi@0: aoqi@0: void shldl(Register dst, Register src); aoqi@0: aoqi@0: void shll(Register dst, int imm8); aoqi@0: void shll(Register dst); aoqi@0: aoqi@0: void shlq(Register dst, int imm8); aoqi@0: void shlq(Register dst); aoqi@0: aoqi@0: void shrdl(Register dst, Register src); aoqi@0: aoqi@0: void shrl(Register dst, int imm8); aoqi@0: void shrl(Register dst); aoqi@0: aoqi@0: void shrq(Register dst, int imm8); aoqi@0: void shrq(Register dst); aoqi@0: aoqi@0: void smovl(); // QQQ generic? 
aoqi@0: aoqi@0: // Compute Square Root of Scalar Double-Precision Floating-Point Value aoqi@0: void sqrtsd(XMMRegister dst, Address src); aoqi@0: void sqrtsd(XMMRegister dst, XMMRegister src); aoqi@0: aoqi@0: // Compute Square Root of Scalar Single-Precision Floating-Point Value aoqi@0: void sqrtss(XMMRegister dst, Address src); aoqi@0: void sqrtss(XMMRegister dst, XMMRegister src); aoqi@0: aoqi@0: void std(); aoqi@0: aoqi@0: void stmxcsr( Address dst ); aoqi@0: aoqi@0: void subl(Address dst, int32_t imm32); aoqi@0: void subl(Address dst, Register src); aoqi@0: void subl(Register dst, int32_t imm32); aoqi@0: void subl(Register dst, Address src); aoqi@0: void subl(Register dst, Register src); aoqi@0: aoqi@0: void subq(Address dst, int32_t imm32); aoqi@0: void subq(Address dst, Register src); aoqi@0: void subq(Register dst, int32_t imm32); aoqi@0: void subq(Register dst, Address src); aoqi@0: void subq(Register dst, Register src); aoqi@0: aoqi@0: // Force generation of a 4 byte immediate value even if it fits into 8bit aoqi@0: void subl_imm32(Register dst, int32_t imm32); aoqi@0: void subq_imm32(Register dst, int32_t imm32); aoqi@0: aoqi@0: // Subtract Scalar Double-Precision Floating-Point Values aoqi@0: void subsd(XMMRegister dst, Address src); aoqi@0: void subsd(XMMRegister dst, XMMRegister src); aoqi@0: aoqi@0: // Subtract Scalar Single-Precision Floating-Point Values aoqi@0: void subss(XMMRegister dst, Address src); aoqi@0: void subss(XMMRegister dst, XMMRegister src); aoqi@0: aoqi@0: void testb(Register dst, int imm8); aoqi@0: aoqi@0: void testl(Register dst, int32_t imm32); aoqi@0: void testl(Register dst, Register src); aoqi@0: void testl(Register dst, Address src); aoqi@0: aoqi@0: void testq(Register dst, int32_t imm32); aoqi@0: void testq(Register dst, Register src); aoqi@0: aoqi@0: // BMI - count trailing zeros aoqi@0: void tzcntl(Register dst, Register src); aoqi@0: void tzcntq(Register dst, Register src); aoqi@0: aoqi@0: // Unordered Compare Scalar 
Double-Precision Floating-Point Values and set EFLAGS aoqi@0: void ucomisd(XMMRegister dst, Address src); aoqi@0: void ucomisd(XMMRegister dst, XMMRegister src); aoqi@0: aoqi@0: // Unordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS aoqi@0: void ucomiss(XMMRegister dst, Address src); aoqi@0: void ucomiss(XMMRegister dst, XMMRegister src); aoqi@0: aoqi@0: void xabort(int8_t imm8); aoqi@0: aoqi@0: void xaddl(Address dst, Register src); aoqi@0: aoqi@0: void xaddq(Address dst, Register src); aoqi@0: aoqi@0: void xbegin(Label& abort, relocInfo::relocType rtype = relocInfo::none); aoqi@0: aoqi@0: void xchgl(Register reg, Address adr); aoqi@0: void xchgl(Register dst, Register src); aoqi@0: aoqi@0: void xchgq(Register reg, Address adr); aoqi@0: void xchgq(Register dst, Register src); aoqi@0: aoqi@0: void xend(); aoqi@0: aoqi@0: // Get Value of Extended Control Register aoqi@0: void xgetbv(); aoqi@0: aoqi@0: void xorl(Register dst, int32_t imm32); aoqi@0: void xorl(Register dst, Address src); aoqi@0: void xorl(Register dst, Register src); aoqi@0: aoqi@0: void xorq(Register dst, Address src); aoqi@0: void xorq(Register dst, Register src); aoqi@0: aoqi@0: void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0 aoqi@0: aoqi@0: // AVX 3-operands scalar instructions (encoded with VEX prefix) aoqi@0: aoqi@0: void vaddsd(XMMRegister dst, XMMRegister nds, Address src); aoqi@0: void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src); aoqi@0: void vaddss(XMMRegister dst, XMMRegister nds, Address src); aoqi@0: void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src); aoqi@0: void vdivsd(XMMRegister dst, XMMRegister nds, Address src); aoqi@0: void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src); aoqi@0: void vdivss(XMMRegister dst, XMMRegister nds, Address src); aoqi@0: void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src); aoqi@0: void vmulsd(XMMRegister dst, XMMRegister nds, Address src); aoqi@0: 
void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src); aoqi@0: void vmulss(XMMRegister dst, XMMRegister nds, Address src); aoqi@0: void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src); aoqi@0: void vsubsd(XMMRegister dst, XMMRegister nds, Address src); aoqi@0: void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src); aoqi@0: void vsubss(XMMRegister dst, XMMRegister nds, Address src); aoqi@0: void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src); aoqi@0: aoqi@0: aoqi@0: //====================VECTOR ARITHMETIC===================================== aoqi@0: aoqi@0: // Add Packed Floating-Point Values aoqi@0: void addpd(XMMRegister dst, XMMRegister src); aoqi@0: void addps(XMMRegister dst, XMMRegister src); aoqi@0: void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); aoqi@0: void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); aoqi@0: void vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); aoqi@0: void vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256); aoqi@0: aoqi@0: // Subtract Packed Floating-Point Values aoqi@0: void subpd(XMMRegister dst, XMMRegister src); aoqi@0: void subps(XMMRegister dst, XMMRegister src); aoqi@0: void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); aoqi@0: void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); aoqi@0: void vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); aoqi@0: void vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256); aoqi@0: aoqi@0: // Multiply Packed Floating-Point Values aoqi@0: void mulpd(XMMRegister dst, XMMRegister src); aoqi@0: void mulps(XMMRegister dst, XMMRegister src); aoqi@0: void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); aoqi@0: void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); aoqi@0: void vmulpd(XMMRegister dst, XMMRegister nds, 
Address src, bool vector256); aoqi@0: void vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256); aoqi@0: aoqi@0: // Divide Packed Floating-Point Values aoqi@0: void divpd(XMMRegister dst, XMMRegister src); aoqi@0: void divps(XMMRegister dst, XMMRegister src); aoqi@0: void vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); aoqi@0: void vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); aoqi@0: void vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); aoqi@0: void vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256); aoqi@0: aoqi@0: // Bitwise Logical AND of Packed Floating-Point Values aoqi@0: void andpd(XMMRegister dst, XMMRegister src); aoqi@0: void andps(XMMRegister dst, XMMRegister src); aoqi@0: void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); aoqi@0: void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); aoqi@0: void vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); aoqi@0: void vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256); aoqi@0: aoqi@0: // Bitwise Logical XOR of Packed Floating-Point Values aoqi@0: void xorpd(XMMRegister dst, XMMRegister src); aoqi@0: void xorps(XMMRegister dst, XMMRegister src); aoqi@0: void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); aoqi@0: void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); aoqi@0: void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); aoqi@0: void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256); aoqi@0: aoqi@0: // Add packed integers aoqi@0: void paddb(XMMRegister dst, XMMRegister src); aoqi@0: void paddw(XMMRegister dst, XMMRegister src); aoqi@0: void paddd(XMMRegister dst, XMMRegister src); aoqi@0: void paddq(XMMRegister dst, XMMRegister src); aoqi@0: void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool 
vector256); aoqi@0: void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); aoqi@0: void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); aoqi@0: void vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); aoqi@0: void vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256); aoqi@0: void vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256); aoqi@0: void vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); aoqi@0: void vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256); aoqi@0: aoqi@0: // Subtract packed integers aoqi@0: void psubb(XMMRegister dst, XMMRegister src); aoqi@0: void psubw(XMMRegister dst, XMMRegister src); aoqi@0: void psubd(XMMRegister dst, XMMRegister src); aoqi@0: void psubq(XMMRegister dst, XMMRegister src); aoqi@0: void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); aoqi@0: void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); aoqi@0: void vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); aoqi@0: void vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); aoqi@0: void vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256); aoqi@0: void vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256); aoqi@0: void vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); aoqi@0: void vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256); aoqi@0: aoqi@0: // Multiply packed integers (only shorts and ints) aoqi@0: void pmullw(XMMRegister dst, XMMRegister src); aoqi@0: void pmulld(XMMRegister dst, XMMRegister src); aoqi@0: void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); aoqi@0: void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); aoqi@0: void vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 
aoqi@0: void vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vector256); aoqi@0: aoqi@0: // Shift left packed integers (the shift operand is either an int or an XMMRegister) aoqi@0: void psllw(XMMRegister dst, int shift); aoqi@0: void pslld(XMMRegister dst, int shift); aoqi@0: void psllq(XMMRegister dst, int shift); aoqi@0: void psllw(XMMRegister dst, XMMRegister shift); aoqi@0: void pslld(XMMRegister dst, XMMRegister shift); aoqi@0: void psllq(XMMRegister dst, XMMRegister shift); aoqi@0: void vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256); aoqi@0: void vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256); aoqi@0: void vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256); aoqi@0: void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); aoqi@0: void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); aoqi@0: void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); aoqi@0: aoqi@0: // Logical shift right packed integers aoqi@0: void psrlw(XMMRegister dst, int shift); aoqi@0: void psrld(XMMRegister dst, int shift); aoqi@0: void psrlq(XMMRegister dst, int shift); aoqi@0: void psrlw(XMMRegister dst, XMMRegister shift); aoqi@0: void psrld(XMMRegister dst, XMMRegister shift); aoqi@0: void psrlq(XMMRegister dst, XMMRegister shift); aoqi@0: void vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256); aoqi@0: void vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256); aoqi@0: void vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256); aoqi@0: void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); aoqi@0: void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); aoqi@0: void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); aoqi@0: aoqi@0: // Arithmetic shift right packed integers (only shorts and ints, no instructions for longs) aoqi@0: void psraw(XMMRegister dst, int shift); 
aoqi@0: void psrad(XMMRegister dst, int shift); aoqi@0: void psraw(XMMRegister dst, XMMRegister shift); aoqi@0: void psrad(XMMRegister dst, XMMRegister shift); aoqi@0: void vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256); aoqi@0: void vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256); aoqi@0: void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); aoqi@0: void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); aoqi@0: aoqi@0: // And packed integers aoqi@0: void pand(XMMRegister dst, XMMRegister src); aoqi@0: void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); aoqi@0: void vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256); aoqi@0: aoqi@0: // Or packed integers aoqi@0: void por(XMMRegister dst, XMMRegister src); aoqi@0: void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); aoqi@0: void vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256); aoqi@0: aoqi@0: // Xor packed integers aoqi@0: void pxor(XMMRegister dst, XMMRegister src); aoqi@0: void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); aoqi@0: void vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256); aoqi@0: aoqi@0: // Copy low 128 bits into high 128 bits of YMM registers. aoqi@0: void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src); aoqi@0: void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src); aoqi@0: aoqi@0: // Load/store high 128 bits of YMM registers without destroying the other half. 
aoqi@0: void vinsertf128h(XMMRegister dst, Address src); aoqi@0: void vinserti128h(XMMRegister dst, Address src); aoqi@0: void vextractf128h(Address dst, XMMRegister src); aoqi@0: void vextracti128h(Address dst, XMMRegister src); aoqi@0: aoqi@0: // duplicate 4-byte integer data from src into 8 locations in dst aoqi@0: void vpbroadcastd(XMMRegister dst, XMMRegister src); aoqi@0: aoqi@0: // Carry-Less Multiplication Quadword aoqi@0: void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask); aoqi@0: aoqi@0: // AVX instruction which is used to clear upper 128 bits of YMM registers and aoqi@0: // to avoid transition penalty between AVX and SSE states. There is no aoqi@0: // penalty if legacy SSE instructions are encoded using VEX prefix because aoqi@0: // they always clear upper 128 bits. It should be used before calling aoqi@0: // runtime code and native libraries. aoqi@0: void vzeroupper(); aoqi@0: aoqi@0: protected: aoqi@0: // The following instructions require 16-byte address alignment in SSE mode. aoqi@0: // They should be called only from corresponding MacroAssembler instructions. aoqi@0: void andpd(XMMRegister dst, Address src); aoqi@0: void andps(XMMRegister dst, Address src); aoqi@0: void xorpd(XMMRegister dst, Address src); aoqi@0: void xorps(XMMRegister dst, Address src); aoqi@0: aoqi@0: }; aoqi@0: aoqi@0: #endif // CPU_X86_VM_ASSEMBLER_X86_HPP