1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/cpu/x86/vm/x86_32.ad Sat Dec 01 00:00:00 2007 +0000 1.3 @@ -0,0 +1,12778 @@ 1.4 +// 1.5 +// Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved. 1.6 +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 1.7 +// 1.8 +// This code is free software; you can redistribute it and/or modify it 1.9 +// under the terms of the GNU General Public License version 2 only, as 1.10 +// published by the Free Software Foundation. 1.11 +// 1.12 +// This code is distributed in the hope that it will be useful, but WITHOUT 1.13 +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 1.14 +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 1.15 +// version 2 for more details (a copy is included in the LICENSE file that 1.16 +// accompanied this code). 1.17 +// 1.18 +// You should have received a copy of the GNU General Public License version 1.19 +// 2 along with this work; if not, write to the Free Software Foundation, 1.20 +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 1.21 +// 1.22 +// Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 1.23 +// CA 95054 USA or visit www.sun.com if you need additional information or 1.24 +// have any questions. 1.25 +// 1.26 +// 1.27 + 1.28 +// X86 Architecture Description File 1.29 + 1.30 +//----------REGISTER DEFINITION BLOCK------------------------------------------ 1.31 +// This information is used by the matcher and the register allocator to 1.32 +// describe individual registers and classes of registers within the target 1.33 +// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding );
// NOTE(review): every reg_def below also passes a fifth argument, a VMReg
// expression (e.g. rbx->as_VMReg() or VMRegImpl::Bad()), which the signature
// sketched above does not mention.
//
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.
// NOTE(review): the definitions below give ESI and EDI a first (register
// save type) column of SOC; only their C-convention column is SOE. The
// sentence above looks stale -- confirm.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());

// Special Registers
reg_def EFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());

// Float registers. We treat TOS/FPR0 special. It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick: FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());

// XMM registers. 128-bit registers or 4 words each, labeled a-d.
// Word a in each register holds a Float, words ab hold a Double.
// We currently do not use the SIMD capabilities, so registers cd
// are unused at the moment.
reg_def XMM0a( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next());
reg_def XMM1a( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next());
reg_def XMM2a( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next());
reg_def XMM3a( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next());
reg_def XMM4a( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next());
reg_def XMM5a( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next());
reg_def XMM6a( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next());
reg_def XMM7a( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next());

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX. Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H );

alloc_class chunk1( XMM0a, XMM0b,
                    XMM1a, XMM1b,
                    XMM2a, XMM2b,
                    XMM3a, XMM3b,
                    XMM4a, XMM4b,
                    XMM5a, XMM5b,
                    XMM6a, XMM6b,
                    XMM7a, XMM7b, EFLAGS);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for all registers
reg_class any_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for general registers
reg_class e_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers which may be used for implicit null checks on win95
// Also safe for use by tailjump. We don't want to allocate in rbp.
reg_class e_reg_no_rbp(EAX, EDX, EDI, ESI, ECX, EBX);
// Class of "X" registers
reg_class x_reg(EBX, ECX, EDX, EAX);
// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);
// Class for general registers not including ECX
reg_class ncx_reg(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers not including EAX
// (as listed, this class also leaves out EBP)
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
// Class for general registers not including EAX or EBX.
reg_class nabx_reg(EDX, EDI, ESI, ECX, EBP);
// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);
// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);
// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);
// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);
// Class of EDI (for synchronization)
reg_class edi_reg(EDI);
// Class of ESI (for synchronization)
reg_class esi_reg(ESI);
// Singleton class for interpreter's stack pointer
reg_class ebp_reg(EBP);
// Singleton class for stack pointer
reg_class sp_reg(ESP);
// Singleton class for instruction pointer
// reg_class ip_reg(EIP);
// Singleton class for condition codes
reg_class int_flags(EFLAGS);
// Class of integer register pairs (listed low register first: EAX,EDX etc.)
reg_class long_reg( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );
// Not AX or DX, used in divides
reg_class nadx_reg( EBX,ECX,ESI,EDI,EBP );

// Floating point registers. Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
reg_class flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

// make a register class for SSE registers
reg_class xmm_reg(XMM0a, XMM1a, XMM2a, XMM3a, XMM4a, XMM5a, XMM6a, XMM7a);

// make a double register class for SSE2 registers
reg_class xdb_reg(XMM0a,XMM0b, XMM1a,XMM1b, XMM2a,XMM2b, XMM3a,XMM3b,
                  XMM4a,XMM4b, XMM5a,XMM5b, XMM6a,XMM6b, XMM7a,XMM7b );

// Double-width class for the x87 stack registers (L/H halves); like flt_reg,
// it does not include FPR0.
reg_class dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                   FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                   FPR7L,FPR7H );

// Singleton / restricted x87 classes: FPR1 only, FPR2 only, and
// everything in dbl_reg except FPR1.
reg_class flt_reg0( FPR1L );
reg_class dbl_reg0( FPR1L,FPR1H );
reg_class dbl_reg1( FPR2L,FPR2H );
reg_class dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                       FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

// XMM6 and XMM7 could be used as temporary registers for long, float and
// double values for SSE2.
reg_class xdb_reg6( XMM6a,XMM6b );
reg_class xdb_reg7( XMM7a,XMM7b );
%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source %{
// Shorthand for the relocation formats of 32-bit immediates / displacements.
#define RELOC_IMM32 Assembler::imm32_operand
#define RELOC_DISP32 Assembler::disp32_operand

// Lets code write "__ insn(...)" against a local MacroAssembler named _masm.
#define __ _masm.

// How to find the high register of a Long pair, given the low register.
// With the encodings in the reg_defs of this file (EAX=0/EDX=2, ECX=1/EBX=3,
// EBP=5/EDI=7) the high half of each long_reg pair is always low encoding + 2.
#define HIGH_FROM_LOW(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping. They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// Note: 'double' and 'long long' have 32-bits alignment on x86.
1.255 +static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) { 1.256 + // Use the expression (adr)&(~0xF) to provide 128-bits aligned address 1.257 + // of 128-bits operands for SSE instructions. 1.258 + jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF))); 1.259 + // Store the value to a 128-bits operand. 1.260 + operand[0] = lo; 1.261 + operand[1] = hi; 1.262 + return operand; 1.263 +} 1.264 + 1.265 +// Buffer for 128-bits masks used by SSE instructions. 1.266 +static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment) 1.267 + 1.268 +// Static initialization during VM startup. 1.269 +static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF)); 1.270 +static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF)); 1.271 +static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000)); 1.272 +static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000)); 1.273 + 1.274 +// !!!!! Special hack to get all type of calls to specify the byte offset 1.275 +// from the start of the call to the point where the return address 1.276 +// will point. 1.277 +int MachCallStaticJavaNode::ret_addr_offset() { 1.278 + return 5 + (Compile::current()->in_24_bit_fp_mode() ? 6 : 0); // 5 bytes from start of call to where return address points 1.279 +} 1.280 + 1.281 +int MachCallDynamicJavaNode::ret_addr_offset() { 1.282 + return 10 + (Compile::current()->in_24_bit_fp_mode() ? 
6 : 0); // 10 bytes from start of call to where return address points 1.283 +} 1.284 + 1.285 +static int sizeof_FFree_Float_Stack_All = -1; 1.286 + 1.287 +int MachCallRuntimeNode::ret_addr_offset() { 1.288 + assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already"); 1.289 + return sizeof_FFree_Float_Stack_All + 5 + (Compile::current()->in_24_bit_fp_mode() ? 6 : 0); 1.290 +} 1.291 + 1.292 +// Indicate if the safepoint node needs the polling page as an input. 1.293 +// Since x86 does have absolute addressing, it doesn't. 1.294 +bool SafePointNode::needs_polling_address_input() { 1.295 + return false; 1.296 +} 1.297 + 1.298 +// 1.299 +// Compute padding required for nodes which need alignment 1.300 +// 1.301 + 1.302 +// The address of the call instruction needs to be 4-byte aligned to 1.303 +// ensure that it does not span a cache line so that it can be patched. 1.304 +int CallStaticJavaDirectNode::compute_padding(int current_offset) const { 1.305 + if (Compile::current()->in_24_bit_fp_mode()) 1.306 + current_offset += 6; // skip fldcw in pre_call_FPU, if any 1.307 + current_offset += 1; // skip call opcode byte 1.308 + return round_to(current_offset, alignment_required()) - current_offset; 1.309 +} 1.310 + 1.311 +// The address of the call instruction needs to be 4-byte aligned to 1.312 +// ensure that it does not span a cache line so that it can be patched. 
1.313 +int CallDynamicJavaDirectNode::compute_padding(int current_offset) const { 1.314 + if (Compile::current()->in_24_bit_fp_mode()) 1.315 + current_offset += 6; // skip fldcw in pre_call_FPU, if any 1.316 + current_offset += 5; // skip MOV instruction 1.317 + current_offset += 1; // skip call opcode byte 1.318 + return round_to(current_offset, alignment_required()) - current_offset; 1.319 +} 1.320 + 1.321 +#ifndef PRODUCT 1.322 +void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const { 1.323 + st->print("INT3"); 1.324 +} 1.325 +#endif 1.326 + 1.327 +// EMIT_RM() 1.328 +void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) { 1.329 + unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3); 1.330 + *(cbuf.code_end()) = c; 1.331 + cbuf.set_code_end(cbuf.code_end() + 1); 1.332 +} 1.333 + 1.334 +// EMIT_CC() 1.335 +void emit_cc(CodeBuffer &cbuf, int f1, int f2) { 1.336 + unsigned char c = (unsigned char)( f1 | f2 ); 1.337 + *(cbuf.code_end()) = c; 1.338 + cbuf.set_code_end(cbuf.code_end() + 1); 1.339 +} 1.340 + 1.341 +// EMIT_OPCODE() 1.342 +void emit_opcode(CodeBuffer &cbuf, int code) { 1.343 + *(cbuf.code_end()) = (unsigned char)code; 1.344 + cbuf.set_code_end(cbuf.code_end() + 1); 1.345 +} 1.346 + 1.347 +// EMIT_OPCODE() w/ relocation information 1.348 +void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) { 1.349 + cbuf.relocate(cbuf.inst_mark() + offset, reloc); 1.350 + emit_opcode(cbuf, code); 1.351 +} 1.352 + 1.353 +// EMIT_D8() 1.354 +void emit_d8(CodeBuffer &cbuf, int d8) { 1.355 + *(cbuf.code_end()) = (unsigned char)d8; 1.356 + cbuf.set_code_end(cbuf.code_end() + 1); 1.357 +} 1.358 + 1.359 +// EMIT_D16() 1.360 +void emit_d16(CodeBuffer &cbuf, int d16) { 1.361 + *((short *)(cbuf.code_end())) = d16; 1.362 + cbuf.set_code_end(cbuf.code_end() + 2); 1.363 +} 1.364 + 1.365 +// EMIT_D32() 1.366 +void emit_d32(CodeBuffer &cbuf, int d32) { 1.367 + *((int *)(cbuf.code_end())) = d32; 1.368 + 
cbuf.set_code_end(cbuf.code_end() + 4); 1.369 +} 1.370 + 1.371 +// emit 32 bit value and construct relocation entry from relocInfo::relocType 1.372 +void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc, 1.373 + int format) { 1.374 + cbuf.relocate(cbuf.inst_mark(), reloc, format); 1.375 + 1.376 + *((int *)(cbuf.code_end())) = d32; 1.377 + cbuf.set_code_end(cbuf.code_end() + 4); 1.378 +} 1.379 + 1.380 +// emit 32 bit value and construct relocation entry from RelocationHolder 1.381 +void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec, 1.382 + int format) { 1.383 +#ifdef ASSERT 1.384 + if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) { 1.385 + assert(oop(d32)->is_oop() && oop(d32)->is_perm(), "cannot embed non-perm oops in code"); 1.386 + } 1.387 +#endif 1.388 + cbuf.relocate(cbuf.inst_mark(), rspec, format); 1.389 + 1.390 + *((int *)(cbuf.code_end())) = d32; 1.391 + cbuf.set_code_end(cbuf.code_end() + 4); 1.392 +} 1.393 + 1.394 +// Access stack slot for load or store 1.395 +void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) { 1.396 + emit_opcode( cbuf, opcode ); // (e.g., FILD [ESP+src]) 1.397 + if( -128 <= disp && disp <= 127 ) { 1.398 + emit_rm( cbuf, 0x01, rm_field, ESP_enc ); // R/M byte 1.399 + emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte 1.400 + emit_d8 (cbuf, disp); // Displacement // R/M byte 1.401 + } else { 1.402 + emit_rm( cbuf, 0x02, rm_field, ESP_enc ); // R/M byte 1.403 + emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte 1.404 + emit_d32(cbuf, disp); // Displacement // R/M byte 1.405 + } 1.406 +} 1.407 + 1.408 + // eRegI ereg, memory mem) %{ // emit_reg_mem 1.409 +void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, bool displace_is_oop ) { 1.410 + // There is no index & no scale, use form without SIB byte 1.411 + if ((index == 0x4) && 1.412 + (scale == 0) && (base != 
ESP_enc)) { 1.413 + // If no displacement, mode is 0x0; unless base is [EBP] 1.414 + if ( (displace == 0) && (base != EBP_enc) ) { 1.415 + emit_rm(cbuf, 0x0, reg_encoding, base); 1.416 + } 1.417 + else { // If 8-bit displacement, mode 0x1 1.418 + if ((displace >= -128) && (displace <= 127) 1.419 + && !(displace_is_oop) ) { 1.420 + emit_rm(cbuf, 0x1, reg_encoding, base); 1.421 + emit_d8(cbuf, displace); 1.422 + } 1.423 + else { // If 32-bit displacement 1.424 + if (base == -1) { // Special flag for absolute address 1.425 + emit_rm(cbuf, 0x0, reg_encoding, 0x5); 1.426 + // (manual lies; no SIB needed here) 1.427 + if ( displace_is_oop ) { 1.428 + emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1); 1.429 + } else { 1.430 + emit_d32 (cbuf, displace); 1.431 + } 1.432 + } 1.433 + else { // Normal base + offset 1.434 + emit_rm(cbuf, 0x2, reg_encoding, base); 1.435 + if ( displace_is_oop ) { 1.436 + emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1); 1.437 + } else { 1.438 + emit_d32 (cbuf, displace); 1.439 + } 1.440 + } 1.441 + } 1.442 + } 1.443 + } 1.444 + else { // Else, encode with the SIB byte 1.445 + // If no displacement, mode is 0x0; unless base is [EBP] 1.446 + if (displace == 0 && (base != EBP_enc)) { // If no displacement 1.447 + emit_rm(cbuf, 0x0, reg_encoding, 0x4); 1.448 + emit_rm(cbuf, scale, index, base); 1.449 + } 1.450 + else { // If 8-bit displacement, mode 0x1 1.451 + if ((displace >= -128) && (displace <= 127) 1.452 + && !(displace_is_oop) ) { 1.453 + emit_rm(cbuf, 0x1, reg_encoding, 0x4); 1.454 + emit_rm(cbuf, scale, index, base); 1.455 + emit_d8(cbuf, displace); 1.456 + } 1.457 + else { // If 32-bit displacement 1.458 + if (base == 0x04 ) { 1.459 + emit_rm(cbuf, 0x2, reg_encoding, 0x4); 1.460 + emit_rm(cbuf, scale, index, 0x04); 1.461 + } else { 1.462 + emit_rm(cbuf, 0x2, reg_encoding, 0x4); 1.463 + emit_rm(cbuf, scale, index, base); 1.464 + } 1.465 + if ( displace_is_oop ) { 1.466 + emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1); 
1.467 + } else { 1.468 + emit_d32 (cbuf, displace); 1.469 + } 1.470 + } 1.471 + } 1.472 + } 1.473 +} 1.474 + 1.475 + 1.476 +void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) { 1.477 + if( dst_encoding == src_encoding ) { 1.478 + // reg-reg copy, use an empty encoding 1.479 + } else { 1.480 + emit_opcode( cbuf, 0x8B ); 1.481 + emit_rm(cbuf, 0x3, dst_encoding, src_encoding ); 1.482 + } 1.483 +} 1.484 + 1.485 +void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) { 1.486 + if( dst_encoding == src_encoding ) { 1.487 + // reg-reg copy, use an empty encoding 1.488 + } else { 1.489 + MacroAssembler _masm(&cbuf); 1.490 + 1.491 + __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding)); 1.492 + } 1.493 +} 1.494 + 1.495 + 1.496 +//============================================================================= 1.497 +#ifndef PRODUCT 1.498 +void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1.499 + Compile* C = ra_->C; 1.500 + if( C->in_24_bit_fp_mode() ) { 1.501 + tty->print("FLDCW 24 bit fpu control word"); 1.502 + tty->print_cr(""); tty->print("\t"); 1.503 + } 1.504 + 1.505 + int framesize = C->frame_slots() << LogBytesPerInt; 1.506 + assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 1.507 + // Remove two words for return addr and rbp, 1.508 + framesize -= 2*wordSize; 1.509 + 1.510 + // Calls to C2R adapters often do not accept exceptional returns. 1.511 + // We require that their callers must bang for them. But be careful, because 1.512 + // some VM calls (such as call site linkage) can use several kilobytes of 1.513 + // stack. But the stack safety zone should account for that. 1.514 + // See bugs 4446381, 4468289, 4497237. 
1.515 + if (C->need_stack_bang(framesize)) { 1.516 + tty->print_cr("# stack bang"); tty->print("\t"); 1.517 + } 1.518 + tty->print_cr("PUSHL EBP"); tty->print("\t"); 1.519 + 1.520 + if( VerifyStackAtCalls ) { // Majik cookie to verify stack depth 1.521 + tty->print("PUSH 0xBADB100D\t# Majik cookie for stack depth check"); 1.522 + tty->print_cr(""); tty->print("\t"); 1.523 + framesize -= wordSize; 1.524 + } 1.525 + 1.526 + if ((C->in_24_bit_fp_mode() || VerifyStackAtCalls ) && framesize < 128 ) { 1.527 + if (framesize) { 1.528 + tty->print("SUB ESP,%d\t# Create frame",framesize); 1.529 + } 1.530 + } else { 1.531 + tty->print("SUB ESP,%d\t# Create frame",framesize); 1.532 + } 1.533 +} 1.534 +#endif 1.535 + 1.536 + 1.537 +void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1.538 + Compile* C = ra_->C; 1.539 + 1.540 + if (UseSSE >= 2 && VerifyFPU) { 1.541 + MacroAssembler masm(&cbuf); 1.542 + masm.verify_FPU(0, "FPU stack must be clean on entry"); 1.543 + } 1.544 + 1.545 + // WARNING: Initial instruction MUST be 5 bytes or longer so that 1.546 + // NativeJump::patch_verified_entry will be able to patch out the entry 1.547 + // code safely. The fldcw is ok at 6 bytes, the push to verify stack 1.548 + // depth is ok at 5 bytes, the frame allocation can be either 3 or 1.549 + // 6 bytes. So if we don't do the fldcw or the push then we must 1.550 + // use the 6 byte frame allocation even if we have no frame. :-( 1.551 + // If method sets FPU control word do it now 1.552 + if( C->in_24_bit_fp_mode() ) { 1.553 + MacroAssembler masm(&cbuf); 1.554 + masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); 1.555 + } 1.556 + 1.557 + int framesize = C->frame_slots() << LogBytesPerInt; 1.558 + assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 1.559 + // Remove two words for return addr and rbp, 1.560 + framesize -= 2*wordSize; 1.561 + 1.562 + // Calls to C2R adapters often do not accept exceptional returns. 
// We require that their callers must bang for them.  But be careful, because
  // some VM calls (such as call site linkage) can use several kilobytes of
  // stack.  But the stack safety zone should account for that.
  // See bugs 4446381, 4468289, 4497237.
  if (C->need_stack_bang(framesize)) {
    MacroAssembler masm(&cbuf);
    masm.generate_stack_overflow_check(framesize);
  }

  // We always push rbp, so that on return to interpreter rbp, will be
  // restored correctly and we can correct the stack.
  emit_opcode(cbuf, 0x50 | EBP_enc);

  if( VerifyStackAtCalls ) { // Majik cookie to verify stack depth
    emit_opcode(cbuf, 0x68); // push 0xbadb100d
    emit_d32(cbuf, 0xbadb100d);
    // The cookie occupies one word of the frame, so allocate one word less below.
    framesize -= wordSize;
  }

  // Short (8-bit immediate) form: nothing is emitted for a zero framesize.
  // Long (32-bit immediate) form: always emitted, even for a zero framesize.
  if ((C->in_24_bit_fp_mode() || VerifyStackAtCalls ) && framesize < 128 ) {
    if (framesize) {
      emit_opcode(cbuf, 0x83);   // sub  SP,#framesize
      emit_rm(cbuf, 0x3, 0x05, ESP_enc);
      emit_d8(cbuf, framesize);
    }
  } else {
    emit_opcode(cbuf, 0x81);   // sub  SP,#framesize
    emit_rm(cbuf, 0x3, 0x05, ESP_enc);
    emit_d32(cbuf, framesize);
  }
  // Report the number of bytes emitted so far (code_end - code_begin).
  C->set_frame_complete(cbuf.code_end() - cbuf.code_begin());

#ifdef ASSERT
  // Debug-only check: with EAX temporarily pushed, the low bits of ESP must
  // equal StackAlignmentInBytes-wordSize; otherwise stop with a message.
  if (VerifyStackAtCalls) {
    Label L;
    MacroAssembler masm(&cbuf);
    masm.pushl(rax);
    masm.movl(rax, rsp);
    masm.andl(rax, StackAlignmentInBytes-1);
    masm.cmpl(rax, StackAlignmentInBytes-wordSize);
    masm.popl(rax);
    masm.jcc(Assembler::equal, L);
    masm.stop("Stack is not properly aligned!");
    masm.bind(L);
  }
#endif

}

// Size of the prolog in bytes; delegated to the generic MachNode::size().
uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
// Print a textual rendering of the epilog.  The conditions tested here are the
// same ones MachEpilogNode::emit() tests, except that emit() has no separate
// text for the short/long ADD forms.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->frame_slots() << LogBytesPerInt;
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if( C->in_24_bit_fp_mode() ) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if( framesize ) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if( do_polling() && C->is_method_compilation() ) {
    st->print("TEST PollPage,EAX\t! Poll Safepoint");
    st->cr(); st->print("\t");
  }
}
#endif

// Emit the epilog: optional FPU control word restore, frame pop (ADD ESP),
// POP EBP and, when polling is enabled for a method compilation, a TEST
// against the polling page tagged with a poll_return relocation.
// MachEpilogNode::size() must account for exactly the bytes emitted here.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;

  // If method set FPU control word, restore to standard control word
  if( C->in_24_bit_fp_mode() ) {
    MacroAssembler masm(&cbuf);
    masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->frame_slots() << LogBytesPerInt;
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if( framesize >= 128 ) {
    emit_opcode(cbuf, 0x81); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  }
  else if( framesize ) {
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  // POP EBP (matches the PUSH EBP emitted by the prolog).
  emit_opcode(cbuf, 0x58 | EBP_enc);

  if( do_polling() && C->is_method_compilation() ) {
    // The relocation is attached to the address where the TEST starts.
    cbuf.relocate(cbuf.code_end(), relocInfo::poll_return_type, 0);
    emit_opcode(cbuf,0x85);
    emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
    emit_d32(cbuf, (intptr_t)os::get_polling_page());
  }
}

// Byte size of the epilog.  Budget: 6 bytes for the control word restore,
// 6 bytes for the safepoint poll, 1 byte for POP EBP, and 6 / 3 / 0 bytes for
// the long / short / absent ADD ESP.
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  // If method set FPU control word, restore to standard control word
  int size = C->in_24_bit_fp_mode() ? 6 : 0;
  if( do_polling() && C->is_method_compilation() ) size += 6;

  int framesize = C->frame_slots() << LogBytesPerInt;
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  size++; // popl rbp,

  if( framesize >= 128 ) {
    size += 6;
  } else {
    size += framesize ? 3 : 0;
  }
  return size;
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

int MachEpilogNode::safepoint_offset() const { return 0; }

//=============================================================================

// Coarse register classes used by the spill-copy code below.
enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };

// Classify an allocator register name: invalid -> rc_bad, stack slot ->
// rc_stack, otherwise by the kind of VMReg it maps to.
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg) ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}

// Emit, print, or just size one "opcode reg,[ESP+offset]" style instruction.
//   cbuf     - when non-NULL, the instruction is emitted into it
//   do_size  - when cbuf is NULL and do_size is false, the instruction is
//              printed to tty (non-PRODUCT builds only)
//   is_load  - selects operand order in the printed MOV text
//   offset   - displacement from ESP
//   reg      - register number; its Matcher::_regEncode value goes into the
//              reg field of the ModRM byte
//   opcode   - primary opcode byte
//   op_str   - mnemonic used when printing
//   size     - running byte count so far
// Returns size plus 3 bytes plus 0, 1 or 4 displacement bytes.
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg, int opcode, const char *op_str, int size ) {
  if( cbuf ) {
    emit_opcode  (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, false);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) tty->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) tty->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else          tty->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else { // FLD, FST, PUSH, POP
      tty->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}

// Helper for XMM registers.  Extra opcode bits, limited syntax.
1.751 +static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load, 1.752 + int offset, int reg_lo, int reg_hi, int size ) { 1.753 + if( cbuf ) { 1.754 + if( reg_lo+1 == reg_hi ) { // double move? 1.755 + if( is_load && !UseXmmLoadAndClearUpper ) 1.756 + emit_opcode(*cbuf, 0x66 ); // use 'movlpd' for load 1.757 + else 1.758 + emit_opcode(*cbuf, 0xF2 ); // use 'movsd' otherwise 1.759 + } else { 1.760 + emit_opcode(*cbuf, 0xF3 ); 1.761 + } 1.762 + emit_opcode(*cbuf, 0x0F ); 1.763 + if( reg_lo+1 == reg_hi && is_load && !UseXmmLoadAndClearUpper ) 1.764 + emit_opcode(*cbuf, 0x12 ); // use 'movlpd' for load 1.765 + else 1.766 + emit_opcode(*cbuf, is_load ? 0x10 : 0x11 ); 1.767 + encode_RegMem(*cbuf, Matcher::_regEncode[reg_lo], ESP_enc, 0x4, 0, offset, false); 1.768 +#ifndef PRODUCT 1.769 + } else if( !do_size ) { 1.770 + if( size != 0 ) tty->print("\n\t"); 1.771 + if( reg_lo+1 == reg_hi ) { // double move? 1.772 + if( is_load ) tty->print("%s %s,[ESP + #%d]", 1.773 + UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD", 1.774 + Matcher::regName[reg_lo], offset); 1.775 + else tty->print("MOVSD [ESP + #%d],%s", 1.776 + offset, Matcher::regName[reg_lo]); 1.777 + } else { 1.778 + if( is_load ) tty->print("MOVSS %s,[ESP + #%d]", 1.779 + Matcher::regName[reg_lo], offset); 1.780 + else tty->print("MOVSS [ESP + #%d],%s", 1.781 + offset, Matcher::regName[reg_lo]); 1.782 + } 1.783 +#endif 1.784 + } 1.785 + int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 
1 : 4); 1.786 + return size+5+offset_size; 1.787 +} 1.788 + 1.789 + 1.790 +static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 1.791 + int src_hi, int dst_hi, int size ) { 1.792 + if( UseXmmRegToRegMoveAll ) {//Use movaps,movapd to move between xmm registers 1.793 + if( cbuf ) { 1.794 + if( (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ) { 1.795 + emit_opcode(*cbuf, 0x66 ); 1.796 + } 1.797 + emit_opcode(*cbuf, 0x0F ); 1.798 + emit_opcode(*cbuf, 0x28 ); 1.799 + emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] ); 1.800 +#ifndef PRODUCT 1.801 + } else if( !do_size ) { 1.802 + if( size != 0 ) tty->print("\n\t"); 1.803 + if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move? 1.804 + tty->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 1.805 + } else { 1.806 + tty->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 1.807 + } 1.808 +#endif 1.809 + } 1.810 + return size + ((src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 4 : 3); 1.811 + } else { 1.812 + if( cbuf ) { 1.813 + emit_opcode(*cbuf, (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 0xF2 : 0xF3 ); 1.814 + emit_opcode(*cbuf, 0x0F ); 1.815 + emit_opcode(*cbuf, 0x10 ); 1.816 + emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] ); 1.817 +#ifndef PRODUCT 1.818 + } else if( !do_size ) { 1.819 + if( size != 0 ) tty->print("\n\t"); 1.820 + if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move? 
1.821 + tty->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 1.822 + } else { 1.823 + tty->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 1.824 + } 1.825 +#endif 1.826 + } 1.827 + return size+4; 1.828 + } 1.829 +} 1.830 + 1.831 +static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size ) { 1.832 + if( cbuf ) { 1.833 + emit_opcode(*cbuf, 0x8B ); 1.834 + emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] ); 1.835 +#ifndef PRODUCT 1.836 + } else if( !do_size ) { 1.837 + if( size != 0 ) tty->print("\n\t"); 1.838 + tty->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]); 1.839 +#endif 1.840 + } 1.841 + return size+2; 1.842 +} 1.843 + 1.844 +static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi, int offset, int size ) { 1.845 + if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there 1.846 + if( cbuf ) { 1.847 + emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it) 1.848 + emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] ); 1.849 +#ifndef PRODUCT 1.850 + } else if( !do_size ) { 1.851 + if( size != 0 ) tty->print("\n\t"); 1.852 + tty->print("FLD %s",Matcher::regName[src_lo]); 1.853 +#endif 1.854 + } 1.855 + size += 2; 1.856 + } 1.857 + 1.858 + int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/; 1.859 + const char *op_str; 1.860 + int op; 1.861 + if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store? 1.862 + op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D "; 1.863 + op = 0xDD; 1.864 + } else { // 32-bit store 1.865 + op_str = (src_lo != FPR1L_num) ? 
"FSTP_S" : "FST_S "; 1.866 + op = 0xD9; 1.867 + assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" ); 1.868 + } 1.869 + 1.870 + return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size); 1.871 +} 1.872 + 1.873 +uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { 1.874 + // Get registers to move 1.875 + OptoReg::Name src_second = ra_->get_reg_second(in(1)); 1.876 + OptoReg::Name src_first = ra_->get_reg_first(in(1)); 1.877 + OptoReg::Name dst_second = ra_->get_reg_second(this ); 1.878 + OptoReg::Name dst_first = ra_->get_reg_first(this ); 1.879 + 1.880 + enum RC src_second_rc = rc_class(src_second); 1.881 + enum RC src_first_rc = rc_class(src_first); 1.882 + enum RC dst_second_rc = rc_class(dst_second); 1.883 + enum RC dst_first_rc = rc_class(dst_first); 1.884 + 1.885 + assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); 1.886 + 1.887 + // Generate spill code! 1.888 + int size = 0; 1.889 + 1.890 + if( src_first == dst_first && src_second == dst_second ) 1.891 + return size; // Self copy, no move 1.892 + 1.893 + // -------------------------------------- 1.894 + // Check for mem-mem move. push/pop to move. 
1.895 + if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1.896 + if( src_second == dst_first ) { // overlapping stack copy ranges 1.897 + assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" ); 1.898 + size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size); 1.899 + size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size); 1.900 + src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits 1.901 + } 1.902 + // move low bits 1.903 + size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size); 1.904 + size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size); 1.905 + if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits 1.906 + size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size); 1.907 + size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size); 1.908 + } 1.909 + return size; 1.910 + } 1.911 + 1.912 + // -------------------------------------- 1.913 + // Check for integer reg-reg copy 1.914 + if( src_first_rc == rc_int && dst_first_rc == rc_int ) 1.915 + size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size); 1.916 + 1.917 + // Check for integer store 1.918 + if( src_first_rc == rc_int && dst_first_rc == rc_stack ) 1.919 + size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size); 1.920 + 1.921 + // Check for integer load 1.922 + if( dst_first_rc == rc_int && src_first_rc == rc_stack ) 1.923 + size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size); 1.924 + 1.925 + // -------------------------------------- 1.926 + // Check for float reg-reg copy 1.927 + if( src_first_rc == rc_float && dst_first_rc == rc_float ) { 1.928 + assert( (src_second_rc == rc_bad && dst_second_rc == 
rc_bad) || 1.929 + (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" ); 1.930 + if( cbuf ) { 1.931 + 1.932 + // Note the mucking with the register encode to compensate for the 0/1 1.933 + // indexing issue mentioned in a comment in the reg_def sections 1.934 + // for FPR registers many lines above here. 1.935 + 1.936 + if( src_first != FPR1L_num ) { 1.937 + emit_opcode (*cbuf, 0xD9 ); // FLD ST(i) 1.938 + emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 ); 1.939 + emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1.940 + emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1.941 + } else { 1.942 + emit_opcode (*cbuf, 0xDD ); // FST ST(i) 1.943 + emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 ); 1.944 + } 1.945 +#ifndef PRODUCT 1.946 + } else if( !do_size ) { 1.947 + if( size != 0 ) st->print("\n\t"); 1.948 + if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]); 1.949 + else st->print( "FST %s", Matcher::regName[dst_first]); 1.950 +#endif 1.951 + } 1.952 + return size + ((src_first != FPR1L_num) ? 2+2 : 2); 1.953 + } 1.954 + 1.955 + // Check for float store 1.956 + if( src_first_rc == rc_float && dst_first_rc == rc_stack ) { 1.957 + return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size); 1.958 + } 1.959 + 1.960 + // Check for float load 1.961 + if( dst_first_rc == rc_float && src_first_rc == rc_stack ) { 1.962 + int offset = ra_->reg2offset(src_first); 1.963 + const char *op_str; 1.964 + int op; 1.965 + if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load? 
1.966 + op_str = "FLD_D"; 1.967 + op = 0xDD; 1.968 + } else { // 32-bit load 1.969 + op_str = "FLD_S"; 1.970 + op = 0xD9; 1.971 + assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" ); 1.972 + } 1.973 + if( cbuf ) { 1.974 + emit_opcode (*cbuf, op ); 1.975 + encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, false); 1.976 + emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1.977 + emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1.978 +#ifndef PRODUCT 1.979 + } else if( !do_size ) { 1.980 + if( size != 0 ) st->print("\n\t"); 1.981 + st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]); 1.982 +#endif 1.983 + } 1.984 + int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 1.985 + return size + 3+offset_size+2; 1.986 + } 1.987 + 1.988 + // Check for xmm reg-reg copy 1.989 + if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1.990 + assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1.991 + (src_first+1 == src_second && dst_first+1 == dst_second), 1.992 + "no non-adjacent float-moves" ); 1.993 + return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size); 1.994 + } 1.995 + 1.996 + // Check for xmm store 1.997 + if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1.998 + return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size); 1.999 + } 1.1000 + 1.1001 + // Check for float xmm load 1.1002 + if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) { 1.1003 + return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size); 1.1004 + } 1.1005 + 1.1006 + // Copy from float reg to xmm reg 1.1007 + if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) { 1.1008 + // copy to the top of stack from floating point reg 1.1009 + // and use LEA to preserve flags 1.1010 + if( cbuf ) { 1.1011 + emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8] 1.1012 + emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 
1.1013 + emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1.1014 + emit_d8(*cbuf,0xF8); 1.1015 +#ifndef PRODUCT 1.1016 + } else if( !do_size ) { 1.1017 + if( size != 0 ) st->print("\n\t"); 1.1018 + st->print("LEA ESP,[ESP-8]"); 1.1019 +#endif 1.1020 + } 1.1021 + size += 4; 1.1022 + 1.1023 + size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size); 1.1024 + 1.1025 + // Copy from the temp memory to the xmm reg. 1.1026 + size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size); 1.1027 + 1.1028 + if( cbuf ) { 1.1029 + emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8] 1.1030 + emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1.1031 + emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1.1032 + emit_d8(*cbuf,0x08); 1.1033 +#ifndef PRODUCT 1.1034 + } else if( !do_size ) { 1.1035 + if( size != 0 ) st->print("\n\t"); 1.1036 + st->print("LEA ESP,[ESP+8]"); 1.1037 +#endif 1.1038 + } 1.1039 + size += 4; 1.1040 + return size; 1.1041 + } 1.1042 + 1.1043 + assert( size > 0, "missed a case" ); 1.1044 + 1.1045 + // -------------------------------------------------------------------- 1.1046 + // Check for second bits still needing moving. 
1.1047 + if( src_second == dst_second ) 1.1048 + return size; // Self copy; no move 1.1049 + assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" ); 1.1050 + 1.1051 + // Check for second word int-int move 1.1052 + if( src_second_rc == rc_int && dst_second_rc == rc_int ) 1.1053 + return impl_mov_helper(cbuf,do_size,src_second,dst_second,size); 1.1054 + 1.1055 + // Check for second word integer store 1.1056 + if( src_second_rc == rc_int && dst_second_rc == rc_stack ) 1.1057 + return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size); 1.1058 + 1.1059 + // Check for second word integer load 1.1060 + if( dst_second_rc == rc_int && src_second_rc == rc_stack ) 1.1061 + return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size); 1.1062 + 1.1063 + 1.1064 + Unimplemented(); 1.1065 +} 1.1066 + 1.1067 +#ifndef PRODUCT 1.1068 +void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1.1069 + implementation( NULL, ra_, false, st ); 1.1070 +} 1.1071 +#endif 1.1072 + 1.1073 +void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1.1074 + implementation( &cbuf, ra_, false, NULL ); 1.1075 +} 1.1076 + 1.1077 +uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { 1.1078 + return implementation( NULL, ra_, true, NULL ); 1.1079 +} 1.1080 + 1.1081 +//============================================================================= 1.1082 +#ifndef PRODUCT 1.1083 +void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const { 1.1084 + st->print("NOP \t# %d bytes pad for loops and calls", _count); 1.1085 +} 1.1086 +#endif 1.1087 + 1.1088 +void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const { 1.1089 + MacroAssembler _masm(&cbuf); 1.1090 + __ nop(_count); 1.1091 +} 1.1092 + 1.1093 +uint MachNopNode::size(PhaseRegAlloc *) const { 1.1094 + return _count; 1.1095 +} 1.1096 + 1.1097 + 1.1098 
+//============================================================================= 1.1099 +#ifndef PRODUCT 1.1100 +void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1.1101 + int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1.1102 + int reg = ra_->get_reg_first(this); 1.1103 + st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset); 1.1104 +} 1.1105 +#endif 1.1106 + 1.1107 +void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1.1108 + int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1.1109 + int reg = ra_->get_encode(this); 1.1110 + if( offset >= 128 ) { 1.1111 + emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1.1112 + emit_rm(cbuf, 0x2, reg, 0x04); 1.1113 + emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1.1114 + emit_d32(cbuf, offset); 1.1115 + } 1.1116 + else { 1.1117 + emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1.1118 + emit_rm(cbuf, 0x1, reg, 0x04); 1.1119 + emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1.1120 + emit_d8(cbuf, offset); 1.1121 + } 1.1122 +} 1.1123 + 1.1124 +uint BoxLockNode::size(PhaseRegAlloc *ra_) const { 1.1125 + int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1.1126 + if( offset >= 128 ) { 1.1127 + return 7; 1.1128 + } 1.1129 + else { 1.1130 + return 4; 1.1131 + } 1.1132 +} 1.1133 + 1.1134 +//============================================================================= 1.1135 + 1.1136 +// emit call stub, compiled java to interpreter 1.1137 +void emit_java_to_interp(CodeBuffer &cbuf ) { 1.1138 + // Stub is fixed up when the corresponding call is converted from calling 1.1139 + // compiled code to calling interpreted code. 1.1140 + // mov rbx,0 1.1141 + // jmp -1 1.1142 + 1.1143 + address mark = cbuf.inst_mark(); // get mark within main instrs section 1.1144 + 1.1145 + // Note that the code buffer's inst_mark is always relative to insts. 1.1146 + // That's why we must use the macroassembler to generate a stub. 
1.1147 + MacroAssembler _masm(&cbuf); 1.1148 + 1.1149 + address base = 1.1150 + __ start_a_stub(Compile::MAX_stubs_size); 1.1151 + if (base == NULL) return; // CodeBuffer::expand failed 1.1152 + // static stub relocation stores the instruction address of the call 1.1153 + __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM32); 1.1154 + // static stub relocation also tags the methodOop in the code-stream. 1.1155 + __ movoop(rbx, (jobject)NULL); // method is zapped till fixup time 1.1156 + __ jump(RuntimeAddress((address)-1)); 1.1157 + 1.1158 + __ end_a_stub(); 1.1159 + // Update current stubs pointer and restore code_end. 1.1160 +} 1.1161 +// size of call stub, compiled java to interpretor 1.1162 +uint size_java_to_interp() { 1.1163 + return 10; // movl; jmp 1.1164 +} 1.1165 +// relocation entries for call stub, compiled java to interpretor 1.1166 +uint reloc_java_to_interp() { 1.1167 + return 4; // 3 in emit_java_to_interp + 1 in Java_Static_Call 1.1168 +} 1.1169 + 1.1170 +//============================================================================= 1.1171 +#ifndef PRODUCT 1.1172 +void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1.1173 + st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check"); 1.1174 + st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub"); 1.1175 + st->print_cr("\tNOP"); 1.1176 + st->print_cr("\tNOP"); 1.1177 + if( !OptoBreakpoint ) 1.1178 + st->print_cr("\tNOP"); 1.1179 +} 1.1180 +#endif 1.1181 + 1.1182 +void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1.1183 + MacroAssembler masm(&cbuf); 1.1184 +#ifdef ASSERT 1.1185 + uint code_size = cbuf.code_size(); 1.1186 +#endif 1.1187 + masm.cmpl(rax, Address(rcx, oopDesc::klass_offset_in_bytes())); 1.1188 + masm.jump_cc(Assembler::notEqual, 1.1189 + RuntimeAddress(SharedRuntime::get_ic_miss_stub())); 1.1190 + /* WARNING these NOPs are critical so that verified entry point is properly 1.1191 + aligned for patching by NativeJump::patch_verified_entry() */ 
1.1192 + int nops_cnt = 2; 1.1193 + if( !OptoBreakpoint ) // Leave space for int3 1.1194 + nops_cnt += 1; 1.1195 + masm.nop(nops_cnt); 1.1196 + 1.1197 + assert(cbuf.code_size() - code_size == size(ra_), "checking code size of inline cache node"); 1.1198 +} 1.1199 + 1.1200 +uint MachUEPNode::size(PhaseRegAlloc *ra_) const { 1.1201 + return OptoBreakpoint ? 11 : 12; 1.1202 +} 1.1203 + 1.1204 + 1.1205 +//============================================================================= 1.1206 +uint size_exception_handler() { 1.1207 + // NativeCall instruction size is the same as NativeJump. 1.1208 + // exception handler starts out as jump and can be patched to 1.1209 + // a call be deoptimization. (4932387) 1.1210 + // Note that this value is also credited (in output.cpp) to 1.1211 + // the size of the code section. 1.1212 + return NativeJump::instruction_size; 1.1213 +} 1.1214 + 1.1215 +// Emit exception handler code. Stuff framesize into a register 1.1216 +// and call a VM stub routine. 1.1217 +int emit_exception_handler(CodeBuffer& cbuf) { 1.1218 + 1.1219 + // Note that the code buffer's inst_mark is always relative to insts. 1.1220 + // That's why we must use the macroassembler to generate a handler. 1.1221 + MacroAssembler _masm(&cbuf); 1.1222 + address base = 1.1223 + __ start_a_stub(size_exception_handler()); 1.1224 + if (base == NULL) return 0; // CodeBuffer::expand failed 1.1225 + int offset = __ offset(); 1.1226 + __ jump(RuntimeAddress(OptoRuntime::exception_blob()->instructions_begin())); 1.1227 + assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1.1228 + __ end_a_stub(); 1.1229 + return offset; 1.1230 +} 1.1231 + 1.1232 +uint size_deopt_handler() { 1.1233 + // NativeCall instruction size is the same as NativeJump. 1.1234 + // exception handler starts out as jump and can be patched to 1.1235 + // a call be deoptimization. 
(4932387) 1.1236 + // Note that this value is also credited (in output.cpp) to 1.1237 + // the size of the code section. 1.1238 + return 5 + NativeJump::instruction_size; // pushl(); jmp; 1.1239 +} 1.1240 + 1.1241 +// Emit deopt handler code. 1.1242 +int emit_deopt_handler(CodeBuffer& cbuf) { 1.1243 + 1.1244 + // Note that the code buffer's inst_mark is always relative to insts. 1.1245 + // That's why we must use the macroassembler to generate a handler. 1.1246 + MacroAssembler _masm(&cbuf); 1.1247 + address base = 1.1248 + __ start_a_stub(size_exception_handler()); 1.1249 + if (base == NULL) return 0; // CodeBuffer::expand failed 1.1250 + int offset = __ offset(); 1.1251 + InternalAddress here(__ pc()); 1.1252 + __ pushptr(here.addr()); 1.1253 + 1.1254 + __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1.1255 + assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); 1.1256 + __ end_a_stub(); 1.1257 + return offset; 1.1258 +} 1.1259 + 1.1260 + 1.1261 +static void emit_double_constant(CodeBuffer& cbuf, double x) { 1.1262 + int mark = cbuf.insts()->mark_off(); 1.1263 + MacroAssembler _masm(&cbuf); 1.1264 + address double_address = __ double_constant(x); 1.1265 + cbuf.insts()->set_mark_off(mark); // preserve mark across masm shift 1.1266 + emit_d32_reloc(cbuf, 1.1267 + (int)double_address, 1.1268 + internal_word_Relocation::spec(double_address), 1.1269 + RELOC_DISP32); 1.1270 +} 1.1271 + 1.1272 +static void emit_float_constant(CodeBuffer& cbuf, float x) { 1.1273 + int mark = cbuf.insts()->mark_off(); 1.1274 + MacroAssembler _masm(&cbuf); 1.1275 + address float_address = __ float_constant(x); 1.1276 + cbuf.insts()->set_mark_off(mark); // preserve mark across masm shift 1.1277 + emit_d32_reloc(cbuf, 1.1278 + (int)float_address, 1.1279 + internal_word_Relocation::spec(float_address), 1.1280 + RELOC_DISP32); 1.1281 +} 1.1282 + 1.1283 + 1.1284 +int Matcher::regnum_to_fpu_offset(int regnum) { 1.1285 + return regnum - 32; // The FP registers 
are in the second chunk 1.1286 +} 1.1287 + 1.1288 +bool is_positive_zero_float(jfloat f) { 1.1289 + return jint_cast(f) == jint_cast(0.0F); 1.1290 +} 1.1291 + 1.1292 +bool is_positive_one_float(jfloat f) { 1.1293 + return jint_cast(f) == jint_cast(1.0F); 1.1294 +} 1.1295 + 1.1296 +bool is_positive_zero_double(jdouble d) { 1.1297 + return jlong_cast(d) == jlong_cast(0.0); 1.1298 +} 1.1299 + 1.1300 +bool is_positive_one_double(jdouble d) { 1.1301 + return jlong_cast(d) == jlong_cast(1.0); 1.1302 +} 1.1303 + 1.1304 +// This is UltraSparc specific, true just means we have fast l2f conversion 1.1305 +const bool Matcher::convL2FSupported(void) { 1.1306 + return true; 1.1307 +} 1.1308 + 1.1309 +// Vector width in bytes 1.1310 +const uint Matcher::vector_width_in_bytes(void) { 1.1311 + return UseSSE >= 2 ? 8 : 0; 1.1312 +} 1.1313 + 1.1314 +// Vector ideal reg 1.1315 +const uint Matcher::vector_ideal_reg(void) { 1.1316 + return Op_RegD; 1.1317 +} 1.1318 + 1.1319 +// Is this branch offset short enough that a short branch can be used? 1.1320 +// 1.1321 +// NOTE: If the platform does not provide any short branch variants, then 1.1322 +// this method should return false for offset 0. 1.1323 +bool Matcher::is_short_branch_offset(int offset) { 1.1324 + return (-128 <= offset && offset <= 127); 1.1325 +} 1.1326 + 1.1327 +const bool Matcher::isSimpleConstant64(jlong value) { 1.1328 + // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. 1.1329 + return false; 1.1330 +} 1.1331 + 1.1332 +// The ecx parameter to rep stos for the ClearArray node is in dwords. 1.1333 +const bool Matcher::init_array_count_is_in_bytes = false; 1.1334 + 1.1335 +// Threshold size for cleararray. 1.1336 +const int Matcher::init_array_short_size = 8 * BytesPerLong; 1.1337 + 1.1338 +// Should the Matcher clone shifts on addressing modes, expecting them to 1.1339 +// be subsumed into complex addressing expressions or compute them into 1.1340 +// registers? 
// True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = true;

// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers.  Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = true;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed.  Else we split the double into 2 integer pieces and move it
// piece-by-piece.  Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;


// Replace the memory operand of 'node' that covers input edge 'idx' with its
// "_win95_safe" operand variant.  Operand kinds listed as needing no
// transformation are left untouched; an unexpected kind asserts in debug
// builds and is otherwise left untouched.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  // Get the memory operand from the node
  uint numopnds = node->num_opnds();        // Virtual call for number of operands
  uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
  uint opcnt     = 1;                 // First operand
  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
  // Walk the operands, summing their edge counts, until the operand whose
  // edge range contains idx is found.
  while( idx >= skipped+num_edges ) {
    skipped += num_edges;
    opcnt++;                          // Bump operand count
    assert( opcnt < numopnds, "Accessing non-existent operand" );
    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
  }

  MachOper *memory = node->_opnds[opcnt];
  MachOper *new_memory = NULL;
  // Build the replacement operand, carrying over the displacement and/or
  // scale of the original where the operand kind has them.
  switch (memory->opcode()) {
  case DIRECT:
  case INDOFFSET32X:
    // No transformation necessary.
    return;
  case INDIRECT:
    new_memory = new (C) indirect_win95_safeOper( );
    break;
  case INDOFFSET8:
    new_memory = new (C) indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDOFFSET32:
    new_memory = new (C) indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXOFFSET:
    new_memory = new (C) indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXSCALE:
    new_memory = new (C) indIndexScale_win95_safeOper(memory->scale());
    break;
  case INDINDEXSCALEOFFSET:
    new_memory = new (C) indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
    break;
  case LOAD_LONG_INDIRECT:
  case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
    return;
  default:
    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
    return;
  }
  node->_opnds[opcnt] = new_memory;
}

// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;

// Do floats take an entire double register or just half?
const bool Matcher::float_in_double = true;
// Do ints take an entire long register or just half?
const bool Matcher::int_in_long = false;

// Return whether or not this register is ever used as an argument.  This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
1.1419 +bool Matcher::can_be_java_arg( int reg ) { 1.1420 + if( reg == ECX_num || reg == EDX_num ) return true; 1.1421 + if( (reg == XMM0a_num || reg == XMM1a_num) && UseSSE>=1 ) return true; 1.1422 + if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true; 1.1423 + return false; 1.1424 +} 1.1425 + 1.1426 +bool Matcher::is_spillable_arg( int reg ) { 1.1427 + return can_be_java_arg(reg); 1.1428 +} 1.1429 + 1.1430 +// Register for DIVI projection of divmodI (returns the EAX register mask) 1.1431 +RegMask Matcher::divI_proj_mask() { 1.1432 + return EAX_REG_mask; 1.1433 +} 1.1434 + 1.1435 +// Register for MODI projection of divmodI (returns the EDX register mask) 1.1436 +RegMask Matcher::modI_proj_mask() { 1.1437 + return EDX_REG_mask; 1.1438 +} 1.1439 + 1.1440 +// Register for DIVL projection of divmodL (not expected to be called here: hits ShouldNotReachHere) 1.1441 +RegMask Matcher::divL_proj_mask() { 1.1442 + ShouldNotReachHere(); 1.1443 + return RegMask(); 1.1444 +} 1.1445 + 1.1446 +// Register for MODL projection of divmodL (not expected to be called here: hits ShouldNotReachHere) 1.1447 +RegMask Matcher::modL_proj_mask() { 1.1448 + ShouldNotReachHere(); 1.1449 + return RegMask(); 1.1450 +} 1.1451 + 1.1452 +%} 1.1453 + 1.1454 +//----------ENCODING BLOCK----------------------------------------------------- 1.1455 +// This block specifies the encoding classes used by the compiler to output 1.1456 +// byte streams. Encoding classes generate functions which are called by 1.1457 +// Machine Instruction Nodes in order to generate the bit encoding of the 1.1458 +// instruction. Operands specify their base encoding interface with the 1.1459 +// interface keyword. Four interfaces are currently supported: 1.1460 +// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an 1.1461 +// operand to generate a function which returns its register number when 1.1462 +// queried. CONST_INTER causes an operand to generate a function which 1.1463 +// returns the value of the constant when queried.
MEMORY_INTER causes an 1.1464 +// operand to generate four functions which return the Base Register, the 1.1465 +// Index Register, the Scale Value, and the Offset Value of the operand when 1.1466 +// queried. COND_INTER causes an operand to generate six functions which 1.1467 +// return the encoding code (i.e., the encoding bits for the instruction) 1.1468 +// associated with each basic boolean condition for a conditional instruction. 1.1469 +// Instructions specify two basic values for encoding. They use the 1.1470 +// ins_encode keyword to specify their encoding class (which must be one of 1.1471 +// the class names specified in the encoding block), and they use the 1.1472 +// opcode keyword to specify, in order, their primary, secondary, and 1.1473 +// tertiary opcode. Only the opcode sections which a particular instruction 1.1474 +// needs for encoding need to be specified. 1.1475 +encode %{ 1.1476 + // Build emit functions for each basic byte or larger field in the Intel 1.1477 + // encoding scheme (opcode, rm, sib, immediate), and call them from C++ 1.1478 + // code in the enc_class source block. Emit functions will live in the 1.1479 + // main source block for now.
In future, we can generalize this by 1.1480 + // adding a syntax that specifies the sizes of fields in an order, 1.1481 + // so that the adlc can build the emit functions automagically 1.1482 + enc_class OpcP %{ // Emit primary opcode 1.1483 + emit_opcode(cbuf,$primary); 1.1484 + %} 1.1485 + 1.1486 + enc_class OpcS %{ // Emit secondary opcode 1.1487 + emit_opcode(cbuf,$secondary); 1.1488 + %} 1.1489 + 1.1490 + enc_class Opcode(immI d8 ) %{ // Emit opcode given as an immediate operand 1.1491 + emit_opcode(cbuf,$d8$$constant); 1.1492 + %} 1.1493 + 1.1494 + enc_class SizePrefix %{ 1.1495 + emit_opcode(cbuf,0x66); // Operand-size override prefix 1.1496 + %} 1.1497 + 1.1498 + enc_class RegReg (eRegI dst, eRegI src) %{ // RegReg(Many): ModRM byte in register-direct form (mod = 0x3) 1.1499 + emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1.1500 + %} 1.1501 + 1.1502 + enc_class OpcRegReg (immI opcode, eRegI dst, eRegI src) %{ // OpcRegReg(Many) 1.1503 + emit_opcode(cbuf,$opcode$$constant); 1.1504 + emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1.1505 + %} 1.1506 + 1.1507 + enc_class mov_r32_imm0( eRegI dst ) %{ 1.1508 + emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32 1.1509 + emit_d32 ( cbuf, 0x0 ); // imm32==0x0 1.1510 + %} 1.1511 + 1.1512 + enc_class cdq_enc %{ 1.1513 + // Full implementation of Java idiv and irem; checks for 1.1514 + // special case as described in JVM spec., p.243 & p.271.
1.1515 + // 1.1516 + // normal case special case 1.1517 + // 1.1518 + // input : eax: dividend min_int 1.1519 + // reg: divisor -1 1.1520 + // 1.1521 + // output: eax: quotient (= eax idiv reg) min_int 1.1522 + // edx: remainder (= eax irem reg) 0 1.1523 + // 1.1524 + // Code sequence: 1.1525 + // 1.1526 + // 81 F8 00 00 00 80 cmp eax,80000000h 1.1527 + // 0F 85 0B 00 00 00 jne normal_case 1.1528 + // 33 D2 xor edx,edx 1.1529 + // 83 F9 FF cmp ecx,-1 1.1530 + // 0F 84 03 00 00 00 je done 1.1531 + // normal_case: 1.1532 + // 99 cdq 1.1533 + // F7 F9 idiv ecx 1.1534 + // done: 1.1535 + // Note: the divisor is tested in ecx (83 F9 FF); 'je done' (+3) skips cdq plus the 2-byte idiv that follows 1.1536 + emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8); 1.1537 + emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); 1.1538 + emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80); // cmp eax,80000000h 1.1539 + emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85); 1.1540 + emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00); 1.1541 + emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // jne normal_case 1.1542 + emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2); // xor edx,edx 1.1543 + emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp ecx,-1 1.1544 + emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84); 1.1545 + emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00); 1.1546 + emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // je done 1.1547 + // normal_case: 1.1548 + emit_opcode(cbuf,0x99); // cdq 1.1549 + // idiv (note: must be emitted by the user of this rule) 1.1550 + // done: 1.1551 + %} 1.1552 + 1.1553 + // Dense encoding for older common ops: opcode byte = base opcode + register number 1.1554 + enc_class Opc_plus(immI opcode, eRegI reg) %{ 1.1555 + emit_opcode(cbuf, $opcode$$constant + $reg$$reg); 1.1556 + %} 1.1557 + 1.1558 + 1.1559 + // Opcode enc_class for 8/32 bit immediate instructions with sign-extension 1.1560 + enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit 1.1561 + // Check for 8-bit immediate, and set sign extend bit in opcode 1.1562 + if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1.1563 + emit_opcode(cbuf, $primary
| 0x02); 1.1564 + } 1.1565 + else { // If 32-bit immediate 1.1566 + emit_opcode(cbuf, $primary); 1.1567 + } 1.1568 + %} 1.1569 + 1.1570 + enc_class OpcSErm (eRegI dst, immI imm) %{ // OpcSEr/m 1.1571 + // Emit primary opcode and set sign-extend bit 1.1572 + // Check for 8-bit immediate, and set sign extend bit in opcode 1.1573 + if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1.1574 + emit_opcode(cbuf, $primary | 0x02); } 1.1575 + else { // If 32-bit immediate 1.1576 + emit_opcode(cbuf, $primary); 1.1577 + } 1.1578 + // Emit r/m byte with secondary opcode, after primary opcode. 1.1579 + emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1.1580 + %} 1.1581 + 1.1582 + enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits 1.1583 + // Check for 8-bit immediate; if it fits, emit it as a single byte 1.1584 + if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1.1585 + $$$emit8$imm$$constant; 1.1586 + } 1.1587 + else { // If 32-bit immediate 1.1588 + // Output immediate 1.1589 + $$$emit32$imm$$constant; 1.1590 + } 1.1591 + %} 1.1592 + 1.1593 + enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{ 1.1594 + // Emit primary opcode and set sign-extend bit 1.1595 + // Check for 8-bit immediate, and set sign extend bit in opcode 1.1596 + int con = (int)$imm$$constant; // Low 32 bits of the constant (throw away top bits) 1.1597 + emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1.1598 + // Emit r/m byte with secondary opcode, after primary opcode. 1.1599 + emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1.1600 + if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1.1601 + else emit_d32(cbuf,con); 1.1602 + %} 1.1603 + 1.1604 + enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{ 1.1605 + // Emit primary opcode and set sign-extend bit 1.1606 + // Check for 8-bit immediate, and set sign extend bit in opcode 1.1607 + int con = (int)($imm$$constant >> 32); // High 32 bits of the constant (throw away bottom bits) 1.1608 + emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ?
($primary | 0x02) : $primary); 1.1609 + // Emit r/m byte with tertiary opcode, after primary opcode. 1.1610 + emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg)); 1.1611 + if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1.1612 + else emit_d32(cbuf,con); 1.1613 + %} 1.1614 + 1.1615 + enc_class Lbl (label labl) %{ // JMP, CALL: 32-bit displacement relative to the end of the 4-byte displacement field (0 if no label) 1.1616 + Label *l = $labl$$label; 1.1617 + emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.code_size()+4)) : 0); 1.1618 + %} 1.1619 + 1.1620 + enc_class LblShort (label labl) %{ // JMP (short form): 8-bit displacement relative to the end of the 1-byte displacement field (0 if no label) 1.1621 + Label *l = $labl$$label; 1.1622 + int disp = l ? (l->loc_pos() - (cbuf.code_size()+1)) : 0; 1.1623 + assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp"); 1.1624 + emit_d8(cbuf, disp); 1.1625 + %} 1.1626 + 1.1627 + enc_class OpcSReg (eRegI dst) %{ // BSWAP 1.1628 + emit_cc(cbuf, $secondary, $dst$$reg ); 1.1629 + %} 1.1630 + 1.1631 + enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP each 32-bit half, then exchange the halves 1.1632 + int destlo = $dst$$reg; 1.1633 + int desthi = HIGH_FROM_LOW(destlo); 1.1634 + // bswap lo 1.1635 + emit_opcode(cbuf, 0x0F); 1.1636 + emit_cc(cbuf, 0xC8, destlo); 1.1637 + // bswap hi 1.1638 + emit_opcode(cbuf, 0x0F); 1.1639 + emit_cc(cbuf, 0xC8, desthi); 1.1640 + // xchg lo and hi 1.1641 + emit_opcode(cbuf, 0x87); 1.1642 + emit_rm(cbuf, 0x3, destlo, desthi); 1.1643 + %} 1.1644 + 1.1645 + enc_class RegOpc (eRegI div) %{ // IDIV, IMOD, JMP indirect, ... 1.1646 + emit_rm(cbuf, 0x3, $secondary, $div$$reg ); 1.1647 + %} 1.1648 + 1.1649 + enc_class Jcc (cmpOp cop, label labl) %{ // JCC 1.1650 + Label *l = $labl$$label; 1.1651 + $$$emit8$primary; 1.1652 + emit_cc(cbuf, $secondary, $cop$$cmpcode); 1.1653 + emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.code_size()+4)) : 0); 1.1654 + %} 1.1655 + 1.1656 + enc_class JccShort (cmpOp cop, label labl) %{ // JCC (short form, 8-bit displacement) 1.1657 + Label *l = $labl$$label; 1.1658 + emit_cc(cbuf, $primary, $cop$$cmpcode); 1.1659 + int disp = l ?
(l->loc_pos() - (cbuf.code_size()+1)) : 0; 1.1660 + assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp"); 1.1661 + emit_d8(cbuf, disp); 1.1662 + %} 1.1663 + 1.1664 + enc_class enc_cmov(cmpOp cop ) %{ // CMOV 1.1665 + $$$emit8$primary; 1.1666 + emit_cc(cbuf, $secondary, $cop$$cmpcode); 1.1667 + %} 1.1668 + 1.1669 + enc_class enc_cmov_d(cmpOp cop, regD src ) %{ // CMOV (FPU register form): 16-bit opcode 0xDA00 + condition code + (src - 1), emitted high byte first 1.1670 + int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); 1.1671 + emit_d8(cbuf, op >> 8 ); 1.1672 + emit_d8(cbuf, op & 255); 1.1673 + %} 1.1674 + 1.1675 + // emulate a CMOV with a conditional branch around a MOV 1.1676 + enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV 1.1677 + // Invert sense of branch from sense of CMOV (flip the low bit of the condition code); 0x70 is the base opcode of the short Jcc group 1.1678 + emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) ); 1.1679 + emit_d8( cbuf, $brOffs$$constant ); 1.1680 + %} 1.1681 + 1.1682 + enc_class enc_PartialSubtypeCheck( ) %{ 1.1683 + Register Redi = as_Register(EDI_enc); // result register 1.1684 + Register Reax = as_Register(EAX_enc); // super class 1.1685 + Register Recx = as_Register(ECX_enc); // killed 1.1686 + Register Resi = as_Register(ESI_enc); // sub class 1.1687 + Label hit, miss; 1.1688 + 1.1689 + MacroAssembler _masm(&cbuf); 1.1690 + // Compare super with sub directly, since super is not in its own SSA. 1.1691 + // The compiler used to emit this test, but we fold it in here, 1.1692 + // to allow platform-specific tweaking on sparc.
1.1693 + __ cmpl(Reax, Resi); 1.1694 + __ jcc(Assembler::equal, hit); 1.1695 +#ifndef PRODUCT 1.1696 + __ increment(ExternalAddress((address)&SharedRuntime::_partial_subtype_ctr)); 1.1697 +#endif //PRODUCT 1.1698 + __ movl(Redi,Address(Resi,sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())); 1.1699 + __ movl(Recx,Address(Redi,arrayOopDesc::length_offset_in_bytes())); 1.1700 + __ addl(Redi,arrayOopDesc::base_offset_in_bytes(T_OBJECT)); 1.1701 + __ repne_scan(); 1.1702 + __ jcc(Assembler::notEqual, miss); 1.1703 + __ movl(Address(Resi,sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()),Reax); // Scan succeeded: store super into the sub class's secondary super cache 1.1704 + __ bind(hit); 1.1705 + if( $primary ) 1.1706 + __ xorl(Redi,Redi); // Zero the result register on a hit (only emitted when $primary is nonzero) 1.1707 + __ bind(miss); 1.1708 + %} 1.1709 + 1.1710 + enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All: clean the FPU stack, then record (first use) or verify the size of the emitted code 1.1711 + MacroAssembler masm(&cbuf); 1.1712 + int start = masm.offset(); 1.1713 + if (UseSSE >= 2) { 1.1714 + if (VerifyFPU) { 1.1715 + masm.verify_FPU(0, "must be empty in SSE2+ mode"); 1.1716 + } 1.1717 + } else { 1.1718 + // External c_calling_convention expects the FPU stack to be 'clean'. 1.1719 + // Compiled code leaves it dirty. Do cleanup now. 1.1720 + masm.empty_FPU_stack(); 1.1721 + } 1.1722 + if (sizeof_FFree_Float_Stack_All == -1) { 1.1723 + sizeof_FFree_Float_Stack_All = masm.offset() - start; 1.1724 + } else { 1.1725 + assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size"); 1.1726 + } 1.1727 + %} 1.1728 + 1.1729 + enc_class Verify_FPU_For_Leaf %{ 1.1730 + if( VerifyFPU ) { 1.1731 + MacroAssembler masm(&cbuf); 1.1732 + masm.verify_FPU( -3, "Returning from Runtime Leaf call"); 1.1733 + } 1.1734 + %} 1.1735 + 1.1736 + enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf 1.1737 + // This is the instruction starting address for relocation info.
1.1738 + cbuf.set_inst_mark(); 1.1739 + $$$emit8$primary; 1.1740 + // CALL directly to the runtime 1.1741 + emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4), 1.1742 + runtime_call_Relocation::spec(), RELOC_IMM32 ); 1.1743 + 1.1744 + if (UseSSE >= 2) { 1.1745 + MacroAssembler _masm(&cbuf); 1.1746 + BasicType rt = tf()->return_type(); 1.1747 + 1.1748 + if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) { 1.1749 + // A C runtime call where the return value is unused. In SSE2+ 1.1750 + // mode the result needs to be removed from the FPU stack. It's 1.1751 + // likely that this function call could be removed by the 1.1752 + // optimizer if the C function is a pure function. 1.1753 + __ ffree(0); 1.1754 + } else if (rt == T_FLOAT) { // Move the float result from the FPU stack top to xmm0 through a temporary 4-byte stack slot 1.1755 + __ leal(rsp, Address(rsp, -4)); 1.1756 + __ fstp_s(Address(rsp, 0)); 1.1757 + __ movflt(xmm0, Address(rsp, 0)); 1.1758 + __ leal(rsp, Address(rsp, 4)); 1.1759 + } else if (rt == T_DOUBLE) { // Move the double result from the FPU stack top to xmm0 through a temporary 8-byte stack slot 1.1760 + __ leal(rsp, Address(rsp, -8)); 1.1761 + __ fstp_d(Address(rsp, 0)); 1.1762 + __ movdbl(xmm0, Address(rsp, 0)); 1.1763 + __ leal(rsp, Address(rsp, 8)); 1.1764 + } 1.1765 + } 1.1766 + %} 1.1767 + 1.1768 + 1.1769 + enc_class pre_call_FPU %{ 1.1770 + // If method sets FPU control word (24-bit FP mode), load the standard control word here 1.1771 + if( Compile::current()->in_24_bit_fp_mode() ) { 1.1772 + MacroAssembler masm(&cbuf); 1.1773 + masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); 1.1774 + } 1.1775 + %} 1.1776 + 1.1777 + enc_class post_call_FPU %{ 1.1778 + // If method sets FPU control word (24-bit FP mode), reload the 24-bit control word here 1.1779 + if( Compile::current()->in_24_bit_fp_mode() ) { 1.1780 + MacroAssembler masm(&cbuf); 1.1781 + masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); 1.1782 + } 1.1783 + %} 1.1784 + 1.1785 + enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL 1.1786 + // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine 1.1787 + // who we intended to call.
1.1788 + cbuf.set_inst_mark(); 1.1789 + $$$emit8$primary; 1.1790 + if ( !_method ) { 1.1791 + emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4), 1.1792 + runtime_call_Relocation::spec(), RELOC_IMM32 ); 1.1793 + } else if(_optimized_virtual) { 1.1794 + emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4), 1.1795 + opt_virtual_call_Relocation::spec(), RELOC_IMM32 ); 1.1796 + } else { 1.1797 + emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4), 1.1798 + static_call_Relocation::spec(), RELOC_IMM32 ); 1.1799 + } 1.1800 + if( _method ) { // Emit stub for static call 1.1801 + emit_java_to_interp(cbuf); 1.1802 + } 1.1803 + %} 1.1804 + 1.1805 + enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL 1.1806 + // !!!!! 1.1807 + // Generate "MOV EAX,imm32" with Universe::non_oop_word() as the immediate: a placeholder instruction to load oop-info 1.1808 + // emit_call_dynamic_prologue( cbuf ); 1.1809 + cbuf.set_inst_mark(); 1.1810 + emit_opcode(cbuf, 0xB8 + EAX_enc); // MOV EAX,imm32 (the immediate is emitted by the next statement) 1.1811 + emit_d32_reloc(cbuf, (int)Universe::non_oop_word(), oop_Relocation::spec_for_immediate(), RELOC_IMM32); 1.1812 + address virtual_call_oop_addr = cbuf.inst_mark(); // The mark that was set just before the MOV above 1.1813 + // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine 1.1814 + // who we intended to call.
1.1815 + cbuf.set_inst_mark(); 1.1816 + $$$emit8$primary; 1.1817 + emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4), 1.1818 + virtual_call_Relocation::spec(virtual_call_oop_addr), RELOC_IMM32 ); 1.1819 + %} 1.1820 + 1.1821 + enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL 1.1822 + int disp = in_bytes(methodOopDesc::from_compiled_offset()); 1.1823 + assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small"); 1.1824 + 1.1825 + // CALL *[EAX+in_bytes(methodOopDesc::from_compiled_offset())] 1.1826 + cbuf.set_inst_mark(); 1.1827 + $$$emit8$primary; 1.1828 + emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte: mod = 01 (8-bit displacement follows), base = EAX 1.1829 + emit_d8(cbuf, disp); // Displacement 1.1830 + 1.1831 + %} 1.1832 + 1.1833 + enc_class Xor_Reg (eRegI dst) %{ 1.1834 + emit_opcode(cbuf, 0x33); // XOR dst,dst 1.1835 + emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg); 1.1836 + %} 1.1837 + 1.1838 +// Following encoding is no longer used, but may be restored if calling 1.1839 +// convention changes significantly. 1.1840 +// Became: Xor_Reg(EBP), Java_To_Runtime( labl ) 1.1841 +// 1.1842 +// enc_class Java_Interpreter_Call (label labl) %{ // JAVA INTERPRETER CALL 1.1843 +// // int ic_reg = Matcher::inline_cache_reg(); 1.1844 +// // int ic_encode = Matcher::_regEncode[ic_reg]; 1.1845 +// // int imo_reg = Matcher::interpreter_method_oop_reg(); 1.1846 +// // int imo_encode = Matcher::_regEncode[imo_reg]; 1.1847 +// 1.1848 +// // // Interpreter expects method_oop in EBX, currently a callee-saved register, 1.1849 +// // // so we load it immediately before the call 1.1850 +// // emit_opcode(cbuf, 0x8B); // MOV imo_reg,ic_reg # method_oop 1.1851 +// // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte 1.1852 +// 1.1853 +// // xor ebp,ebp 1.1854 +// emit_opcode(cbuf, 0x33); 1.1855 +// emit_rm(cbuf, 0x3, EBP_enc, EBP_enc); 1.1856 +// 1.1857 +// // CALL to interpreter.
1.1858 +// cbuf.set_inst_mark(); 1.1859 +// $$$emit8$primary; 1.1860 +// emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.code_end()) - 4), 1.1861 +// runtime_call_Relocation::spec(), RELOC_IMM32 ); 1.1862 +// %} 1.1863 + 1.1864 + enc_class RegOpcImm (eRegI dst, immI8 shift) %{ // SHL, SAR, SHR 1.1865 + $$$emit8$primary; 1.1866 + emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1.1867 + $$$emit8$shift$$constant; 1.1868 + %} 1.1869 + 1.1870 + enc_class LdImmI (eRegI dst, immI src) %{ // Load Immediate 1.1871 + // Load immediate does not have a zero or sign extended version 1.1872 + // for 8-bit immediates 1.1873 + emit_opcode(cbuf, 0xB8 + $dst$$reg); 1.1874 + $$$emit32$src$$constant; 1.1875 + %} 1.1876 + 1.1877 + enc_class LdImmP (eRegI dst, immI src) %{ // Load Immediate 1.1878 + // Load immediate does not have a zero or sign extended version 1.1879 + // for 8-bit immediates 1.1880 + emit_opcode(cbuf, $primary + $dst$$reg); 1.1881 + $$$emit32$src$$constant; 1.1882 + %} 1.1883 + 1.1884 + enc_class LdImmL_Lo( eRegL dst, immL src) %{ // Load Immediate: low 32 bits of a long constant 1.1885 + // Load immediate does not have a zero or sign extended version 1.1886 + // for 8-bit immediates 1.1887 + int dst_enc = $dst$$reg; 1.1888 + int src_con = $src$$constant & 0x0FFFFFFFFL; // Keep only the low 32 bits 1.1889 + if (src_con == 0) { 1.1890 + // xor dst, dst 1.1891 + emit_opcode(cbuf, 0x33); 1.1892 + emit_rm(cbuf, 0x3, dst_enc, dst_enc); 1.1893 + } else { 1.1894 + emit_opcode(cbuf, $primary + dst_enc); 1.1895 + emit_d32(cbuf, src_con); 1.1896 + } 1.1897 + %} 1.1898 + 1.1899 + enc_class LdImmL_Hi( eRegL dst, immL src) %{ // Load Immediate: high 32 bits of a long constant 1.1900 + // Load immediate does not have a zero or sign extended version 1.1901 + // for 8-bit immediates 1.1902 + int dst_enc = $dst$$reg + 2; // High register of the pair; presumably the same as HIGH_FROM_LOW($dst$$reg) used elsewhere in this block -- TODO confirm 1.1903 + int src_con = ((julong)($src$$constant)) >> 32; 1.1904 + if (src_con == 0) { 1.1905 + // xor dst, dst 1.1906 + emit_opcode(cbuf, 0x33); 1.1907 + emit_rm(cbuf, 0x3, dst_enc, dst_enc); 1.1908 + } else { 1.1909 + emit_opcode(cbuf, $primary + dst_enc); 1.1910
+ emit_d32(cbuf, src_con); 1.1911 + } 1.1912 + %} 1.1913 + 1.1914 + 1.1915 + enc_class LdImmD (immD src) %{ // Load Immediate (double): FLDZ/FLD1 for +0.0/+1.0, otherwise load from an emitted double constant 1.1916 + if( is_positive_zero_double($src$$constant)) { 1.1917 + // FLDZ 1.1918 + emit_opcode(cbuf,0xD9); 1.1919 + emit_opcode(cbuf,0xEE); 1.1920 + } else if( is_positive_one_double($src$$constant)) { 1.1921 + // FLD1 1.1922 + emit_opcode(cbuf,0xD9); 1.1923 + emit_opcode(cbuf,0xE8); 1.1924 + } else { 1.1925 + emit_opcode(cbuf,0xDD); 1.1926 + emit_rm(cbuf, 0x0, 0x0, 0x5); 1.1927 + emit_double_constant(cbuf, $src$$constant); 1.1928 + } 1.1929 + %} 1.1930 + 1.1931 + 1.1932 + enc_class LdImmF (immF src) %{ // Load Immediate 1.1933 + if( is_positive_zero_float($src$$constant)) { 1.1934 + emit_opcode(cbuf,0xD9); 1.1935 + emit_opcode(cbuf,0xEE); // FLDZ (0xD9 0xEE) 1.1936 + } else if( is_positive_one_float($src$$constant)) { 1.1937 + emit_opcode(cbuf,0xD9); 1.1938 + emit_opcode(cbuf,0xE8); // FLD1 (0xD9 0xE8) 1.1939 + } else { 1.1940 + $$$emit8$primary; 1.1941 + // Load immediate does not have a zero or sign extended version 1.1942 + // for 8-bit immediates 1.1943 + // First load to TOS, then move to dst 1.1944 + emit_rm(cbuf, 0x0, 0x0, 0x5); 1.1945 + emit_float_constant(cbuf, $src$$constant); 1.1946 + } 1.1947 + %} 1.1948 + 1.1949 + enc_class LdImmX (regX dst, immXF con) %{ // Load Immediate 1.1950 + emit_rm(cbuf, 0x0, $dst$$reg, 0x5); // mod = 00, r/m = 101: a 32-bit absolute address follows 1.1951 + emit_float_constant(cbuf, $con$$constant); 1.1952 + %} 1.1953 + 1.1954 + enc_class LdImmXD (regXD dst, immXD con) %{ // Load Immediate 1.1955 + emit_rm(cbuf, 0x0, $dst$$reg, 0x5); 1.1956 + emit_double_constant(cbuf, $con$$constant); 1.1957 + %} 1.1958 + 1.1959 + enc_class load_conXD (regXD dst, immXD con) %{ // Load double constant 1.1960 + // UseXmmLoadAndClearUpper ? movsd(dst, con) : movlpd(dst, con) 1.1961 + emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66); 1.1962 + emit_opcode(cbuf, 0x0F); 1.1963 + emit_opcode(cbuf, UseXmmLoadAndClearUpper ?
0x10 : 0x12); 1.1964 + emit_rm(cbuf, 0x0, $dst$$reg, 0x5); 1.1965 + emit_double_constant(cbuf, $con$$constant); 1.1966 + %} 1.1967 + 1.1968 + enc_class Opc_MemImm_F(immF src) %{ 1.1969 + cbuf.set_inst_mark(); 1.1970 + $$$emit8$primary; 1.1971 + emit_rm(cbuf, 0x0, $secondary, 0x5); 1.1972 + emit_float_constant(cbuf, $src$$constant); 1.1973 + %} 1.1974 + 1.1975 + 1.1976 + enc_class MovI2X_reg(regX dst, eRegI src) %{ 1.1977 + emit_opcode(cbuf, 0x66 ); // MOVD dst,src (66 0F 6E: general register -> XMM) 1.1978 + emit_opcode(cbuf, 0x0F ); 1.1979 + emit_opcode(cbuf, 0x6E ); 1.1980 + emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1.1981 + %} 1.1982 + 1.1983 + enc_class MovX2I_reg(eRegI dst, regX src) %{ 1.1984 + emit_opcode(cbuf, 0x66 ); // MOVD dst,src (66 0F 7E: XMM -> general register; the XMM register goes in the ModRM reg field) 1.1985 + emit_opcode(cbuf, 0x0F ); 1.1986 + emit_opcode(cbuf, 0x7E ); 1.1987 + emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg); 1.1988 + %} 1.1989 + 1.1990 + enc_class MovL2XD_reg(regXD dst, eRegL src, regXD tmp) %{ 1.1991 + { // MOVD $dst,$src.lo 1.1992 + emit_opcode(cbuf,0x66); 1.1993 + emit_opcode(cbuf,0x0F); 1.1994 + emit_opcode(cbuf,0x6E); 1.1995 + emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1.1996 + } 1.1997 + { // MOVD $tmp,$src.hi 1.1998 + emit_opcode(cbuf,0x66); 1.1999 + emit_opcode(cbuf,0x0F); 1.2000 + emit_opcode(cbuf,0x6E); 1.2001 + emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg)); 1.2002 + } 1.2003 + { // PUNPCKLDQ $dst,$tmp (interleave the low doublewords, so $dst holds hi:lo) 1.2004 + emit_opcode(cbuf,0x66); 1.2005 + emit_opcode(cbuf,0x0F); 1.2006 + emit_opcode(cbuf,0x62); 1.2007 + emit_rm(cbuf, 0x3, $dst$$reg, $tmp$$reg); 1.2008 + } 1.2009 + %} 1.2010 + 1.2011 + enc_class MovXD2L_reg(eRegL dst, regXD src, regXD tmp) %{ 1.2012 + { // MOVD $dst.lo,$src 1.2013 + emit_opcode(cbuf,0x66); 1.2014 + emit_opcode(cbuf,0x0F); 1.2015 + emit_opcode(cbuf,0x7E); 1.2016 + emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg); 1.2017 + } 1.2018 + { // PSHUFLW $tmp,$src,0x4E (01001110b) 1.2019 + emit_opcode(cbuf,0xF2); 1.2020 + emit_opcode(cbuf,0x0F); 1.2021 + emit_opcode(cbuf,0x70); 1.2022 + emit_rm(cbuf, 0x3, $tmp$$reg,
$src$$reg); 1.2023 + emit_d8(cbuf, 0x4E); 1.2024 + } 1.2025 + { // MOVD $dst.hi,$tmp 1.2026 + emit_opcode(cbuf,0x66); 1.2027 + emit_opcode(cbuf,0x0F); 1.2028 + emit_opcode(cbuf,0x7E); 1.2029 + emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg)); 1.2030 + } 1.2031 + %} 1.2032 + 1.2033 + 1.2034 + // Encode a reg-reg copy. If it is useless, then empty encoding. 1.2035 + enc_class enc_Copy( eRegI dst, eRegI src ) %{ 1.2036 + encode_Copy( cbuf, $dst$$reg, $src$$reg ); 1.2037 + %} 1.2038 + 1.2039 + enc_class enc_CopyL_Lo( eRegI dst, eRegL src ) %{ 1.2040 + encode_Copy( cbuf, $dst$$reg, $src$$reg ); 1.2041 + %} 1.2042 + 1.2043 + // Encode xmm reg-reg copy. If it is useless, then empty encoding. 1.2044 + enc_class enc_CopyXD( RegXD dst, RegXD src ) %{ 1.2045 + encode_CopyXD( cbuf, $dst$$reg, $src$$reg ); 1.2046 + %} 1.2047 + 1.2048 + enc_class RegReg (eRegI dst, eRegI src) %{ // RegReg(Many) -- NOTE(review): an enc_class named RegReg with the same body also appears earlier in this encode block 1.2049 + emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1.2050 + %} 1.2051 + 1.2052 + enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // Primary opcode, then ModRM for the low registers of the pairs 1.2053 + $$$emit8$primary; 1.2054 + emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1.2055 + %} 1.2056 + 1.2057 + enc_class RegReg_Hi(eRegL dst, eRegL src) %{ // Secondary opcode, then ModRM for the high registers of the pairs 1.2058 + $$$emit8$secondary; 1.2059 + emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg)); 1.2060 + %} 1.2061 + 1.2062 + enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // ModRM only, low registers of the pairs 1.2063 + emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1.2064 + %} 1.2065 + 1.2066 + enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // ModRM only, high registers of the pairs 1.2067 + emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg)); 1.2068 + %} 1.2069 + 1.2070 + enc_class RegReg_HiLo( eRegL src, eRegI dst ) %{ 1.2071 + emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg)); 1.2072 + %} 1.2073 + 1.2074 + enc_class Con32 (immI src) %{ // Con32(storeImmI) 1.2075 + // Output immediate 1.2076 + $$$emit32$src$$constant; 1.2077 + %} 1.2078 + 1.2079 + enc_class Con32F_as_bits(immF src) %{ //
storeF_imm 1.2080 + // Output the raw 32-bit pattern of the float immediate 1.2081 + jfloat jf = $src$$constant; 1.2082 + int jf_as_bits = jint_cast( jf ); 1.2083 + emit_d32(cbuf, jf_as_bits); 1.2084 + %} 1.2085 + 1.2086 + enc_class Con32XF_as_bits(immXF src) %{ // storeX_imm 1.2087 + // Output the raw 32-bit pattern of the float immediate 1.2088 + jfloat jf = $src$$constant; 1.2089 + int jf_as_bits = jint_cast( jf ); 1.2090 + emit_d32(cbuf, jf_as_bits); 1.2091 + %} 1.2092 + 1.2093 + enc_class Con16 (immI src) %{ // Con16(storeImmI) 1.2094 + // Output immediate 1.2095 + $$$emit16$src$$constant; 1.2096 + %} 1.2097 + 1.2098 + enc_class Con_d32(immI src) %{ 1.2099 + emit_d32(cbuf,$src$$constant); 1.2100 + %} 1.2101 + 1.2102 + enc_class conmemref (eRegP t1) %{ // Con32(storeImmI) 1.2103 + // Output immediate memory reference 1.2104 + emit_rm(cbuf, 0x00, $t1$$reg, 0x05 ); 1.2105 + emit_d32(cbuf, 0x00); 1.2106 + %} 1.2107 + 1.2108 + enc_class lock_prefix( ) %{ 1.2109 + if( os::is_MP() ) 1.2110 + emit_opcode(cbuf,0xF0); // [Lock] prefix, emitted only when os::is_MP() is true 1.2111 + %} 1.2112 + 1.2113 + // Cmp-xchg long value. 1.2114 + // Note: we need to swap ebx and ecx before and after the 1.2115 + // cmpxchg8 instruction because the instruction uses 1.2116 + // ecx as the high order word of the new value to store but 1.2117 + // our register encoding uses ebx.
1.2118 + enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{ 1.2119 + 1.2120 + // XCHG ebx,ecx 1.2121 + emit_opcode(cbuf,0x87); 1.2122 + emit_opcode(cbuf,0xD9); 1.2123 + // [Lock] 1.2124 + if( os::is_MP() ) 1.2125 + emit_opcode(cbuf,0xF0); 1.2126 + // CMPXCHG8B [Eptr] (0F C7 /1) 1.2127 + emit_opcode(cbuf,0x0F); 1.2128 + emit_opcode(cbuf,0xC7); 1.2129 + emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg ); 1.2130 + // XCHG ebx,ecx 1.2131 + emit_opcode(cbuf,0x87); 1.2132 + emit_opcode(cbuf,0xD9); 1.2133 + %} 1.2134 + 1.2135 + enc_class enc_cmpxchg(eSIRegP mem_ptr) %{ 1.2136 + // [Lock] 1.2137 + if( os::is_MP() ) 1.2138 + emit_opcode(cbuf,0xF0); 1.2139 + 1.2140 + // CMPXCHG [Eptr] 1.2141 + emit_opcode(cbuf,0x0F); 1.2142 + emit_opcode(cbuf,0xB1); 1.2143 + emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg ); 1.2144 + %} 1.2145 + 1.2146 + enc_class enc_flags_ne_to_boolean( iRegI res ) %{ 1.2147 + int res_encoding = $res$$reg; 1.2148 + 1.2149 + // MOV res,0 1.2150 + emit_opcode( cbuf, 0xB8 + res_encoding); 1.2151 + emit_d32( cbuf, 0 ); 1.2152 + // JNE,s fail (skips the 5-byte MOV res,1 below) 1.2153 + emit_opcode(cbuf,0x75); 1.2154 + emit_d8(cbuf, 5 ); 1.2155 + // MOV res,1 1.2156 + emit_opcode( cbuf, 0xB8 + res_encoding); 1.2157 + emit_d32( cbuf, 1 ); 1.2158 + // fail: 1.2159 + %} 1.2160 + 1.2161 + enc_class set_instruction_start( ) %{ 1.2162 + cbuf.set_inst_mark(); // Mark start of opcode for reloc info in mem operand 1.2163 + %} 1.2164 + 1.2165 + enc_class RegMem (eRegI ereg, memory mem) %{ // emit_reg_mem 1.2166 + int reg_encoding = $ereg$$reg; 1.2167 + int base = $mem$$base; 1.2168 + int index = $mem$$index; 1.2169 + int scale = $mem$$scale; 1.2170 + int displace = $mem$$disp; 1.2171 + bool disp_is_oop = $mem->disp_is_oop(); 1.2172 + encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop); 1.2173 + %} 1.2174 + 1.2175 + enc_class RegMem_Hi(eRegL ereg, memory mem) %{ // emit_reg_mem 1.2176 + int reg_encoding = HIGH_FROM_LOW($ereg$$reg); // Hi register of pair, computed from lo 1.2177 + int base = $mem$$base; 1.2178 + int index =
$mem$$index; 1.2179 + int scale = $mem$$scale; 1.2180 + int displace = $mem$$disp + 4; // Offset is 4 further in memory 1.2181 + assert( !$mem->disp_is_oop(), "Cannot add 4 to oop" ); 1.2182 + encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, false/*disp_is_oop*/); 1.2183 + %} 1.2184 + 1.2185 + enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{ 1.2186 + int r1, r2; 1.2187 + if( $tertiary == 0xA4 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW($dst$$reg); } 1.2188 + else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW($dst$$reg); } 1.2189 + emit_opcode(cbuf,0x0F); 1.2190 + emit_opcode(cbuf,$tertiary); 1.2191 + emit_rm(cbuf, 0x3, r1, r2); 1.2192 + emit_d8(cbuf,$cnt$$constant); 1.2193 + emit_d8(cbuf,$primary); 1.2194 + emit_rm(cbuf, 0x3, $secondary, r1); 1.2195 + emit_d8(cbuf,$cnt$$constant); 1.2196 + %} 1.2197 + 1.2198 + enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{ 1.2199 + emit_opcode( cbuf, 0x8B ); // MOV dst.lo,dst.hi 1.2200 + emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg)); 1.2201 + emit_d8(cbuf,$primary); 1.2202 + emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1.2203 + emit_d8(cbuf,$cnt$$constant-32); 1.2204 + emit_d8(cbuf,$primary); 1.2205 + emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg)); 1.2206 + emit_d8(cbuf,31); // Shift count 31 for the high half (presumably an arithmetic shift that fills it with the sign bit -- TODO confirm) 1.2207 + %} 1.2208 + 1.2209 + enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{ 1.2210 + int r1, r2; 1.2211 + if( $secondary == 0x5 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW($dst$$reg); } 1.2212 + else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW($dst$$reg); } 1.2213 + 1.2214 + emit_opcode( cbuf, 0x8B ); // Move r1,r2 1.2215 + emit_rm(cbuf, 0x3, r1, r2); 1.2216 + if( $cnt$$constant > 32 ) { // Shift by (cnt - 32), if that is not zero 1.2217 + emit_opcode(cbuf,$primary); 1.2218 + emit_rm(cbuf, 0x3, $secondary, r1); 1.2219 + emit_d8(cbuf,$cnt$$constant-32); 1.2220 + } 1.2221 + emit_opcode(cbuf,0x33); // XOR r2,r2 1.2222 + emit_rm(cbuf, 0x3, r2, r2); 1.2223 + %} 1.2224 + 1.2225 + // Clone of RegMem but accepts an extra parameter to access
each 1.2226 + // half of a double in memory; it never needs relocation info. 1.2227 + enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, eRegI rm_reg) %{ 1.2228 + emit_opcode(cbuf,$opcode$$constant); 1.2229 + int reg_encoding = $rm_reg$$reg; 1.2230 + int base = $mem$$base; 1.2231 + int index = $mem$$index; 1.2232 + int scale = $mem$$scale; 1.2233 + int displace = $mem$$disp + $disp_for_half$$constant; // The extra displacement selects which half is accessed 1.2234 + bool disp_is_oop = false; 1.2235 + encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop); 1.2236 + %} 1.2237 + 1.2238 + // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!! 1.2239 + // 1.2240 + // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant 1.2241 + // and it never needs relocation information. 1.2242 + // Frequently used to move data between FPU's Stack Top and memory. 1.2243 + enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{ 1.2244 + int rm_byte_opcode = $rm_opcode$$constant; 1.2245 + int base = $mem$$base; 1.2246 + int index = $mem$$index; 1.2247 + int scale = $mem$$scale; 1.2248 + int displace = $mem$$disp; 1.2249 + assert( !$mem->disp_is_oop(), "No oops here because no relo info allowed" ); 1.2250 + encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, false); 1.2251 + %} 1.2252 + 1.2253 + enc_class RMopc_Mem (immI rm_opcode, memory mem) %{ // Like RMopc_Mem_no_oop, but passes the operand's disp_is_oop flag through 1.2254 + int rm_byte_opcode = $rm_opcode$$constant; 1.2255 + int base = $mem$$base; 1.2256 + int index = $mem$$index; 1.2257 + int scale = $mem$$scale; 1.2258 + int displace = $mem$$disp; 1.2259 + bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals 1.2260 + encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop); 1.2261 + %} 1.2262 + 1.2263 + enc_class RegLea (eRegI dst, eRegI src0, immI src1 ) %{ // emit_reg_lea 1.2264 + int reg_encoding = $dst$$reg; 1.2265 + int base = $src0$$reg; // 0xFFFFFFFF indicates no base 1.2266 + int
index = 0x04; // 0x04 indicates no index 1.2267 + int scale = 0x00; // 0x00 indicates no scale 1.2268 + int displace = $src1$$constant; // 0x00 indicates no displacement 1.2269 + bool disp_is_oop = false; 1.2270 + encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop); 1.2271 + %} 1.2272 + 1.2273 + enc_class min_enc (eRegI dst, eRegI src) %{ // MIN 1.2274 + // Compare dst,src 1.2275 + emit_opcode(cbuf,0x3B); 1.2276 + emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1.2277 + // jmp dst < src around move 1.2278 + emit_opcode(cbuf,0x7C); 1.2279 + emit_d8(cbuf,2); 1.2280 + // move dst,src 1.2281 + emit_opcode(cbuf,0x8B); 1.2282 + emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1.2283 + %} 1.2284 + 1.2285 + enc_class max_enc (eRegI dst, eRegI src) %{ // MAX 1.2286 + // Compare dst,src 1.2287 + emit_opcode(cbuf,0x3B); 1.2288 + emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1.2289 + // jmp dst > src around move 1.2290 + emit_opcode(cbuf,0x7F); 1.2291 + emit_d8(cbuf,2); 1.2292 + // move dst,src 1.2293 + emit_opcode(cbuf,0x8B); 1.2294 + emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1.2295 + %} 1.2296 + 1.2297 + enc_class enc_FP_store(memory mem, regD src) %{ 1.2298 + // If src is FPR1, we can just FST to store it. 1.2299 + // Else we need to FLD it to FPR1, then FSTP to store/pop it. 
1.2300 + int reg_encoding = 0x2; // Just store 1.2301 + int base = $mem$$base; 1.2302 + int index = $mem$$index; 1.2303 + int scale = $mem$$scale; 1.2304 + int displace = $mem$$disp; 1.2305 + bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals 1.2306 + if( $src$$reg != FPR1L_enc ) { 1.2307 + reg_encoding = 0x3; // Store & pop 1.2308 + emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it) 1.2309 + emit_d8( cbuf, 0xC0-1+$src$$reg ); 1.2310 + } 1.2311 + cbuf.set_inst_mark(); // Mark start of opcode for reloc info in mem operand 1.2312 + emit_opcode(cbuf,$primary); 1.2313 + encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop); 1.2314 + %} 1.2315 + 1.2316 + enc_class neg_reg(eRegI dst) %{ 1.2317 + // NEG $dst 1.2318 + emit_opcode(cbuf,0xF7); 1.2319 + emit_rm(cbuf, 0x3, 0x03, $dst$$reg ); 1.2320 + %} 1.2321 + 1.2322 + enc_class setLT_reg(eCXRegI dst) %{ 1.2323 + // SETLT $dst 1.2324 + emit_opcode(cbuf,0x0F); 1.2325 + emit_opcode(cbuf,0x9C); 1.2326 + emit_rm( cbuf, 0x3, 0x4, $dst$$reg ); 1.2327 + %} 1.2328 + 1.2329 + enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{ // cadd_cmpLT 1.2330 + int tmpReg = $tmp$$reg; 1.2331 + 1.2332 + // SUB $p,$q 1.2333 + emit_opcode(cbuf,0x2B); 1.2334 + emit_rm(cbuf, 0x3, $p$$reg, $q$$reg); 1.2335 + // SBB $tmp,$tmp 1.2336 + emit_opcode(cbuf,0x1B); 1.2337 + emit_rm(cbuf, 0x3, tmpReg, tmpReg); 1.2338 + // AND $tmp,$y 1.2339 + emit_opcode(cbuf,0x23); 1.2340 + emit_rm(cbuf, 0x3, tmpReg, $y$$reg); 1.2341 + // ADD $p,$tmp 1.2342 + emit_opcode(cbuf,0x03); 1.2343 + emit_rm(cbuf, 0x3, $p$$reg, tmpReg); 1.2344 + %} 1.2345 + 1.2346 + enc_class enc_cmpLTP_mem(eRegI p, eRegI q, memory mem, eCXRegI tmp) %{ // cadd_cmpLT 1.2347 + int tmpReg = $tmp$$reg; 1.2348 + 1.2349 + // SUB $p,$q 1.2350 + emit_opcode(cbuf,0x2B); 1.2351 + emit_rm(cbuf, 0x3, $p$$reg, $q$$reg); 1.2352 + // SBB $tmp,$tmp 1.2353 + emit_opcode(cbuf,0x1B); 1.2354 + emit_rm(cbuf, 0x3, tmpReg, tmpReg); 1.2355 + // AND 
$tmp,$y 1.2356 + cbuf.set_inst_mark(); // Mark start of opcode for reloc info in mem operand 1.2357 + emit_opcode(cbuf,0x23); 1.2358 + int reg_encoding = tmpReg; 1.2359 + int base = $mem$$base; 1.2360 + int index = $mem$$index; 1.2361 + int scale = $mem$$scale; 1.2362 + int displace = $mem$$disp; 1.2363 + bool disp_is_oop = $mem->disp_is_oop(); 1.2364 + encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop); 1.2365 + // ADD $p,$tmp 1.2366 + emit_opcode(cbuf,0x03); 1.2367 + emit_rm(cbuf, 0x3, $p$$reg, tmpReg); 1.2368 + %} 1.2369 + 1.2370 + enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{ 1.2371 + // TEST shift,32 1.2372 + emit_opcode(cbuf,0xF7); 1.2373 + emit_rm(cbuf, 0x3, 0, ECX_enc); 1.2374 + emit_d32(cbuf,0x20); 1.2375 + // JEQ,s small 1.2376 + emit_opcode(cbuf, 0x74); 1.2377 + emit_d8(cbuf, 0x04); 1.2378 + // MOV $dst.hi,$dst.lo 1.2379 + emit_opcode( cbuf, 0x8B ); 1.2380 + emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg ); 1.2381 + // CLR $dst.lo 1.2382 + emit_opcode(cbuf, 0x33); 1.2383 + emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg); 1.2384 +// small: 1.2385 + // SHLD $dst.hi,$dst.lo,$shift 1.2386 + emit_opcode(cbuf,0x0F); 1.2387 + emit_opcode(cbuf,0xA5); 1.2388 + emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg)); 1.2389 + // SHL $dst.lo,$shift" 1.2390 + emit_opcode(cbuf,0xD3); 1.2391 + emit_rm(cbuf, 0x3, 0x4, $dst$$reg ); 1.2392 + %} 1.2393 + 1.2394 + enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{ 1.2395 + // TEST shift,32 1.2396 + emit_opcode(cbuf,0xF7); 1.2397 + emit_rm(cbuf, 0x3, 0, ECX_enc); 1.2398 + emit_d32(cbuf,0x20); 1.2399 + // JEQ,s small 1.2400 + emit_opcode(cbuf, 0x74); 1.2401 + emit_d8(cbuf, 0x04); 1.2402 + // MOV $dst.lo,$dst.hi 1.2403 + emit_opcode( cbuf, 0x8B ); 1.2404 + emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) ); 1.2405 + // CLR $dst.hi 1.2406 + emit_opcode(cbuf, 0x33); 1.2407 + emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg)); 1.2408 +// small: 1.2409 + // 
SHRD $dst.lo,$dst.hi,$shift 1.2410 + emit_opcode(cbuf,0x0F); 1.2411 + emit_opcode(cbuf,0xAD); 1.2412 + emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg); 1.2413 + // SHR $dst.hi,$shift" 1.2414 + emit_opcode(cbuf,0xD3); 1.2415 + emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) ); 1.2416 + %} 1.2417 + 1.2418 + enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{ 1.2419 + // TEST shift,32 1.2420 + emit_opcode(cbuf,0xF7); 1.2421 + emit_rm(cbuf, 0x3, 0, ECX_enc); 1.2422 + emit_d32(cbuf,0x20); 1.2423 + // JEQ,s small 1.2424 + emit_opcode(cbuf, 0x74); 1.2425 + emit_d8(cbuf, 0x05); 1.2426 + // MOV $dst.lo,$dst.hi 1.2427 + emit_opcode( cbuf, 0x8B ); 1.2428 + emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) ); 1.2429 + // SAR $dst.hi,31 1.2430 + emit_opcode(cbuf, 0xC1); 1.2431 + emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) ); 1.2432 + emit_d8(cbuf, 0x1F ); 1.2433 +// small: 1.2434 + // SHRD $dst.lo,$dst.hi,$shift 1.2435 + emit_opcode(cbuf,0x0F); 1.2436 + emit_opcode(cbuf,0xAD); 1.2437 + emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg); 1.2438 + // SAR $dst.hi,$shift" 1.2439 + emit_opcode(cbuf,0xD3); 1.2440 + emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) ); 1.2441 + %} 1.2442 + 1.2443 + 1.2444 + // ----------------- Encodings for floating point unit ----------------- 1.2445 + // May leave result in FPU-TOS or FPU reg depending on opcodes 1.2446 + enc_class OpcReg_F (regF src) %{ // FMUL, FDIV 1.2447 + $$$emit8$primary; 1.2448 + emit_rm(cbuf, 0x3, $secondary, $src$$reg ); 1.2449 + %} 1.2450 + 1.2451 + // Pop argument in FPR0 with FSTP ST(0) 1.2452 + enc_class PopFPU() %{ 1.2453 + emit_opcode( cbuf, 0xDD ); 1.2454 + emit_d8( cbuf, 0xD8 ); 1.2455 + %} 1.2456 + 1.2457 + // !!!!! 
// equivalent to Pop_Reg_F
// Emits FSTP ST(i) for the x87 register $dst.
enc_class Pop_Reg_D( regD dst ) %{
  // FSTP ST(i) is the two-byte sequence DD, D8+i.
  const int fstp_second_byte = 0xD8 + $dst$$reg;
  emit_opcode( cbuf, 0xDD );
  emit_d8( cbuf, fstp_second_byte );
%}

// Emits FLD ST(i-1) for the x87 register $dst.
enc_class Push_Reg_D( regD dst ) %{
  // FLD ST(i-1) is the two-byte sequence D9, C0+(i-1).
  const int fld_second_byte = (0xC0 - 1) + $dst$$reg;
  emit_opcode( cbuf, 0xD9 );
  emit_d8( cbuf, fld_second_byte );
%}

// Loads the 80-bit constant at StubRoutines::addr_fpu_subnormal_bias1() and
// multiplies it into $dst, popping the loaded constant.
enc_class strictfp_bias1( regD dst ) %{
  const int bias1_address     = (int)StubRoutines::addr_fpu_subnormal_bias1();
  const int fmulp_second_byte = 0xC8 + $dst$$reg;

  // FLD m80real [bias1_address]
  emit_opcode( cbuf, 0xDB );
  emit_opcode( cbuf, 0x2D );
  emit_d32( cbuf, bias1_address );

  // FMULP ST(dst), ST0
  emit_opcode( cbuf, 0xDE );
  emit_opcode( cbuf, fmulp_second_byte );
%}

// Same sequence as strictfp_bias1, using the constant at
// StubRoutines::addr_fpu_subnormal_bias2().
enc_class strictfp_bias2( regD dst ) %{
  const int bias2_address     = (int)StubRoutines::addr_fpu_subnormal_bias2();
  const int fmulp_second_byte = 0xC8 + $dst$$reg;

  // FLD m80real [bias2_address]
  emit_opcode( cbuf, 0xDB );
  emit_opcode( cbuf, 0x2D );
  emit_d32( cbuf, bias2_address );

  // FMULP ST(dst), ST0
  emit_opcode( cbuf, 0xDE );
  emit_opcode( cbuf, fmulp_second_byte );
%}

// Special case for moving an integer register to a stack slot.
enc_class OpcPRegSS( stackSlotI dst, eRegI src ) %{ // RegSS
  const int source_encoding   = $src$$reg;
  const int slot_displacement = $dst$$disp;
  store_to_stackslot( cbuf, $primary, source_encoding, slot_displacement );
%}

// Special case for moving a register to a stack slot.
// Emits only the operand bytes for "[ESP + disp32], $src"; the opcode byte
// must already have been emitted by an earlier encoding.
enc_class RegSS( stackSlotI dst, eRegI src ) %{ // RegSS
  // Opcode already emitted
  emit_rm( cbuf, 0x02, $src$$reg, ESP_enc ); // R/M byte
  emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
  emit_d32(cbuf, $dst$$disp); // Displacement
%}

// Push the integer in stackSlot 'src' onto FP-stack
enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src]
  store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
%}

// Push the float in stackSlot 'src' onto FP-stack
enc_class Push_Mem_F( memory src ) %{ // FLD_S [ESP+src]
  store_to_stackslot( cbuf, 0xD9, 0x00, $src$$disp );
%}

// Push the double in stackSlot 'src' onto FP-stack
enc_class Push_Mem_D( memory src ) %{ // FLD_D [ESP+src]
  store_to_stackslot( cbuf, 0xDD, 0x00, $src$$disp );
%}

// Push FPU's TOS float to a stack-slot, and pop FPU-stack
enc_class Pop_Mem_F( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
  store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
%}

// Same as Pop_Mem_F except for opcode
// Push FPU's TOS double to a stack-slot, and pop FPU-stack
enc_class Pop_Mem_D( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
  store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
%}

// Float-operand twin of Pop_Reg_D: emits FSTP ST(i) for $dst.
enc_class Pop_Reg_F( regF dst ) %{
  emit_opcode( cbuf, 0xDD ); // FSTP ST(i)
  emit_d8( cbuf, 0xD8+$dst$$reg );
%}

// Float-operand twin of Push_Reg_D: emits FLD ST(i-1) for $dst.
enc_class Push_Reg_F( regF dst ) %{
  emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
  emit_d8( cbuf, 0xC0-1+$dst$$reg );
%}

// Push FPU's float to a stack-slot, and pop FPU-stack
// If $src is already FPR1 a plain FST (/2) is used; otherwise $src is first
// pushed with FLD and the store becomes FSTP (/3) so the pushed copy is popped.
enc_class Pop_Mem_Reg_F( stackSlotF dst, regF src ) %{
  int pop = 0x02;
  if ($src$$reg != FPR1L_enc) {
    emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
    pop = 0x03;
  }
  store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst]
%}

// Push FPU's double to a stack-slot, and pop FPU-stack
// Same FST / FLD+FSTP selection as Pop_Mem_Reg_F, with the double opcode 0xDD.
enc_class Pop_Mem_Reg_D( stackSlotD dst, regD src ) %{
  int pop = 0x02;
  if ($src$$reg != FPR1L_enc) {
    emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
    pop = 0x03;
  }
  store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst]
%}

// Push FPU's double to a FPU-stack-slot, and pop FPU-stack
// NOTE(review): $src is declared regF although this is the double variant;
// left as found -- confirm against the instructs that use this encoding.
enc_class Pop_Reg_Reg_D( regD dst, regF src ) %{
  int pop = 0xD0 - 1; // -1 since we skip FLD
  if ($src$$reg != FPR1L_enc) {
    emit_opcode( cbuf, 0xD9 ); // FLD ST(src-1)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
    pop = 0xD8;
  }
  emit_opcode( cbuf, 0xDD );
  emit_d8( cbuf, pop+$dst$$reg ); // FST<P> ST(i)
%}


// dst = src1 * src2 + src, computed on the x87 stack via MacroAssembler.
enc_class Mul_Add_F( regF dst, regF src, regF src1, regF src2 ) %{
  MacroAssembler masm(&cbuf);
  masm.fld_s( $src1$$reg-1); // nothing at TOS, load TOS from src1.reg
  masm.fmul( $src2$$reg+0); // value at TOS
  masm.fadd( $src$$reg+0); // value at TOS
  masm.fstp_d( $dst$$reg+0); // value at TOS, popped off after store
%}


// Pushes $dst and, unless $src is already FPR1, exchanges $src with the
// register just below the new top of stack.
enc_class Push_Reg_Mod_D( regD dst, regD src) %{
  // load dst in FPR0
  emit_opcode( cbuf, 0xD9 );
  emit_d8( cbuf, 0xC0-1+$dst$$reg );
  if ($src$$reg != FPR1L_enc) {
    // fincstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF7);
    // swap src with FPR1:
    // FXCH FPR1 with src
    emit_opcode(cbuf, 0xD9);
    emit_d8(cbuf, 0xC8-1+$src$$reg );
    // fdecstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF6);
  }
%}

// Moves two XMM doubles onto the x87 stack through a temporary stack slot:
// src1 is pushed first, then src0.  The 8 bytes reserved here are NOT
// released by this encoding (Push_ResultXD contains the matching ADD ESP,8).
enc_class Push_ModD_encoding( regXD src0, regXD src1) %{
  // Allocate 8 bytes (room for one double)
  emit_opcode(cbuf,0x83); // SUB ESP,8
  emit_opcode(cbuf,0xEC);
  emit_d8(cbuf,0x08);

  emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src1
  emit_opcode (cbuf, 0x0F );
  emit_opcode (cbuf, 0x11 );
  encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);

  emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
  encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

  emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src0
  emit_opcode (cbuf, 0x0F );
  emit_opcode (cbuf, 0x11 );
  encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);

  emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
  encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

%}

// Single-precision counterpart of Push_ModD_encoding: reserves 4 bytes and
// pushes src1, then src0, onto the x87 stack.  The stack space is not
// released here.
enc_class Push_ModX_encoding( regX src0, regX src1) %{
  // Allocate a word
  emit_opcode(cbuf,0x83); // SUB ESP,4
  emit_opcode(cbuf,0xEC);
  emit_d8(cbuf,0x04);

  emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src1
  emit_opcode (cbuf, 0x0F );
  emit_opcode (cbuf, 0x11 );
  encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);

  emit_opcode(cbuf,0xD9 ); // FLD [ESP]
  encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

  emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src0
  emit_opcode (cbuf, 0x0F );
  emit_opcode (cbuf, 0x11 );
  encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);

  emit_opcode(cbuf,0xD9 ); // FLD [ESP]
  encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

%}

// Pops the x87 top-of-stack double into XMM register $dst through [ESP],
// then releases 8 bytes of stack.
enc_class Push_ResultXD(regXD dst) %{
  store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [ESP]

  // UseXmmLoadAndClearUpper ? movsd dst,[esp] : movlpd dst,[esp]
  emit_opcode (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
  emit_opcode (cbuf, 0x0F );
  emit_opcode (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
  encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);

  emit_opcode(cbuf,0x83); // ADD ESP,8
  emit_opcode(cbuf,0xC4);
  emit_d8(cbuf,0x08);
%}

// Pops the x87 top-of-stack float into XMM register $dst through [ESP],
// then releases $d8 bytes of stack.
enc_class Push_ResultX(regX dst, immI d8) %{
  store_to_stackslot( cbuf, 0xD9, 0x03, 0 ); //FSTP_S [ESP]

  emit_opcode (cbuf, 0xF3 ); // MOVSS dst(xmm), [ESP]
  emit_opcode (cbuf, 0x0F );
  emit_opcode (cbuf, 0x10 );
  encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);

  emit_opcode(cbuf,0x83); // ADD ESP,d8 (4 or 8)
  emit_opcode(cbuf,0xC4);
  emit_d8(cbuf,$d8$$constant);
%}

// Reserves 8 bytes of stack and pushes XMM double $src onto the x87 stack
// through [ESP].  The stack space is not released here.
enc_class Push_SrcXD(regXD src) %{
  // Allocate 8 bytes (room for one double)
  emit_opcode(cbuf,0x83); // SUB ESP,8
  emit_opcode(cbuf,0xEC);
  emit_d8(cbuf,0x08);

  emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src
  emit_opcode (cbuf, 0x0F );
  emit_opcode (cbuf, 0x11 );
  encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);

  emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
  encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
%}

// Reserve an 8-byte temporary on the stack.
enc_class push_stack_temp_qword() %{
  emit_opcode(cbuf,0x83); // SUB ESP,8
  emit_opcode(cbuf,0xEC);
  emit_d8 (cbuf,0x08);
%}

// Release the 8-byte temporary reserved by push_stack_temp_qword.
enc_class pop_stack_temp_qword() %{
  emit_opcode(cbuf,0x83); // ADD ESP,8
  emit_opcode(cbuf,0xC4);
  emit_d8 (cbuf,0x08);
%}

// Pushes XMM double $xmm_src onto the x87 stack through [ESP].  No stack
// space is reserved here; the code as written stores straight to [ESP].
enc_class push_xmm_to_fpr1( regXD xmm_src ) %{
  emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], xmm_src
  emit_opcode (cbuf, 0x0F );
  emit_opcode (cbuf, 0x11 );
  encode_RegMem(cbuf, $xmm_src$$reg, ESP_enc, 0x4, 0, 0, false);

  emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
  encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
%}

// Compute X^Y using Intel's fast hardware instructions, if possible.
// Otherwise return a NaN.
// The trailing columns in the x87 comments show the FPU stack after each
// instruction (top of stack first).  The sequence writes EAX, EBX and ECX
// and uses the 8 bytes at [ESP] as scratch.
enc_class pow_exp_core_encoding %{
  // FPR1 holds Y*ln2(X). Compute FPR1 = 2^(Y*ln2(X))
  emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xC0); // fdup = fld st(0)          Q       Q
  emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xFC); // frndint               int(Q)      Q
  emit_opcode(cbuf,0xDC); emit_opcode(cbuf,0xE9); // fsub st(1) -= st(0);  int(Q) frac(Q)
  emit_opcode(cbuf,0xDB); // FISTP [ESP]           frac(Q)
  emit_opcode(cbuf,0x1C);
  emit_d8(cbuf,0x24);
  emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xF0); // f2xm1                 2^frac(Q)-1
  emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xE8); // fld1                  1 2^frac(Q)-1
  emit_opcode(cbuf,0xDE); emit_opcode(cbuf,0xC1); // faddp                 2^frac(Q)
  emit_opcode(cbuf,0x8B); // mov eax,[esp+0]=int(Q)
  encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 0, false);
  emit_opcode(cbuf,0xC7); // mov ecx,0xFFFFF800 - overflow mask
  emit_rm(cbuf, 0x3, 0x0, ECX_enc);
  emit_d32(cbuf,0xFFFFF800);
  emit_opcode(cbuf,0x81); // add eax,1023 - the double exponent bias
  emit_rm(cbuf, 0x3, 0x0, EAX_enc);
  emit_d32(cbuf,1023);
  emit_opcode(cbuf,0x8B); // mov ebx,eax
  emit_rm(cbuf, 0x3, EBX_enc, EAX_enc);
  emit_opcode(cbuf,0xC1); // shl eax,20 - Slide to exponent position
  emit_rm(cbuf,0x3,0x4,EAX_enc);
  emit_d8(cbuf,20);
  emit_opcode(cbuf,0x85); // test ebx,ecx - check for overflow
  emit_rm(cbuf, 0x3, EBX_enc, ECX_enc);
  emit_opcode(cbuf,0x0F); emit_opcode(cbuf,0x45); // CMOVne eax,ecx - overflow; stuff NAN into EAX
  emit_rm(cbuf, 0x3, EAX_enc, ECX_enc);
  emit_opcode(cbuf,0x89); // mov [esp+4],eax - Store as the high word of the double
  encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 4, false);
  emit_opcode(cbuf,0xC7); // mov [esp+0],0 - [ESP] = (double)(1<<int(Q)) = 2^int(Q)
  encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
  emit_d32(cbuf,0);
  emit_opcode(cbuf,0xDC); // fmul qword st(0),[esp+0]; FPR1 = 2^int(Q)*2^frac(Q) = 2^Q
  encode_RegMem(cbuf, 0x1, ESP_enc, 0x4, 0, 0, false);
%}

// enc_class Pop_Reg_Mod_D( regD dst, regD src)
// was replaced by Push_Result_Mod_D followed by Pop_Reg_X() or Pop_Mem_X()

// Emits the same fincstp / FXCH / fdecstp exchange as Push_Reg_Mod_D (skipped
// when $src is FPR1), presumably to undo it -- confirm against callers.
enc_class Push_Result_Mod_D( regD src) %{
  if ($src$$reg != FPR1L_enc) {
    // fincstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF7);
    // FXCH FPR1 with src
    emit_opcode(cbuf, 0xD9);
    emit_d8(cbuf, 0xC8-1+$src$$reg );
    // fdecstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF6);
  }
  // // following asm replaced with Pop_Reg_F or Pop_Mem_F
  // // FSTP FPR$dst$$reg
  // emit_opcode( cbuf, 0xDD );
  // emit_d8( cbuf, 0xD8+$dst$$reg );
%}

// Copies the FPU status word into the CPU flags and emits a JNP with a fixed
// displacement of 5 bytes; the instruction(s) being skipped are supplied by
// whatever encoding follows this one.
enc_class fnstsw_sahf_skip_parity() %{
  // fnstsw ax
  emit_opcode( cbuf, 0xDF );
  emit_opcode( cbuf, 0xE0 );
  // sahf
  emit_opcode( cbuf, 0x9E );
  // jnp ::skip
  emit_opcode( cbuf, 0x7B );
  emit_opcode( cbuf, 0x05 );
%}

// Floating-point remainder loop: repeats FPREM while the parity flag (loaded
// from the FPU status word via SAHF) is set.
enc_class emitModD() %{
  // fprem must be iterative
  // :: loop
  // fprem
  emit_opcode( cbuf, 0xD9 );
  emit_opcode( cbuf, 0xF8 );
  // wait
  emit_opcode( cbuf, 0x9b );
  // fnstsw ax
  emit_opcode( cbuf, 0xDF );
  emit_opcode( cbuf, 0xE0 );
  // sahf
  emit_opcode( cbuf, 0x9E );
  // jp ::loop
  // Near JP with rel32 = 0xFFFFFFF4 (-12): the 12 bytes emitted by this
  // encoding, including this 6-byte jump, so the target is the fprem above.
  emit_opcode( cbuf, 0x0F );
  emit_opcode( cbuf, 0x8A );
  emit_opcode( cbuf, 0xF4 );
  emit_opcode( cbuf, 0xFF );
  emit_opcode( cbuf, 0xFF );
  emit_opcode( cbuf, 0xFF );
%}

// Transfers the FPU compare result into the CPU flags, forcing the
// "less than" outcome when bit 0x0400 of the status word is set (unordered).
enc_class fpu_flags() %{
  // fnstsw_ax
  emit_opcode( cbuf, 0xDF);
  emit_opcode( cbuf, 0xE0);
  // test ax,0x0400
  emit_opcode( cbuf, 0x66 ); // operand-size prefix for 16-bit immediate
  emit_opcode( cbuf, 0xA9 );
  emit_d16 ( cbuf, 0x0400 );
  // // // This sequence works, but stalls for 12-16 cycles on PPro
  // // test rax,0x0400
  // emit_opcode( cbuf, 0xA9 );
  // emit_d32 ( cbuf, 0x00000400 );
  //
  // jz exit (no unordered comparison)
  emit_opcode( cbuf, 0x74 );
  emit_d8 ( cbuf, 0x02 ); // skips only the 2-byte "mov ah,1"; sahf always runs
  // mov ah,1 - treat as LT case (set carry flag)
  emit_opcode( cbuf, 0xB4 );
  emit_d8 ( cbuf, 0x01 );
  // sahf
  emit_opcode( cbuf, 0x9E);
%}

enc_class cmpF_P6_fixup() %{
  // Fixup the integer flags in case comparison involved a NaN
  //
  // JNP exit (no unordered comparison, P-flag is set by NaN)
  emit_opcode( cbuf, 0x7B );
  emit_d8 ( cbuf, 0x03 ); // skips "mov ah,1" (2 bytes) and "sahf" (1 byte)
  // MOV AH,1 - treat as LT case (set carry flag)
  emit_opcode( cbuf, 0xB4 );
  emit_d8 ( cbuf, 0x01 );
  // SAHF
  emit_opcode( cbuf, 0x9E);
  // NOP // target for branch to avoid branch to branch
  emit_opcode( cbuf, 0x90);
%}

// Pseudo-code for CmpF_Result below:
// fnstsw_ax();
// sahf();
// movl(dst, nan_result);
// jcc(Assembler::parity, exit);
// movl(dst, less_result);
// jcc(Assembler::below, exit);
// movl(dst, equal_result);
// jcc(Assembler::equal, exit);
// movl(dst, greater_result);

// Values actually emitted by CmpF_Result:
// less_result = -1;
// greater_result = 1;
// equal_result = 0;
// nan_result = -1;

// Materializes a three-way float compare result (-1, 0 or 1) in $dst from
// the FPU status word; an unordered compare yields -1.
// Each "mov dst,imm32" is 5 bytes and each short jcc is 2 bytes, which is
// where the hard-coded displacements 0x13, 0x0C and 0x05 come from.
enc_class CmpF_Result(eRegI dst) %{
  // fnstsw_ax();
  emit_opcode( cbuf, 0xDF);
  emit_opcode( cbuf, 0xE0);
  // sahf
  emit_opcode( cbuf, 0x9E);
  // movl(dst, nan_result);
  emit_opcode( cbuf, 0xB8 + $dst$$reg);
  emit_d32( cbuf, -1 );
  // jcc(Assembler::parity, exit);
  emit_opcode( cbuf, 0x7A );
  emit_d8 ( cbuf, 0x13 );
  // movl(dst, less_result);
  emit_opcode( cbuf, 0xB8 + $dst$$reg);
  emit_d32( cbuf, -1 );
  // jcc(Assembler::below, exit);
  emit_opcode( cbuf, 0x72 );
  emit_d8 ( cbuf, 0x0C );
  // movl(dst, equal_result);
  emit_opcode( cbuf, 0xB8 + $dst$$reg);
  emit_d32( cbuf, 0 );
  // jcc(Assembler::equal, exit);
  emit_opcode( cbuf, 0x74 );
  emit_d8 ( cbuf, 0x05 );
  // movl(dst, greater_result);
  emit_opcode( cbuf, 0xB8 + $dst$$reg);
  emit_d32( cbuf, 1 );
%}


// XMM version of CmpF_Result.  Because the XMM compare instructions set
// EFLAGS directly, it is simpler than the float version above.
// $dst is only adjusted here: decremented for unordered or "not above",
// incremented for "above", left unchanged for "equal".  It therefore relies
// on $dst already holding the "equal" result on entry -- presumably 0,
// confirm against the instructs that use this encoding.
enc_class CmpX_Result(eRegI dst) %{
  MacroAssembler _masm(&cbuf);
  Label nan, inc, done;

  __ jccb(Assembler::parity, nan);
  __ jccb(Assembler::equal, done);
  __ jccb(Assembler::above, inc);
  __ bind(nan);
  __ decrement(as_Register($dst$$reg));
  __ jmpb(done);
  __ bind(inc);
  __ increment(as_Register($dst$$reg));
  __ bind(done);
%}

// Compare the longs and set flags
// BROKEN!
// Do Not use as-is
// NOTE(review): the original author gives no reason for "BROKEN".  One
// visible problem is that the same flags are produced by a signed-style
// compare of the high words OR by a compare of the low words, which need
// different condition codes -- confirm before reviving this encoding.
enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
  // CMP $src1.hi,$src2.hi
  emit_opcode( cbuf, 0x3B );
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
  // JNE,s done
  emit_opcode(cbuf,0x75);
  emit_d8(cbuf, 2 ); // skips the 2-byte CMP below
  // CMP $src1.lo,$src2.lo
  emit_opcode( cbuf, 0x3B );
  emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
// done:
%}

// Sign-extends the 32-bit $src into the 64-bit register pair $dst.
enc_class convert_int_long( regL dst, eRegI src ) %{
  // mov $dst.lo,$src
  int dst_encoding = $dst$$reg;
  int src_encoding = $src$$reg;
  encode_Copy( cbuf, dst_encoding , src_encoding );
  // mov $dst.hi,$src
  encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
  // sar $dst.hi,31
  emit_opcode( cbuf, 0xC1 );
  emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
  emit_d8(cbuf, 0x1F );
%}

// Pushes the long in $src onto the CPU stack, loads it onto the x87 stack
// with FILD, then releases the 8 bytes again.
enc_class convert_long_double( eRegL src ) %{
  // push $src.hi
  emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
  // push $src.lo
  emit_opcode(cbuf, 0x50+$src$$reg );
  // fild 64-bits at [SP]
  emit_opcode(cbuf,0xdf);
  emit_d8(cbuf, 0x6C);
  emit_d8(cbuf, 0x24);
  emit_d8(cbuf, 0x00);
  // pop stack
  emit_opcode(cbuf, 0x83); // add SP, #8
  emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  emit_d8(cbuf, 0x8);
%}

// High half of a 32x32->64 signed multiply, arithmetically shifted right by
// ($cnt - 32).  The shift is omitted entirely when $cnt == 32.
enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
  // IMUL EDX:EAX,$src1
  emit_opcode( cbuf, 0xF7 );
  emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
  // SAR EDX,$cnt-32
  int shift_count = ((int)$cnt$$constant) - 32;
  if (shift_count > 0) {
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, $dst$$reg );
    emit_d8(cbuf, shift_count);
  }
%}

// this version doesn't have add sp, 8
// (the 8 bytes pushed here stay on the stack and must be released by a later
// encoding)
enc_class convert_long_double2( eRegL src ) %{
  // push $src.hi
  emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
  // push $src.lo
  emit_opcode(cbuf, 0x50+$src$$reg );
  // fild 64-bits at [SP]
  emit_opcode(cbuf,0xdf);
  emit_d8(cbuf, 0x6C);
  emit_d8(cbuf, 0x24);
  emit_d8(cbuf, 0x00);
%}

enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
  // Basic idea: long = (long)int * (long)int
  // IMUL EDX:EAX, src
  emit_opcode( cbuf, 0xF7 );
  emit_rm( cbuf, 0x3, 0x5, $src$$reg);
%}

enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
  // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
  // MUL EDX:EAX, src
  emit_opcode( cbuf, 0xF7 );
  emit_rm( cbuf, 0x3, 0x4, $src$$reg);
%}

// 64x64->64 multiply of the register pair $dst (EDX:EAX) by $src, using $tmp
// to accumulate the two cross products.
enc_class long_multiply( eADXRegL dst, eRegL src, eRegI tmp ) %{
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  // MOV $tmp,$src.lo
  encode_Copy( cbuf, $tmp$$reg, $src$$reg );
  // IMUL $tmp,EDX
  emit_opcode( cbuf, 0x0F );
  emit_opcode( cbuf, 0xAF );
  emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
  // MOV EDX,$src.hi
  encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
  // IMUL EDX,EAX
  emit_opcode( cbuf, 0x0F );
  emit_opcode( cbuf, 0xAF );
  emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
  // ADD $tmp,EDX
  emit_opcode( cbuf, 0x03 );
  emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
  // MUL EDX:EAX,$src.lo
  emit_opcode( cbuf, 0xF7 );
  emit_rm( cbuf, 0x3, 0x4, $src$$reg );
  // ADD EDX,$tmp
  emit_opcode( cbuf, 0x03 );
  emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
%}

// 64-bit multiply of the register pair $dst (EDX:EAX) by a small constant
// (the immL_127 operand is emitted as an 8-bit immediate for the IMUL).
enc_class long_multiply_con( eADXRegL dst, immL_127 src, eRegI tmp ) %{
  // Basic idea: lo(result) = lo(src * y_lo)
  //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
  // IMUL $tmp,EDX,$src
  emit_opcode( cbuf, 0x6B );
  emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
  emit_d8( cbuf, (int)$src$$constant );
  // MOV EDX,$src
  emit_opcode(cbuf, 0xB8 + EDX_enc);
  emit_d32( cbuf, (int)$src$$constant );
  // MUL EDX:EAX,EDX
  emit_opcode( cbuf, 0xF7 );
  emit_rm( cbuf, 0x3, 0x4, EDX_enc );
  // ADD EDX,$tmp
  emit_opcode( cbuf, 0x03 );
  emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
%}

// 64-bit divide via a direct call to SharedRuntime::ldiv.  Both operands are
// pushed on the stack (hi word first, src1 before src2) and the 16 bytes are
// released after the call.
enc_class long_div( eRegL src1, eRegL src2 ) %{
  // HIGH_FROM_LOW is applied to the register encoding (as in
  // convert_long_double), not to the finished PUSH opcode byte.
  // PUSH src1.hi
  emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src1$$reg) );
  // PUSH src1.lo
  emit_opcode(cbuf, 0x50+$src1$$reg );
  // PUSH src2.hi
  emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src2$$reg) );
  // PUSH src2.lo
  emit_opcode(cbuf, 0x50+$src2$$reg );
  // CALL directly to the runtime
  cbuf.set_inst_mark();
  emit_opcode(cbuf,0xE8); // Call into runtime
  // rel32 is relative to the end of the 4-byte displacement, hence the "- 4".
  emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.code_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
  // Restore stack
  emit_opcode(cbuf, 0x83); // add SP, #framesize
  emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  emit_d8(cbuf, 4*4);
%}

// 64-bit remainder via a direct call to SharedRuntime::lrem; identical to
// long_div apart from the call target.
enc_class long_mod( eRegL src1, eRegL src2 ) %{
  // HIGH_FROM_LOW is applied to the register encoding (as in
  // convert_long_double), not to the finished PUSH opcode byte.
  // PUSH src1.hi
  emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src1$$reg) );
  // PUSH src1.lo
  emit_opcode(cbuf, 0x50+$src1$$reg );
  // PUSH src2.hi
  emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src2$$reg) );
  // PUSH src2.lo
  emit_opcode(cbuf, 0x50+$src2$$reg );
  // CALL directly to the runtime
  cbuf.set_inst_mark();
  emit_opcode(cbuf,0xE8); // Call into runtime
  // rel32 is relative to the end of the 4-byte displacement, hence the "- 4".
  emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.code_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
  // Restore stack
  emit_opcode(cbuf, 0x83); // add SP, #framesize
  emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  emit_d8(cbuf, 4*4);
%}

// Sets ZF iff the long in $src is zero (ORs both halves into $tmp).
enc_class long_cmp_flags0( eRegL src, eRegI tmp ) %{
  // MOV $tmp,$src.lo
  emit_opcode(cbuf, 0x8B);
  emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
  // OR $tmp,$src.hi
  emit_opcode(cbuf, 0x0B);
  emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
%}

// Compares the low words and, only if they are equal, the high words; the
// resulting ZF says whether the two longs are equal.
enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
  // CMP $src1.lo,$src2.lo
  emit_opcode( cbuf, 0x3B );
  emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
  // JNE,s skip
  emit_cc(cbuf, 0x70, 0x5);
  emit_d8(cbuf,2); // skips the 2-byte CMP below
  // CMP $src1.hi,$src2.hi
  emit_opcode( cbuf, 0x3B );
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
%}

// Full 64-bit subtract-for-flags: CMP on the low words, then SBB on a copy
// of src1.hi so neither source is modified.
enc_class long_cmp_flags2( eRegL src1, eRegL src2, eRegI tmp ) %{
  // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits
  emit_opcode( cbuf, 0x3B );
  emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
  // MOV $tmp,$src1.hi
  emit_opcode( cbuf, 0x8B );
  emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
  // SBB $tmp,$src2.hi\t! Compute flags for long compare
  emit_opcode( cbuf, 0x1B );
  emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
%}

// Flags for (0 - $src) computed as a 64-bit subtract, using $tmp as the zero.
enc_class long_cmp_flags3( eRegL src, eRegI tmp ) %{
  // XOR $tmp,$tmp
  emit_opcode(cbuf,0x33); // XOR
  emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
  // CMP $tmp,$src.lo
  emit_opcode( cbuf, 0x3B );
  emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
  // SBB $tmp,$src.hi
  emit_opcode( cbuf, 0x1B );
  emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
%}

// Sniff, sniff... smells like Gnu Superoptimizer
// 64-bit negate: negate both halves, then subtract the borrow produced by
// negating the low half from the high half.
enc_class neg_long( eRegL dst ) %{
  emit_opcode(cbuf,0xF7); // NEG hi
  emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
  emit_opcode(cbuf,0xF7); // NEG lo
  emit_rm (cbuf,0x3, 0x3, $dst$$reg );
  emit_opcode(cbuf,0x83); // SBB hi,0
  emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
  emit_d8 (cbuf,0 );
%}

// MOVQ load: XMM register $dst <- memory operand.
enc_class movq_ld(regXD dst, memory mem) %{
  MacroAssembler _masm(&cbuf);
  Address madr = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
  __ movq(as_XMMRegister($dst$$reg), madr);
%}

// MOVQ store: memory operand <- XMM register $src.
enc_class movq_st(memory mem, regXD src) %{
  MacroAssembler _masm(&cbuf);
  Address madr = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
  __ movq(madr, as_XMMRegister($src$$reg));
%}

// Copies $src to $dst, then applies PUNPCKLBW and PSHUFLW (selector 0x00) to
// $dst -- by the name, a broadcast of the low byte across 8 byte lanes.
enc_class pshufd_8x8(regX dst, regX src) %{
  MacroAssembler _masm(&cbuf);

  encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
  __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
  __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
%}

// PSHUFLW with selector 0x00 from $src into $dst -- by the name, a broadcast
// of the low 16-bit word across 4 word lanes.
enc_class pshufd_4x16(regX dst, regX src) %{
  MacroAssembler _masm(&cbuf);

  __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
%}

// PSHUFD $dst,$src with the caller-supplied shuffle selector $mode.
enc_class pshufd(regXD dst, regXD src, int mode) %{
  MacroAssembler _masm(&cbuf);

  __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
%}

// PXOR $dst,$src.
enc_class pxor(regXD dst, regXD src) %{
  MacroAssembler _masm(&cbuf);

  __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
%}

// MOVD: general register $src -> XMM register $dst.
enc_class mov_i2x(regXD dst, eRegI src) %{
  MacroAssembler _masm(&cbuf);

  __ movd(as_XMMRegister($dst$$reg), as_Register($src$$reg));
%}


// Because the transitions from emitted code to the runtime
// monitorenter/exit helper stubs are so slow it's critical that
// we inline both the stack-locking fast-path and the inflated fast path.
//
// See also: cmpFastLock and cmpFastUnlock.
//
// What follows is a specialized inline transliteration of the code
// in slow_enter() and slow_exit().  If we're concerned about I$ bloat
// another option would be to emit TrySlowEnter and TrySlowExit methods
// at startup-time.  These methods would accept arguments as
// (rax=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
// indications in the icc.ZFlag.  Fast_Lock and Fast_Unlock would simply
// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
// In practice, however, the # of lock sites is bounded and is usually small.
// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
// if the processor uses simple bimodal branch predictors keyed by EIP,
// since the helper routines would be called from multiple synchronization
// sites.
  //
  // An even better approach would be to write "MonitorEnter()" and "MonitorExit()"
  // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
  // to those specialized methods.  That'd give us a mostly platform-independent
  // implementation that the JITs could optimize and inline at their pleasure.
  // Done correctly, the only time we'd need to cross to native code would be
  // to park() or unpark() threads.  We'd also need a few more unsafe operators
  // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
  // (b) explicit barriers or fence operations.
  //
  // TODO:
  //
  // *  Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
  //    This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
  //    Given TLAB allocation, Self is usually manifested in a register, so passing it into
  //    the lock operators would typically be faster than reifying Self.
  //
  // *  Ideally I'd define the primitives as:
  //       fast_lock   (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
  //       fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
  //    Unfortunately ADLC bugs prevent us from expressing the ideal form.
  //    Instead, we're stuck with the rather awkward and brittle register assignments below.
  //    Furthermore the register assignments are overconstrained, possibly resulting in
  //    sub-optimal code near the synchronization site.
  //
  // *  Eliminate the sp-proximity tests and just use "== Self" tests instead.
  //    Alternately, use a better sp-proximity test.
  //
  // *  Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
  //    Either one is sufficient to uniquely identify a thread.
  //    TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
  //
  // *  Intrinsify notify() and notifyAll() for the common cases where the
  //    object is locked by the calling thread but the waitlist is empty.
  //    This avoids the expensive JNI calls to JVM_Notify() and JVM_NotifyAll().
  //
  // *  Use jccb and jmpb instead of jcc and jmp to improve code density.
  //    But beware of excessive branch density on AMD Opterons.
  //
  // *  Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
  //    or failure of the fast-path.  If the fast-path fails then we pass
  //    control to the slow-path, typically in C.  In Fast_Lock and
  //    Fast_Unlock we often branch to DONE_LABEL, just to find that C2
  //    will emit a conditional branch immediately after the node.
  //    So we have branches to branches and lots of ICC.ZF games.
  //    Instead, it might be better to have C2 pass a "FailureLabel"
  //    into Fast_Lock and Fast_Unlock.  In the case of success, control
  //    will drop through the node.  ICC.ZF is undefined at exit.
1.3234 + // In the case of failure, the node will branch directly to the 1.3235 + // FailureLabel 1.3236 + 1.3237 + 1.3238 + // obj: object to lock 1.3239 + // box: on-stack box address (displaced header location) - KILLED 1.3240 + // rax,: tmp -- KILLED 1.3241 + // scr: tmp -- KILLED 1.3242 + enc_class Fast_Lock( eRegP obj, eRegP box, eAXRegI tmp, eRegP scr ) %{ 1.3243 + 1.3244 + Register objReg = as_Register($obj$$reg); 1.3245 + Register boxReg = as_Register($box$$reg); 1.3246 + Register tmpReg = as_Register($tmp$$reg); 1.3247 + Register scrReg = as_Register($scr$$reg); 1.3248 + 1.3249 + // Ensure the register assignents are disjoint 1.3250 + guarantee (objReg != boxReg, "") ; 1.3251 + guarantee (objReg != tmpReg, "") ; 1.3252 + guarantee (objReg != scrReg, "") ; 1.3253 + guarantee (boxReg != tmpReg, "") ; 1.3254 + guarantee (boxReg != scrReg, "") ; 1.3255 + guarantee (tmpReg == as_Register(EAX_enc), "") ; 1.3256 + 1.3257 + MacroAssembler masm(&cbuf); 1.3258 + 1.3259 + if (_counters != NULL) { 1.3260 + masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr())); 1.3261 + } 1.3262 + if (EmitSync & 1) { 1.3263 + // set box->dhw = unused_mark (3) 1.3264 + // Force all sync thru slow-path: slow_enter() and slow_exit() 1.3265 + masm.movl (Address(boxReg, 0), intptr_t(markOopDesc::unused_mark())) ; 1.3266 + masm.cmpl (rsp, 0) ; 1.3267 + } else 1.3268 + if (EmitSync & 2) { 1.3269 + Label DONE_LABEL ; 1.3270 + if (UseBiasedLocking) { 1.3271 + // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument. 
1.3272 + masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters); 1.3273 + } 1.3274 + 1.3275 + masm.movl (tmpReg, Address(objReg, 0)) ; // fetch markword 1.3276 + masm.orl (tmpReg, 0x1); 1.3277 + masm.movl (Address(boxReg, 0), tmpReg); // Anticipate successful CAS 1.3278 + if (os::is_MP()) { masm.lock(); } 1.3279 + masm.cmpxchg(boxReg, Address(objReg, 0)); // Updates tmpReg 1.3280 + masm.jcc(Assembler::equal, DONE_LABEL); 1.3281 + // Recursive locking 1.3282 + masm.subl(tmpReg, rsp); 1.3283 + masm.andl(tmpReg, 0xFFFFF003 ); 1.3284 + masm.movl(Address(boxReg, 0), tmpReg); 1.3285 + masm.bind(DONE_LABEL) ; 1.3286 + } else { 1.3287 + // Possible cases that we'll encounter in fast_lock 1.3288 + // ------------------------------------------------ 1.3289 + // * Inflated 1.3290 + // -- unlocked 1.3291 + // -- Locked 1.3292 + // = by self 1.3293 + // = by other 1.3294 + // * biased 1.3295 + // -- by Self 1.3296 + // -- by other 1.3297 + // * neutral 1.3298 + // * stack-locked 1.3299 + // -- by self 1.3300 + // = sp-proximity test hits 1.3301 + // = sp-proximity test generates false-negative 1.3302 + // -- by other 1.3303 + // 1.3304 + 1.3305 + Label IsInflated, DONE_LABEL, PopDone ; 1.3306 + 1.3307 + // TODO: optimize away redundant LDs of obj->mark and improve the markword triage 1.3308 + // order to reduce the number of conditional branches in the most common cases. 1.3309 + // Beware -- there's a subtle invariant that fetch of the markword 1.3310 + // at [FETCH], below, will never observe a biased encoding (*101b). 1.3311 + // If this invariant is not held we risk exclusion (safety) failure. 
1.3312 + if (UseBiasedLocking) { 1.3313 + masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters); 1.3314 + } 1.3315 + 1.3316 + masm.movl (tmpReg, Address(objReg, 0)) ; // [FETCH] 1.3317 + masm.testl (tmpReg, 0x02) ; // Inflated v (Stack-locked or neutral) 1.3318 + masm.jccb (Assembler::notZero, IsInflated) ; 1.3319 + 1.3320 + // Attempt stack-locking ... 1.3321 + masm.orl (tmpReg, 0x1); 1.3322 + masm.movl (Address(boxReg, 0), tmpReg); // Anticipate successful CAS 1.3323 + if (os::is_MP()) { masm.lock(); } 1.3324 + masm.cmpxchg(boxReg, Address(objReg, 0)); // Updates tmpReg 1.3325 + if (_counters != NULL) { 1.3326 + masm.cond_inc32(Assembler::equal, 1.3327 + ExternalAddress((address)_counters->fast_path_entry_count_addr())); 1.3328 + } 1.3329 + masm.jccb (Assembler::equal, DONE_LABEL); 1.3330 + 1.3331 + // Recursive locking 1.3332 + masm.subl(tmpReg, rsp); 1.3333 + masm.andl(tmpReg, 0xFFFFF003 ); 1.3334 + masm.movl(Address(boxReg, 0), tmpReg); 1.3335 + if (_counters != NULL) { 1.3336 + masm.cond_inc32(Assembler::equal, 1.3337 + ExternalAddress((address)_counters->fast_path_entry_count_addr())); 1.3338 + } 1.3339 + masm.jmp (DONE_LABEL) ; 1.3340 + 1.3341 + masm.bind (IsInflated) ; 1.3342 + 1.3343 + // The object is inflated. 1.3344 + // 1.3345 + // TODO-FIXME: eliminate the ugly use of manifest constants: 1.3346 + // Use markOopDesc::monitor_value instead of "2". 1.3347 + // use markOop::unused_mark() instead of "3". 1.3348 + // The tmpReg value is an objectMonitor reference ORed with 1.3349 + // markOopDesc::monitor_value (2). We can either convert tmpReg to an 1.3350 + // objectmonitor pointer by masking off the "2" bit or we can just 1.3351 + // use tmpReg as an objectmonitor pointer but bias the objectmonitor 1.3352 + // field offsets with "-2" to compensate for and annul the low-order tag bit. 1.3353 + // 1.3354 + // I use the latter as it avoids AGI stalls. 
1.3355 + // As such, we write "mov r, [tmpReg+OFFSETOF(Owner)-2]" 1.3356 + // instead of "mov r, [tmpReg+OFFSETOF(Owner)]". 1.3357 + // 1.3358 + #define OFFSET_SKEWED(f) ((ObjectMonitor::f ## _offset_in_bytes())-2) 1.3359 + 1.3360 + // boxReg refers to the on-stack BasicLock in the current frame. 1.3361 + // We'd like to write: 1.3362 + // set box->_displaced_header = markOop::unused_mark(). Any non-0 value suffices. 1.3363 + // This is convenient but results a ST-before-CAS penalty. The following CAS suffers 1.3364 + // additional latency as we have another ST in the store buffer that must drain. 1.3365 + 1.3366 + if (EmitSync & 8192) { 1.3367 + masm.movl (Address(boxReg, 0), 3) ; // results in ST-before-CAS penalty 1.3368 + masm.get_thread (scrReg) ; 1.3369 + masm.movl (boxReg, tmpReg); // consider: LEA box, [tmp-2] 1.3370 + masm.movl (tmpReg, 0); // consider: xor vs mov 1.3371 + if (os::is_MP()) { masm.lock(); } 1.3372 + masm.cmpxchg (scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 1.3373 + } else 1.3374 + if ((EmitSync & 128) == 0) { // avoid ST-before-CAS 1.3375 + masm.movl (scrReg, boxReg) ; 1.3376 + masm.movl (boxReg, tmpReg); // consider: LEA box, [tmp-2] 1.3377 + 1.3378 + // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes 1.3379 + if ((EmitSync & 2048) && VM_Version::supports_3dnow() && os::is_MP()) { 1.3380 + // prefetchw [eax + Offset(_owner)-2] 1.3381 + masm.emit_raw (0x0F) ; 1.3382 + masm.emit_raw (0x0D) ; 1.3383 + masm.emit_raw (0x48) ; 1.3384 + masm.emit_raw (ObjectMonitor::owner_offset_in_bytes()-2) ; 1.3385 + } 1.3386 + 1.3387 + if ((EmitSync & 64) == 0) { 1.3388 + // Optimistic form: consider XORL tmpReg,tmpReg 1.3389 + masm.movl (tmpReg, 0 ) ; 1.3390 + } else { 1.3391 + // Can suffer RTS->RTO upgrades on shared or cold $ lines 1.3392 + // Test-And-CAS instead of CAS 1.3393 + masm.movl (tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; // rax, = m->_owner 1.3394 + masm.testl (tmpReg, 
tmpReg) ; // Locked ? 1.3395 + masm.jccb (Assembler::notZero, DONE_LABEL) ; 1.3396 + } 1.3397 + 1.3398 + // Appears unlocked - try to swing _owner from null to non-null. 1.3399 + // Ideally, I'd manifest "Self" with get_thread and then attempt 1.3400 + // to CAS the register containing Self into m->Owner. 1.3401 + // But we don't have enough registers, so instead we can either try to CAS 1.3402 + // rsp or the address of the box (in scr) into &m->owner. If the CAS succeeds 1.3403 + // we later store "Self" into m->Owner. Transiently storing a stack address 1.3404 + // (rsp or the address of the box) into m->owner is harmless. 1.3405 + // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand. 1.3406 + if (os::is_MP()) { masm.lock(); } 1.3407 + masm.cmpxchg (scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 1.3408 + masm.movl (Address(scrReg, 0), 3) ; // box->_displaced_header = 3 1.3409 + masm.jccb (Assembler::notZero, DONE_LABEL) ; 1.3410 + masm.get_thread (scrReg) ; // beware: clobbers ICCs 1.3411 + masm.movl (Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), scrReg) ; 1.3412 + masm.xorl (boxReg, boxReg) ; // set icc.ZFlag = 1 to indicate success 1.3413 + 1.3414 + // If the CAS fails we can either retry or pass control to the slow-path. 1.3415 + // We use the latter tactic. 1.3416 + // Pass the CAS result in the icc.ZFlag into DONE_LABEL 1.3417 + // If the CAS was successful ... 1.3418 + // Self has acquired the lock 1.3419 + // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it. 1.3420 + // Intentional fall-through into DONE_LABEL ... 
1.3421 + } else { 1.3422 + masm.movl (Address(boxReg, 0), 3) ; // results in ST-before-CAS penalty 1.3423 + masm.movl (boxReg, tmpReg) ; 1.3424 + 1.3425 + // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes 1.3426 + if ((EmitSync & 2048) && VM_Version::supports_3dnow() && os::is_MP()) { 1.3427 + // prefetchw [eax + Offset(_owner)-2] 1.3428 + masm.emit_raw (0x0F) ; 1.3429 + masm.emit_raw (0x0D) ; 1.3430 + masm.emit_raw (0x48) ; 1.3431 + masm.emit_raw (ObjectMonitor::owner_offset_in_bytes()-2) ; 1.3432 + } 1.3433 + 1.3434 + if ((EmitSync & 64) == 0) { 1.3435 + // Optimistic form 1.3436 + masm.xorl (tmpReg, tmpReg) ; 1.3437 + } else { 1.3438 + // Can suffer RTS->RTO upgrades on shared or cold $ lines 1.3439 + masm.movl (tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; // rax, = m->_owner 1.3440 + masm.testl (tmpReg, tmpReg) ; // Locked ? 1.3441 + masm.jccb (Assembler::notZero, DONE_LABEL) ; 1.3442 + } 1.3443 + 1.3444 + // Appears unlocked - try to swing _owner from null to non-null. 1.3445 + // Use either "Self" (in scr) or rsp as thread identity in _owner. 1.3446 + // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand. 1.3447 + masm.get_thread (scrReg) ; 1.3448 + if (os::is_MP()) { masm.lock(); } 1.3449 + masm.cmpxchg (scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 1.3450 + 1.3451 + // If the CAS fails we can either retry or pass control to the slow-path. 1.3452 + // We use the latter tactic. 1.3453 + // Pass the CAS result in the icc.ZFlag into DONE_LABEL 1.3454 + // If the CAS was successful ... 1.3455 + // Self has acquired the lock 1.3456 + // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it. 1.3457 + // Intentional fall-through into DONE_LABEL ... 1.3458 + } 1.3459 + 1.3460 + // DONE_LABEL is a hot target - we'd really like to place it at the 1.3461 + // start of cache line by padding with NOPs. 
1.3462 + // See the AMD and Intel software optimization manuals for the 1.3463 + // most efficient "long" NOP encodings. 1.3464 + // Unfortunately none of our alignment mechanisms suffice. 1.3465 + masm.bind(DONE_LABEL); 1.3466 + 1.3467 + // Avoid branch-to-branch on AMD processors 1.3468 + // This appears to be superstition. 1.3469 + if (EmitSync & 32) masm.nop() ; 1.3470 + 1.3471 + 1.3472 + // At DONE_LABEL the icc ZFlag is set as follows ... 1.3473 + // Fast_Unlock uses the same protocol. 1.3474 + // ZFlag == 1 -> Success 1.3475 + // ZFlag == 0 -> Failure - force control through the slow-path 1.3476 + } 1.3477 + %} 1.3478 + 1.3479 + // obj: object to unlock 1.3480 + // box: box address (displaced header location), killed. Must be EAX. 1.3481 + // rbx,: killed tmp; cannot be obj nor box. 1.3482 + // 1.3483 + // Some commentary on balanced locking: 1.3484 + // 1.3485 + // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites. 1.3486 + // Methods that don't have provably balanced locking are forced to run in the 1.3487 + // interpreter - such methods won't be compiled to use fast_lock and fast_unlock. 1.3488 + // The interpreter provides two properties: 1.3489 + // I1: At return-time the interpreter automatically and quietly unlocks any 1.3490 + // objects acquired the current activation (frame). Recall that the 1.3491 + // interpreter maintains an on-stack list of locks currently held by 1.3492 + // a frame. 1.3493 + // I2: If a method attempts to unlock an object that is not held by the 1.3494 + // the frame the interpreter throws IMSX. 1.3495 + // 1.3496 + // Lets say A(), which has provably balanced locking, acquires O and then calls B(). 1.3497 + // B() doesn't have provably balanced locking so it runs in the interpreter. 1.3498 + // Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O 1.3499 + // is still locked by A(). 1.3500 + // 1.3501 + // The only other source of unbalanced locking would be JNI. 
The "Java Native Interface: 1.3502 + // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter 1.3503 + // should not be unlocked by "normal" java-level locking and vice-versa. The specification 1.3504 + // doesn't specify what will occur if a program engages in such mixed-mode locking, however. 1.3505 + 1.3506 + enc_class Fast_Unlock( nabxRegP obj, eAXRegP box, eRegP tmp) %{ 1.3507 + 1.3508 + Register objReg = as_Register($obj$$reg); 1.3509 + Register boxReg = as_Register($box$$reg); 1.3510 + Register tmpReg = as_Register($tmp$$reg); 1.3511 + 1.3512 + guarantee (objReg != boxReg, "") ; 1.3513 + guarantee (objReg != tmpReg, "") ; 1.3514 + guarantee (boxReg != tmpReg, "") ; 1.3515 + guarantee (boxReg == as_Register(EAX_enc), "") ; 1.3516 + MacroAssembler masm(&cbuf); 1.3517 + 1.3518 + if (EmitSync & 4) { 1.3519 + // Disable - inhibit all inlining. Force control through the slow-path 1.3520 + masm.cmpl (rsp, 0) ; 1.3521 + } else 1.3522 + if (EmitSync & 8) { 1.3523 + Label DONE_LABEL ; 1.3524 + if (UseBiasedLocking) { 1.3525 + masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL); 1.3526 + } 1.3527 + // classic stack-locking code ... 1.3528 + masm.movl (tmpReg, Address(boxReg, 0)) ; 1.3529 + masm.testl (tmpReg, tmpReg) ; 1.3530 + masm.jcc (Assembler::zero, DONE_LABEL) ; 1.3531 + if (os::is_MP()) { masm.lock(); } 1.3532 + masm.cmpxchg(tmpReg, Address(objReg, 0)); // Uses EAX which is box 1.3533 + masm.bind(DONE_LABEL); 1.3534 + } else { 1.3535 + Label DONE_LABEL, Stacked, CheckSucc, Inflated ; 1.3536 + 1.3537 + // Critically, the biased locking test must have precedence over 1.3538 + // and appear before the (box->dhw == 0) recursive stack-lock test. 
1.3539 + if (UseBiasedLocking) { 1.3540 + masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL); 1.3541 + } 1.3542 + 1.3543 + masm.cmpl (Address(boxReg, 0), 0) ; // Examine the displaced header 1.3544 + masm.movl (tmpReg, Address(objReg, 0)) ; // Examine the object's markword 1.3545 + masm.jccb (Assembler::zero, DONE_LABEL) ; // 0 indicates recursive stack-lock 1.3546 + 1.3547 + masm.testl (tmpReg, 0x02) ; // Inflated? 1.3548 + masm.jccb (Assembler::zero, Stacked) ; 1.3549 + 1.3550 + masm.bind (Inflated) ; 1.3551 + // It's inflated. 1.3552 + // Despite our balanced locking property we still check that m->_owner == Self 1.3553 + // as java routines or native JNI code called by this thread might 1.3554 + // have released the lock. 1.3555 + // Refer to the comments in synchronizer.cpp for how we might encode extra 1.3556 + // state in _succ so we can avoid fetching EntryList|cxq. 1.3557 + // 1.3558 + // I'd like to add more cases in fast_lock() and fast_unlock() -- 1.3559 + // such as recursive enter and exit -- but we have to be wary of 1.3560 + // I$ bloat, T$ effects and BP$ effects. 1.3561 + // 1.3562 + // If there's no contention try a 1-0 exit. That is, exit without 1.3563 + // a costly MEMBAR or CAS. See synchronizer.cpp for details on how 1.3564 + // we detect and recover from the race that the 1-0 exit admits. 1.3565 + // 1.3566 + // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier 1.3567 + // before it STs null into _owner, releasing the lock. Updates 1.3568 + // to data protected by the critical section must be visible before 1.3569 + // we drop the lock (and thus before any other thread could acquire 1.3570 + // the lock and observe the fields protected by the lock). 1.3571 + // IA32's memory-model is SPO, so STs are ordered with respect to 1.3572 + // each other and there's no need for an explicit barrier (fence). 1.3573 + // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html. 
1.3574 + 1.3575 + masm.get_thread (boxReg) ; 1.3576 + if ((EmitSync & 4096) && VM_Version::supports_3dnow() && os::is_MP()) { 1.3577 + // prefetchw [ebx + Offset(_owner)-2] 1.3578 + masm.emit_raw (0x0F) ; 1.3579 + masm.emit_raw (0x0D) ; 1.3580 + masm.emit_raw (0x4B) ; 1.3581 + masm.emit_raw (ObjectMonitor::owner_offset_in_bytes()-2) ; 1.3582 + } 1.3583 + 1.3584 + // Note that we could employ various encoding schemes to reduce 1.3585 + // the number of loads below (currently 4) to just 2 or 3. 1.3586 + // Refer to the comments in synchronizer.cpp. 1.3587 + // In practice the chain of fetches doesn't seem to impact performance, however. 1.3588 + if ((EmitSync & 65536) == 0 && (EmitSync & 256)) { 1.3589 + // Attempt to reduce branch density - AMD's branch predictor. 1.3590 + masm.xorl (boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 1.3591 + masm.orl (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ; 1.3592 + masm.orl (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; 1.3593 + masm.orl (boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; 1.3594 + masm.jccb (Assembler::notZero, DONE_LABEL) ; 1.3595 + masm.movl (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), 0) ; 1.3596 + masm.jmpb (DONE_LABEL) ; 1.3597 + } else { 1.3598 + masm.xorl (boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 1.3599 + masm.orl (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ; 1.3600 + masm.jccb (Assembler::notZero, DONE_LABEL) ; 1.3601 + masm.movl (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; 1.3602 + masm.orl (boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; 1.3603 + masm.jccb (Assembler::notZero, CheckSucc) ; 1.3604 + masm.movl (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), 0) ; 1.3605 + masm.jmpb (DONE_LABEL) ; 1.3606 + } 1.3607 + 1.3608 + // The Following code fragment (EmitSync & 65536) 
improves the performance of 1.3609 + // contended applications and contended synchronization microbenchmarks. 1.3610 + // Unfortunately the emission of the code - even though not executed - causes regressions 1.3611 + // in scimark and jetstream, evidently because of $ effects. Replacing the code 1.3612 + // with an equal number of never-executed NOPs results in the same regression. 1.3613 + // We leave it off by default. 1.3614 + 1.3615 + if ((EmitSync & 65536) != 0) { 1.3616 + Label LSuccess, LGoSlowPath ; 1.3617 + 1.3618 + masm.bind (CheckSucc) ; 1.3619 + 1.3620 + // Optional pre-test ... it's safe to elide this 1.3621 + if ((EmitSync & 16) == 0) { 1.3622 + masm.cmpl (Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ; 1.3623 + masm.jccb (Assembler::zero, LGoSlowPath) ; 1.3624 + } 1.3625 + 1.3626 + // We have a classic Dekker-style idiom: 1.3627 + // ST m->_owner = 0 ; MEMBAR; LD m->_succ 1.3628 + // There are a number of ways to implement the barrier: 1.3629 + // (1) lock:andl &m->_owner, 0 1.3630 + // is fast, but mask doesn't currently support the "ANDL M,IMM32" form. 1.3631 + // LOCK: ANDL [ebx+Offset(_Owner)-2], 0 1.3632 + // Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8 1.3633 + // (2) If supported, an explicit MFENCE is appealing. 1.3634 + // In older IA32 processors MFENCE is slower than lock:add or xchg 1.3635 + // particularly if the write-buffer is full as might be the case if 1.3636 + // if stores closely precede the fence or fence-equivalent instruction. 1.3637 + // In more modern implementations MFENCE appears faster, however. 1.3638 + // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack 1.3639 + // The $lines underlying the top-of-stack should be in M-state. 1.3640 + // The locked add instruction is serializing, of course. 
1.3641 + // (4) Use xchg, which is serializing 1.3642 + // mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works 1.3643 + // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0. 1.3644 + // The integer condition codes will tell us if succ was 0. 1.3645 + // Since _succ and _owner should reside in the same $line and 1.3646 + // we just stored into _owner, it's likely that the $line 1.3647 + // remains in M-state for the lock:orl. 1.3648 + // 1.3649 + // We currently use (3), although it's likely that switching to (2) 1.3650 + // is correct for the future. 1.3651 + 1.3652 + masm.movl (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), 0) ; 1.3653 + if (os::is_MP()) { 1.3654 + if (VM_Version::supports_sse2() && 1 == FenceInstruction) { 1.3655 + masm.emit_raw (0x0F) ; // MFENCE ... 1.3656 + masm.emit_raw (0xAE) ; 1.3657 + masm.emit_raw (0xF0) ; 1.3658 + } else { 1.3659 + masm.lock () ; masm.addl (Address(rsp, 0), 0) ; 1.3660 + } 1.3661 + } 1.3662 + // Ratify _succ remains non-null 1.3663 + masm.cmpl (Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ; 1.3664 + masm.jccb (Assembler::notZero, LSuccess) ; 1.3665 + 1.3666 + masm.xorl (boxReg, boxReg) ; // box is really EAX 1.3667 + if (os::is_MP()) { masm.lock(); } 1.3668 + masm.cmpxchg(rsp, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)); 1.3669 + masm.jccb (Assembler::notEqual, LSuccess) ; 1.3670 + // Since we're low on registers we installed rsp as a placeholding in _owner. 1.3671 + // Now install Self over rsp. This is safe as we're transitioning from 1.3672 + // non-null to non=null 1.3673 + masm.get_thread (boxReg) ; 1.3674 + masm.movl (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg) ; 1.3675 + // Intentional fall-through into LGoSlowPath ... 
1.3676 + 1.3677 + masm.bind (LGoSlowPath) ; 1.3678 + masm.orl (boxReg, 1) ; // set ICC.ZF=0 to indicate failure 1.3679 + masm.jmpb (DONE_LABEL) ; 1.3680 + 1.3681 + masm.bind (LSuccess) ; 1.3682 + masm.xorl (boxReg, boxReg) ; // set ICC.ZF=1 to indicate success 1.3683 + masm.jmpb (DONE_LABEL) ; 1.3684 + } 1.3685 + 1.3686 + masm.bind (Stacked) ; 1.3687 + // It's not inflated and it's not recursively stack-locked and it's not biased. 1.3688 + // It must be stack-locked. 1.3689 + // Try to reset the header to displaced header. 1.3690 + // The "box" value on the stack is stable, so we can reload 1.3691 + // and be assured we observe the same value as above. 1.3692 + masm.movl (tmpReg, Address(boxReg, 0)) ; 1.3693 + if (os::is_MP()) { masm.lock(); } 1.3694 + masm.cmpxchg(tmpReg, Address(objReg, 0)); // Uses EAX which is box 1.3695 + // Intention fall-thru into DONE_LABEL 1.3696 + 1.3697 + 1.3698 + // DONE_LABEL is a hot target - we'd really like to place it at the 1.3699 + // start of cache line by padding with NOPs. 1.3700 + // See the AMD and Intel software optimization manuals for the 1.3701 + // most efficient "long" NOP encodings. 1.3702 + // Unfortunately none of our alignment mechanisms suffice. 
1.3703 + if ((EmitSync & 65536) == 0) { 1.3704 + masm.bind (CheckSucc) ; 1.3705 + } 1.3706 + masm.bind(DONE_LABEL); 1.3707 + 1.3708 + // Avoid branch to branch on AMD processors 1.3709 + if (EmitSync & 32768) { masm.nop() ; } 1.3710 + } 1.3711 + %} 1.3712 + 1.3713 + enc_class enc_String_Compare() %{ 1.3714 + Label ECX_GOOD_LABEL, LENGTH_DIFF_LABEL, 1.3715 + POP_LABEL, DONE_LABEL, CONT_LABEL, 1.3716 + WHILE_HEAD_LABEL; 1.3717 + MacroAssembler masm(&cbuf); 1.3718 + 1.3719 + // Get the first character position in both strings 1.3720 + // [8] char array, [12] offset, [16] count 1.3721 + int value_offset = java_lang_String::value_offset_in_bytes(); 1.3722 + int offset_offset = java_lang_String::offset_offset_in_bytes(); 1.3723 + int count_offset = java_lang_String::count_offset_in_bytes(); 1.3724 + int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); 1.3725 + 1.3726 + masm.movl(rax, Address(rsi, value_offset)); 1.3727 + masm.movl(rcx, Address(rsi, offset_offset)); 1.3728 + masm.leal(rax, Address(rax, rcx, Address::times_2, base_offset)); 1.3729 + masm.movl(rbx, Address(rdi, value_offset)); 1.3730 + masm.movl(rcx, Address(rdi, offset_offset)); 1.3731 + masm.leal(rbx, Address(rbx, rcx, Address::times_2, base_offset)); 1.3732 + 1.3733 + // Compute the minimum of the string lengths(rsi) and the 1.3734 + // difference of the string lengths (stack) 1.3735 + 1.3736 + 1.3737 + if (VM_Version::supports_cmov()) { 1.3738 + masm.movl(rdi, Address(rdi, count_offset)); 1.3739 + masm.movl(rsi, Address(rsi, count_offset)); 1.3740 + masm.movl(rcx, rdi); 1.3741 + masm.subl(rdi, rsi); 1.3742 + masm.pushl(rdi); 1.3743 + masm.cmovl(Assembler::lessEqual, rsi, rcx); 1.3744 + } else { 1.3745 + masm.movl(rdi, Address(rdi, count_offset)); 1.3746 + masm.movl(rcx, Address(rsi, count_offset)); 1.3747 + masm.movl(rsi, rdi); 1.3748 + masm.subl(rdi, rcx); 1.3749 + masm.pushl(rdi); 1.3750 + masm.jcc(Assembler::lessEqual, ECX_GOOD_LABEL); 1.3751 + masm.movl(rsi, rcx); 1.3752 + // rsi holds min, 
rcx is unused 1.3753 + } 1.3754 + 1.3755 + // Is the minimum length zero? 1.3756 + masm.bind(ECX_GOOD_LABEL); 1.3757 + masm.testl(rsi, rsi); 1.3758 + masm.jcc(Assembler::zero, LENGTH_DIFF_LABEL); 1.3759 + 1.3760 + // Load first characters 1.3761 + masm.load_unsigned_word(rcx, Address(rbx, 0)); 1.3762 + masm.load_unsigned_word(rdi, Address(rax, 0)); 1.3763 + 1.3764 + // Compare first characters 1.3765 + masm.subl(rcx, rdi); 1.3766 + masm.jcc(Assembler::notZero, POP_LABEL); 1.3767 + masm.decrement(rsi); 1.3768 + masm.jcc(Assembler::zero, LENGTH_DIFF_LABEL); 1.3769 + 1.3770 + { 1.3771 + // Check after comparing first character to see if strings are equivalent 1.3772 + Label LSkip2; 1.3773 + // Check if the strings start at same location 1.3774 + masm.cmpl(rbx,rax); 1.3775 + masm.jcc(Assembler::notEqual, LSkip2); 1.3776 + 1.3777 + // Check if the length difference is zero (from stack) 1.3778 + masm.cmpl(Address(rsp, 0), 0x0); 1.3779 + masm.jcc(Assembler::equal, LENGTH_DIFF_LABEL); 1.3780 + 1.3781 + // Strings might not be equivalent 1.3782 + masm.bind(LSkip2); 1.3783 + } 1.3784 + 1.3785 + // Shift rax, and rbx, to the end of the arrays, negate min 1.3786 + masm.leal(rax, Address(rax, rsi, Address::times_2, 2)); 1.3787 + masm.leal(rbx, Address(rbx, rsi, Address::times_2, 2)); 1.3788 + masm.negl(rsi); 1.3789 + 1.3790 + // Compare the rest of the characters 1.3791 + masm.bind(WHILE_HEAD_LABEL); 1.3792 + masm.load_unsigned_word(rcx, Address(rbx, rsi, Address::times_2, 0)); 1.3793 + masm.load_unsigned_word(rdi, Address(rax, rsi, Address::times_2, 0)); 1.3794 + masm.subl(rcx, rdi); 1.3795 + masm.jcc(Assembler::notZero, POP_LABEL); 1.3796 + masm.increment(rsi); 1.3797 + masm.jcc(Assembler::notZero, WHILE_HEAD_LABEL); 1.3798 + 1.3799 + // Strings are equal up to min length. Return the length difference. 
1.3800 + masm.bind(LENGTH_DIFF_LABEL); 1.3801 + masm.popl(rcx); 1.3802 + masm.jmp(DONE_LABEL); 1.3803 + 1.3804 + // Discard the stored length difference 1.3805 + masm.bind(POP_LABEL); 1.3806 + masm.addl(rsp, 4); 1.3807 + 1.3808 + // That's it 1.3809 + masm.bind(DONE_LABEL); 1.3810 + %} 1.3811 + 1.3812 + enc_class enc_pop_rdx() %{ 1.3813 + emit_opcode(cbuf,0x5A); // POP EDX 1.3814 + %} 1.3815 + 1.3816 + enc_class enc_rethrow() %{ 1.3817 + cbuf.set_inst_mark(); 1.3818 + emit_opcode(cbuf, 0xE9); // jmp entry; the rel32 is measured from the end of the 4-byte displacement, hence the -4 1.3819 + emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.code_end())-4, 1.3820 + runtime_call_Relocation::spec(), RELOC_IMM32 ); 1.3821 + %} 1.3822 + 1.3823 + 1.3824 + // Convert a double to an int. Java semantics require we do complex 1.3825 + // manipulations in the corner cases. So we set the rounding mode to 1.3826 + // 'zero', store the darned double down as an int, and reset the 1.3827 + // rounding mode to 'nearest'. Corner cases show up as the stored value 1.3828 + // 0x80000000 and are patched up by a slow-path call to d2i_wrapper. 1.3829 + enc_class D2I_encoding( regD src ) %{ 1.3830 + // Flip to round-to-zero mode. We attempted to allow invalid-op 1.3831 + // exceptions here, so that a NAN or other corner-case value would 1.3832 + // throw an exception (but normal values get converted at full speed). 1.3833 + // However, I2C adapters and other float-stack manglers leave pending 1.3834 + // invalid-op exceptions hanging. We would have to clear them before 1.3835 + // enabling them and that is more expensive than just testing for the 1.3836 + // invalid value Intel stores down in the corner cases. 1.3837 + emit_opcode(cbuf,0xD9); // FLDCW trunc 1.3838 + emit_opcode(cbuf,0x2D); 1.3839 + emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 1.3840 + // Allocate a word 1.3841 + emit_opcode(cbuf,0x83); // SUB ESP,4 1.3842 + emit_opcode(cbuf,0xEC); 1.3843 + emit_d8(cbuf,0x04); 1.3844 + // Encoding assumes a double has been pushed into FPR0. 
1.3845 + // Store down the double as an int, popping the FPU stack 1.3846 + emit_opcode(cbuf,0xDB); // FISTP [ESP] 1.3847 + emit_opcode(cbuf,0x1C); 1.3848 + emit_d8(cbuf,0x24); 1.3849 + // Restore the rounding mode; mask the exception 1.3850 + emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 1.3851 + emit_opcode(cbuf,0x2D); 1.3852 + emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 1.3853 + ? (int)StubRoutines::addr_fpu_cntrl_wrd_24() 1.3854 + : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); 1.3855 + 1.3856 + // Load the converted int; adjust CPU stack 1.3857 + emit_opcode(cbuf,0x58); // POP EAX 1.3858 + emit_opcode(cbuf,0x3D); // CMP EAX,imm 1.3859 + emit_d32 (cbuf,0x80000000); // 0x80000000: the invalid value stored down in the corner cases 1.3860 + emit_opcode(cbuf,0x75); // JNE around_slow_call 1.3861 + emit_d8 (cbuf,0x07); // Size of slow_call: 2-byte FLD + 5-byte CALL 1.3862 + // Push src onto stack slow-path 1.3863 + emit_opcode(cbuf,0xD9 ); // FLD ST(i) 1.3864 + emit_d8 (cbuf,0xC0-1+$src$$reg ); 1.3865 + // CALL directly to the runtime 1.3866 + cbuf.set_inst_mark(); 1.3867 + emit_opcode(cbuf,0xE8); // Call into runtime 1.3868 + emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.code_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 1.3869 + // Carry on here... 1.3870 + %} 1.3871 + 1.3872 + enc_class D2L_encoding( regD src ) %{ 1.3873 + emit_opcode(cbuf,0xD9); // FLDCW trunc 1.3874 + emit_opcode(cbuf,0x2D); 1.3875 + emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 1.3876 + // Allocate two words (8 bytes) for the 64-bit result 1.3877 + emit_opcode(cbuf,0x83); // SUB ESP,8 1.3878 + emit_opcode(cbuf,0xEC); 1.3879 + emit_d8(cbuf,0x08); 1.3880 + // Encoding assumes a double has been pushed into FPR0. 
1.3881 + // Store down the double as a long, popping the FPU stack 1.3882 + emit_opcode(cbuf,0xDF); // FISTP [ESP] 1.3883 + emit_opcode(cbuf,0x3C); 1.3884 + emit_d8(cbuf,0x24); 1.3885 + // Restore the rounding mode; mask the exception 1.3886 + emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 1.3887 + emit_opcode(cbuf,0x2D); 1.3888 + emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 1.3889 + ? (int)StubRoutines::addr_fpu_cntrl_wrd_24() 1.3890 + : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); 1.3891 + 1.3892 + // Load the converted long (EAX = low word, EDX = high word); adjust CPU stack 1.3893 + emit_opcode(cbuf,0x58); // POP EAX 1.3894 + emit_opcode(cbuf,0x5A); // POP EDX 1.3895 + emit_opcode(cbuf,0x81); // CMP EDX,imm 1.3896 + emit_d8 (cbuf,0xFA); // ModRM: /7 (CMP), r/m = EDX 1.3897 + emit_d32 (cbuf,0x80000000); // 0x80000000 1.3898 + emit_opcode(cbuf,0x75); // JNE around_slow_call 1.3899 + emit_d8 (cbuf,0x07+4); // Size of slow_call plus the 2-byte TEST and 2-byte JNE that follow 1.3900 + emit_opcode(cbuf,0x85); // TEST EAX,EAX 1.3901 + emit_opcode(cbuf,0xC0); // ModRM: EAX,EAX 1.3902 + emit_opcode(cbuf,0x75); // JNE around_slow_call 1.3903 + emit_d8 (cbuf,0x07); // Size of slow_call: 2-byte FLD + 5-byte CALL 1.3904 + // Push src onto stack slow-path 1.3905 + emit_opcode(cbuf,0xD9 ); // FLD ST(i) 1.3906 + emit_d8 (cbuf,0xC0-1+$src$$reg ); 1.3907 + // CALL directly to the runtime 1.3908 + cbuf.set_inst_mark(); 1.3909 + emit_opcode(cbuf,0xE8); // Call into runtime 1.3910 + emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.code_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 1.3911 + // Carry on here... 
1.3912 + %} 1.3913 + 1.3914 + enc_class X2L_encoding( regX src ) %{ 1.3915 + // Allocate two words (8 bytes): the float goes in, the 64-bit result comes back 1.3916 + emit_opcode(cbuf,0x83); // SUB ESP,8 1.3917 + emit_opcode(cbuf,0xEC); 1.3918 + emit_d8(cbuf,0x08); 1.3919 + 1.3920 + emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src 1.3921 + emit_opcode (cbuf, 0x0F ); 1.3922 + emit_opcode (cbuf, 0x11 ); 1.3923 + encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false); 1.3924 + 1.3925 + emit_opcode(cbuf,0xD9 ); // FLD_S [ESP] 1.3926 + encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); 1.3927 + 1.3928 + emit_opcode(cbuf,0xD9); // FLDCW trunc 1.3929 + emit_opcode(cbuf,0x2D); 1.3930 + emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 1.3931 + 1.3932 + // The FLD_S above has pushed the value onto the FPU stack top. 1.3933 + // Store it down as a long, popping the FPU stack 1.3934 + emit_opcode(cbuf,0xDF); // FISTP [ESP] 1.3935 + emit_opcode(cbuf,0x3C); 1.3936 + emit_d8(cbuf,0x24); 1.3937 + 1.3938 + // Restore the rounding mode; mask the exception 1.3939 + emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 1.3940 + emit_opcode(cbuf,0x2D); 1.3941 + emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 1.3942 + ? 
(int)StubRoutines::addr_fpu_cntrl_wrd_24() 1.3943 + : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); 1.3944 + 1.3945 + // Load the converted long (EAX = low word, EDX = high word); adjust CPU stack 1.3946 + emit_opcode(cbuf,0x58); // POP EAX 1.3947 + 1.3948 + emit_opcode(cbuf,0x5A); // POP EDX 1.3949 + 1.3950 + emit_opcode(cbuf,0x81); // CMP EDX,imm 1.3951 + emit_d8 (cbuf,0xFA); // ModRM: /7 (CMP), r/m = EDX 1.3952 + emit_d32 (cbuf,0x80000000);// 0x80000000 1.3953 + 1.3954 + emit_opcode(cbuf,0x75); // JNE around_slow_call 1.3955 + emit_d8 (cbuf,0x13+4); // Size of slow_call plus the 2-byte TEST and 2-byte JNE that follow 1.3956 + 1.3957 + emit_opcode(cbuf,0x85); // TEST EAX,EAX 1.3958 + emit_opcode(cbuf,0xC0); // ModRM: EAX,EAX 1.3959 + 1.3960 + emit_opcode(cbuf,0x75); // JNE around_slow_call 1.3961 + emit_d8 (cbuf,0x13); // Size of slow_call: SUB(3) + MOVSS(5) + FLD_S(3) + ADD(3) + CALL(5) = 19 bytes 1.3962 + 1.3963 + // Slow path: allocate a word and reload src onto the FPU stack for the wrapper 1.3964 + emit_opcode(cbuf,0x83); // SUB ESP,4 1.3965 + emit_opcode(cbuf,0xEC); 1.3966 + emit_d8(cbuf,0x04); 1.3967 + 1.3968 + emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src 1.3969 + emit_opcode (cbuf, 0x0F ); 1.3970 + emit_opcode (cbuf, 0x11 ); 1.3971 + encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false); 1.3972 + 1.3973 + emit_opcode(cbuf,0xD9 ); // FLD_S [ESP] 1.3974 + encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); 1.3975 + 1.3976 + emit_opcode(cbuf,0x83); // ADD ESP,4 1.3977 + emit_opcode(cbuf,0xC4); 1.3978 + emit_d8(cbuf,0x04); 1.3979 + 1.3980 + // CALL directly to the runtime 1.3981 + cbuf.set_inst_mark(); 1.3982 + emit_opcode(cbuf,0xE8); // Call into runtime 1.3983 + emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.code_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 1.3984 + // Carry on here... 
1.3985 + %} 1.3986 + 1.3987 + enc_class XD2L_encoding( regXD src ) %{ 1.3988 + // Allocate two words (8 bytes): the double goes in, the 64-bit result comes back 1.3989 + emit_opcode(cbuf,0x83); // SUB ESP,8 1.3990 + emit_opcode(cbuf,0xEC); 1.3991 + emit_d8(cbuf,0x08); 1.3992 + 1.3993 + emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src 1.3994 + emit_opcode (cbuf, 0x0F ); 1.3995 + emit_opcode (cbuf, 0x11 ); 1.3996 + encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false); 1.3997 + 1.3998 + emit_opcode(cbuf,0xDD ); // FLD_D [ESP] 1.3999 + encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); 1.4000 + 1.4001 + emit_opcode(cbuf,0xD9); // FLDCW trunc 1.4002 + emit_opcode(cbuf,0x2D); 1.4003 + emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 1.4004 + 1.4005 + // The FLD_D above has pushed the double onto the FPU stack top. 1.4006 + // Store down the double as a long, popping the FPU stack 1.4007 + emit_opcode(cbuf,0xDF); // FISTP [ESP] 1.4008 + emit_opcode(cbuf,0x3C); 1.4009 + emit_d8(cbuf,0x24); 1.4010 + 1.4011 + // Restore the rounding mode; mask the exception 1.4012 + emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 1.4013 + emit_opcode(cbuf,0x2D); 1.4014 + emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 1.4015 + ? 
(int)StubRoutines::addr_fpu_cntrl_wrd_24() 1.4016 + : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); 1.4017 + 1.4018 + // Load the converted long (EAX = low word, EDX = high word); adjust CPU stack 1.4019 + emit_opcode(cbuf,0x58); // POP EAX 1.4020 + 1.4021 + emit_opcode(cbuf,0x5A); // POP EDX 1.4022 + 1.4023 + emit_opcode(cbuf,0x81); // CMP EDX,imm 1.4024 + emit_d8 (cbuf,0xFA); // ModRM: /7 (CMP), r/m = EDX 1.4025 + emit_d32 (cbuf,0x80000000); // 0x80000000 1.4026 + 1.4027 + emit_opcode(cbuf,0x75); // JNE around_slow_call 1.4028 + emit_d8 (cbuf,0x13+4); // Size of slow_call plus the 2-byte TEST and 2-byte JNE that follow 1.4029 + 1.4030 + emit_opcode(cbuf,0x85); // TEST EAX,EAX 1.4031 + emit_opcode(cbuf,0xC0); // ModRM: EAX,EAX 1.4032 + 1.4033 + emit_opcode(cbuf,0x75); // JNE around_slow_call 1.4034 + emit_d8 (cbuf,0x13); // Size of slow_call: SUB(3) + MOVSD(5) + FLD_D(3) + ADD(3) + CALL(5) = 19 bytes 1.4035 + 1.4036 + // Push src onto stack slow-path 1.4037 + // Allocate two words (8 bytes) 1.4038 + emit_opcode(cbuf,0x83); // SUB ESP,8 1.4039 + emit_opcode(cbuf,0xEC); 1.4040 + emit_d8(cbuf,0x08); 1.4041 + 1.4042 + emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src 1.4043 + emit_opcode (cbuf, 0x0F ); 1.4044 + emit_opcode (cbuf, 0x11 ); 1.4045 + encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false); 1.4046 + 1.4047 + emit_opcode(cbuf,0xDD ); // FLD_D [ESP] 1.4048 + encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); 1.4049 + 1.4050 + emit_opcode(cbuf,0x83); // ADD ESP,8 1.4051 + emit_opcode(cbuf,0xC4); 1.4052 + emit_d8(cbuf,0x08); 1.4053 + 1.4054 + // CALL directly to the runtime 1.4055 + cbuf.set_inst_mark(); 1.4056 + emit_opcode(cbuf,0xE8); // Call into runtime 1.4057 + emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.code_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 1.4058 + // Carry on here... 
1.4059 + %} 1.4060 + 1.4061 + enc_class D2X_encoding( regX dst, regD src ) %{ 1.4062 + // Allocate a word 1.4063 + emit_opcode(cbuf,0x83); // SUB ESP,4 1.4064 + emit_opcode(cbuf,0xEC); 1.4065 + emit_d8(cbuf,0x04); 1.4066 + int pop = 0x02; 1.4067 + if ($src$$reg != FPR1L_enc) { 1.4068 + emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 1.4069 + emit_d8( cbuf, 0xC0-1+$src$$reg ); 1.4070 + pop = 0x03; 1.4071 + } 1.4072 + store_to_stackslot( cbuf, 0xD9, pop, 0 ); // FST<P>_S [ESP]: pop==0x02 stores without popping, pop==0x03 also pops the copy pushed above 1.4073 + 1.4074 + emit_opcode (cbuf, 0xF3 ); // MOVSS dst(xmm), [ESP] 1.4075 + emit_opcode (cbuf, 0x0F ); 1.4076 + emit_opcode (cbuf, 0x10 ); 1.4077 + encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false); 1.4078 + 1.4079 + emit_opcode(cbuf,0x83); // ADD ESP,4 1.4080 + emit_opcode(cbuf,0xC4); 1.4081 + emit_d8(cbuf,0x04); 1.4082 + // Carry on here... 1.4083 + %} 1.4084 + 1.4085 + enc_class FX2I_encoding( regX src, eRegI dst ) %{ 1.4086 + emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1.4087 + 1.4088 + // Compare the result to see if we need to go to the slow path 1.4089 + emit_opcode(cbuf,0x81); // CMP dst,imm 1.4090 + emit_rm (cbuf,0x3,0x7,$dst$$reg); 1.4091 + emit_d32 (cbuf,0x80000000); // 0x80000000 1.4092 + 1.4093 + emit_opcode(cbuf,0x75); // JNE around_slow_call 1.4094 + emit_d8 (cbuf,0x13); // Size of slow_call: SUB(3) + MOVSS/MOVSD(5) + FLD(3) + ADD(3) + CALL(5) = 19 bytes 1.4095 + // Store xmm to a temp memory 1.4096 + // location and push it onto stack. 1.4097 + 1.4098 + emit_opcode(cbuf,0x83); // SUB ESP,4 (SUB ESP,8 when $primary is set) 1.4099 + emit_opcode(cbuf,0xEC); 1.4100 + emit_d8(cbuf, $primary ? 0x8 : 0x4); 1.4101 + 1.4102 + emit_opcode (cbuf, $primary ? 0xF2 : 0xF3 ); // MOVSS [ESP], xmm (F2 prefix = MOVSD when $primary is set) 1.4103 + emit_opcode (cbuf, 0x0F ); 1.4104 + emit_opcode (cbuf, 0x11 ); 1.4105 + encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false); 1.4106 + 1.4107 + emit_opcode(cbuf, $primary ? 
0xDD : 0xD9 ); // FLD [ESP] (DD = FLD_D when $primary is set, D9 = FLD_S otherwise) 1.4108 + encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); 1.4109 + 1.4110 + emit_opcode(cbuf,0x83); // ADD ESP,4 (ADD ESP,8 when $primary is set) 1.4111 + emit_opcode(cbuf,0xC4); 1.4112 + emit_d8(cbuf, $primary ? 0x8 : 0x4); 1.4113 + 1.4114 + // CALL directly to the runtime 1.4115 + cbuf.set_inst_mark(); 1.4116 + emit_opcode(cbuf,0xE8); // Call into runtime 1.4117 + emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.code_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 1.4118 + 1.4119 + // Carry on here... 1.4120 + %} 1.4121 + 1.4122 + enc_class X2D_encoding( regD dst, regX src ) %{ 1.4123 + // Allocate a word 1.4124 + emit_opcode(cbuf,0x83); // SUB ESP,4 1.4125 + emit_opcode(cbuf,0xEC); 1.4126 + emit_d8(cbuf,0x04); 1.4127 + 1.4128 + emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], xmm 1.4129 + emit_opcode (cbuf, 0x0F ); 1.4130 + emit_opcode (cbuf, 0x11 ); 1.4131 + encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false); 1.4132 + 1.4133 + emit_opcode(cbuf,0xD9 ); // FLD_S [ESP] 1.4134 + encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); 1.4135 + 1.4136 + emit_opcode(cbuf,0x83); // ADD ESP,4 1.4137 + emit_opcode(cbuf,0xC4); 1.4138 + emit_d8(cbuf,0x04); 1.4139 + 1.4140 + // Carry on here... (the value is left on the FPU stack top; $dst is not referenced by this encoding) 
1.4141 + %} 1.4142 + 1.4143 + enc_class AbsXF_encoding(regX dst) %{ 1.4144 + address signmask_address=(address)float_signmask_pool; 1.4145 + // andps:\tANDPS $dst,[signconst] 1.4146 + emit_opcode(cbuf, 0x0F); 1.4147 + emit_opcode(cbuf, 0x54); 1.4148 + emit_rm(cbuf, 0x0, $dst$$reg, 0x5); 1.4149 + emit_d32(cbuf, (int)signmask_address); 1.4150 + %} 1.4151 + 1.4152 + enc_class AbsXD_encoding(regXD dst) %{ 1.4153 + address signmask_address=(address)double_signmask_pool; 1.4154 + // andpd:\tANDPD $dst,[signconst] 1.4155 + emit_opcode(cbuf, 0x66); 1.4156 + emit_opcode(cbuf, 0x0F); 1.4157 + emit_opcode(cbuf, 0x54); 1.4158 + emit_rm(cbuf, 0x0, $dst$$reg, 0x5); 1.4159 + emit_d32(cbuf, (int)signmask_address); 1.4160 + %} 1.4161 + 1.4162 + enc_class NegXF_encoding(regX dst) %{ 1.4163 + address signmask_address=(address)float_signflip_pool; 1.4164 + // xorps:\tXORPS $dst,[signconst] 1.4165 + emit_opcode(cbuf, 0x0F); 1.4166 + emit_opcode(cbuf, 0x57); 1.4167 + emit_rm(cbuf, 0x0, $dst$$reg, 0x5); 1.4168 + emit_d32(cbuf, (int)signmask_address); 1.4169 + %} 1.4170 + 1.4171 + enc_class NegXD_encoding(regXD dst) %{ 1.4172 + address signmask_address=(address)double_signflip_pool; 1.4173 + // xorpd:\tXORPD $dst,[signconst] 1.4174 + emit_opcode(cbuf, 0x66); 1.4175 + emit_opcode(cbuf, 0x0F); 1.4176 + emit_opcode(cbuf, 0x57); 1.4177 + emit_rm(cbuf, 0x0, $dst$$reg, 0x5); 1.4178 + emit_d32(cbuf, (int)signmask_address); 1.4179 + %} 1.4180 + 1.4181 + enc_class FMul_ST_reg( eRegF src1 ) %{ 1.4182 + // Operand was loaded from memory into fp ST (stack top) 1.4183 + // FMUL ST,$src1 /* D8 C8+i */ 1.4184 + emit_opcode(cbuf, 0xD8); 1.4185 + emit_opcode(cbuf, 0xC8 + $src1$$reg); 1.4186 + %} 1.4187 + 1.4188 + enc_class FAdd_ST_reg( eRegF src2 ) %{ 1.4189 + // FADD ST,src2 /* D8 C0+i */ 1.4190 + emit_opcode(cbuf, 0xD8); 1.4191 + emit_opcode(cbuf, 0xC0 + $src2$$reg); 1.4192 + //could use FADDP src2,fpST /* DE C0+i */ 1.4193 + %} 1.4194 + 1.4195 + enc_class FAddP_reg_ST( eRegF src2 ) %{ 1.4196 + // FADDP 
src2,ST /* DE C0+i */ 1.4197 + emit_opcode(cbuf, 0xDE); 1.4198 + emit_opcode(cbuf, 0xC0 + $src2$$reg); 1.4199 + %} 1.4200 + 1.4201 + enc_class subF_divF_encode( eRegF src1, eRegF src2) %{ 1.4202 + // Operand has been loaded into fp ST (stack top) 1.4203 + // FSUB ST,$src1 /* D8 E0+i */ 1.4204 + emit_opcode(cbuf, 0xD8); 1.4205 + emit_opcode(cbuf, 0xE0 + $src1$$reg); 1.4206 + 1.4207 + // FDIV ST,$src2 /* D8 F0+i */ 1.4208 + emit_opcode(cbuf, 0xD8); 1.4209 + emit_opcode(cbuf, 0xF0 + $src2$$reg); 1.4210 + %} 1.4211 + 1.4212 + enc_class MulFAddF (eRegF src1, eRegF src2) %{ 1.4213 + // Operand was loaded from memory into fp ST (stack top) 1.4214 + // FADD ST,$src1 /* D8 C0+i */ 1.4215 + emit_opcode(cbuf, 0xD8); 1.4216 + emit_opcode(cbuf, 0xC0 + $src1$$reg); 1.4217 + 1.4218 + // FMUL ST,$src2 /* D8 C8+i */ 1.4219 + emit_opcode(cbuf, 0xD8); 1.4220 + emit_opcode(cbuf, 0xC8 + $src2$$reg); 1.4221 + %} 1.4222 + 1.4223 + 1.4224 + enc_class MulFAddFreverse (eRegF src1, eRegF src2) %{ 1.4225 + // Operand was loaded from memory into fp ST (stack top) 1.4226 + // FADD ST,$src1 /* D8 C0+i */ 1.4227 + emit_opcode(cbuf, 0xD8); 1.4228 + emit_opcode(cbuf, 0xC0 + $src1$$reg); 1.4229 + 1.4230 + // FMULP src2,ST /* DE C8+i */ 1.4231 + emit_opcode(cbuf, 0xDE); 1.4232 + emit_opcode(cbuf, 0xC8 + $src2$$reg); 1.4233 + %} 1.4234 + 1.4235 + enc_class enc_membar_acquire %{ 1.4236 + // Doug Lea believes this is not needed with current Sparcs and TSO. NOTE(review): the wording mentions Sparcs in the x86 description - confirm. The barrier below is commented out, so this encoding emits nothing. 1.4237 + // MacroAssembler masm(&cbuf); 1.4238 + // masm.membar(); 1.4239 + %} 1.4240 + 1.4241 + enc_class enc_membar_release %{ 1.4242 + // Doug Lea believes this is not needed with current Sparcs and TSO. NOTE(review): the wording mentions Sparcs in the x86 description - confirm. 
1.4243 + // MacroAssembler masm(&cbuf); 1.4244 + // masm.membar(); 1.4245 + %} 1.4246 + 1.4247 + enc_class enc_membar_volatile %{ 1.4248 + MacroAssembler masm(&cbuf); 1.4249 + masm.membar(); 1.4250 + %} 1.4251 + 1.4252 + // Atomically load the volatile long 1.4253 + enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{ 1.4254 + emit_opcode(cbuf,0xDF); 1.4255 + int rm_byte_opcode = 0x05; 1.4256 + int base = $mem$$base; 1.4257 + int index = $mem$$index; 1.4258 + int scale = $mem$$scale; 1.4259 + int displace = $mem$$disp; 1.4260 + bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals 1.4261 + encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop); 1.4262 + store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp ); 1.4263 + %} 1.4264 + 1.4265 + enc_class enc_loadLX_volatile( memory mem, stackSlotL dst, regXD tmp ) %{ 1.4266 + { // Atomic long load 1.4267 + // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem 1.4268 + emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66); 1.4269 + emit_opcode(cbuf,0x0F); 1.4270 + emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12); 1.4271 + int base = $mem$$base; 1.4272 + int index = $mem$$index; 1.4273 + int scale = $mem$$scale; 1.4274 + int displace = $mem$$disp; 1.4275 + bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals 1.4276 + encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop); 1.4277 + } 1.4278 + { // MOVSD $dst,$tmp ! 
atomic long store 1.4279 + emit_opcode(cbuf,0xF2); 1.4280 + emit_opcode(cbuf,0x0F); 1.4281 + emit_opcode(cbuf,0x11); 1.4282 + int base = $dst$$base; 1.4283 + int index = $dst$$index; 1.4284 + int scale = $dst$$scale; 1.4285 + int displace = $dst$$disp; 1.4286 + bool disp_is_oop = $dst->disp_is_oop(); // disp-as-oop when working with static globals 1.4287 + encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop); 1.4288 + } 1.4289 + %} 1.4290 + 1.4291 + enc_class enc_loadLX_reg_volatile( memory mem, eRegL dst, regXD tmp ) %{ 1.4292 + { // Atomic long load 1.4293 + // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem 1.4294 + emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66); 1.4295 + emit_opcode(cbuf,0x0F); 1.4296 + emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12); 1.4297 + int base = $mem$$base; 1.4298 + int index = $mem$$index; 1.4299 + int scale = $mem$$scale; 1.4300 + int displace = $mem$$disp; 1.4301 + bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals 1.4302 + encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop); 1.4303 + } 1.4304 + { // MOVD $dst.lo,$tmp 1.4305 + emit_opcode(cbuf,0x66); 1.4306 + emit_opcode(cbuf,0x0F); 1.4307 + emit_opcode(cbuf,0x7E); 1.4308 + emit_rm(cbuf, 0x3, $tmp$$reg, $dst$$reg); 1.4309 + } 1.4310 + { // PSRLQ $tmp,32 1.4311 + emit_opcode(cbuf,0x66); 1.4312 + emit_opcode(cbuf,0x0F); 1.4313 + emit_opcode(cbuf,0x73); 1.4314 + emit_rm(cbuf, 0x3, 0x02, $tmp$$reg); 1.4315 + emit_d8(cbuf, 0x20); 1.4316 + } 1.4317 + { // MOVD $dst.hi,$tmp 1.4318 + emit_opcode(cbuf,0x66); 1.4319 + emit_opcode(cbuf,0x0F); 1.4320 + emit_opcode(cbuf,0x7E); 1.4321 + emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg)); 1.4322 + } 1.4323 + %} 1.4324 + 1.4325 + // Volatile Store Long. Must be atomic, so move it into 1.4326 + // the FP TOS and then do a 64-bit FIST. 
Has to probe the 1.4327 + // target address before the store (for null-ptr checks) 1.4328 + // so the memory operand is used twice in the encoding. 1.4329 + enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{ 1.4330 + store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp ); 1.4331 + cbuf.set_inst_mark(); // Mark start of FIST in case $mem has an oop 1.4332 + emit_opcode(cbuf,0xDF); 1.4333 + int rm_byte_opcode = 0x07; 1.4334 + int base = $mem$$base; 1.4335 + int index = $mem$$index; 1.4336 + int scale = $mem$$scale; 1.4337 + int displace = $mem$$disp; 1.4338 + bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals 1.4339 + encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop); 1.4340 + %} 1.4341 + 1.4342 + enc_class enc_storeLX_volatile( memory mem, stackSlotL src, regXD tmp) %{ 1.4343 + { // Atomic long load 1.4344 + // UseXmmLoadAndClearUpper ? movsd $tmp,[$src] : movlpd $tmp,[$src] 1.4345 + emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66); 1.4346 + emit_opcode(cbuf,0x0F); 1.4347 + emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12); 1.4348 + int base = $src$$base; 1.4349 + int index = $src$$index; 1.4350 + int scale = $src$$scale; 1.4351 + int displace = $src$$disp; 1.4352 + bool disp_is_oop = $src->disp_is_oop(); // disp-as-oop when working with static globals 1.4353 + encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop); 1.4354 + } 1.4355 + cbuf.set_inst_mark(); // Mark start of MOVSD in case $mem has an oop 1.4356 + { // MOVSD $mem,$tmp ! 
atomic long store 1.4357 + emit_opcode(cbuf,0xF2); 1.4358 + emit_opcode(cbuf,0x0F); 1.4359 + emit_opcode(cbuf,0x11); 1.4360 + int base = $mem$$base; 1.4361 + int index = $mem$$index; 1.4362 + int scale = $mem$$scale; 1.4363 + int displace = $mem$$disp; 1.4364 + bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals 1.4365 + encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop); 1.4366 + } 1.4367 + %} 1.4368 + 1.4369 + enc_class enc_storeLX_reg_volatile( memory mem, eRegL src, regXD tmp, regXD tmp2) %{ 1.4370 + { // MOVD $tmp,$src.lo 1.4371 + emit_opcode(cbuf,0x66); 1.4372 + emit_opcode(cbuf,0x0F); 1.4373 + emit_opcode(cbuf,0x6E); 1.4374 + emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg); 1.4375 + } 1.4376 + { // MOVD $tmp2,$src.hi 1.4377 + emit_opcode(cbuf,0x66); 1.4378 + emit_opcode(cbuf,0x0F); 1.4379 + emit_opcode(cbuf,0x6E); 1.4380 + emit_rm(cbuf, 0x3, $tmp2$$reg, HIGH_FROM_LOW($src$$reg)); 1.4381 + } 1.4382 + { // PUNPCKLDQ $tmp,$tmp2 1.4383 + emit_opcode(cbuf,0x66); 1.4384 + emit_opcode(cbuf,0x0F); 1.4385 + emit_opcode(cbuf,0x62); 1.4386 + emit_rm(cbuf, 0x3, $tmp$$reg, $tmp2$$reg); 1.4387 + } 1.4388 + cbuf.set_inst_mark(); // Mark start of MOVSD in case $mem has an oop 1.4389 + { // MOVSD $mem,$tmp ! atomic long store 1.4390 + emit_opcode(cbuf,0xF2); 1.4391 + emit_opcode(cbuf,0x0F); 1.4392 + emit_opcode(cbuf,0x11); 1.4393 + int base = $mem$$base; 1.4394 + int index = $mem$$index; 1.4395 + int scale = $mem$$scale; 1.4396 + int displace = $mem$$disp; 1.4397 + bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals 1.4398 + encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop); 1.4399 + } 1.4400 + %} 1.4401 + 1.4402 + // Safepoint Poll. This polls the safepoint page, and causes an 1.4403 + // exception if it is not readable. 
Unfortunately, it kills the condition code 1.4404 + // in the process 1.4405 + // We currently use TESTL [spp],EDI 1.4406 + // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0 1.4407 + 1.4408 + enc_class Safepoint_Poll() %{ 1.4409 + cbuf.relocate(cbuf.inst_mark(), relocInfo::poll_type, 0); // record a poll_type relocation at the current instruction mark 1.4410 + emit_opcode(cbuf,0x85); // TEST r/m32,r32 1.4411 + emit_rm (cbuf, 0x0, 0x7, 0x5); // mod=00, reg=7 (EDI), r/m=101: operand is [disp32] 1.4412 + emit_d32(cbuf, (intptr_t)os::get_polling_page()); // disp32 = address returned by os::get_polling_page() 1.4413 + %} 1.4414 +%} 1.4415 + 1.4416 + 1.4417 +//----------FRAME-------------------------------------------------------------- 1.4418 +// Definition of frame structure and management information. 1.4419 +// 1.4420 +// S T A C K L A Y O U T Allocators stack-slot number 1.4421 +// | (to get allocators register number 1.4422 +// G Owned by | | v add OptoReg::stack0()) 1.4423 +// r CALLER | | 1.4424 +// o | +--------+ pad to even-align allocators stack-slot 1.4425 +// w V | pad0 | numbers; owned by CALLER 1.4426 +// t -----------+--------+----> Matcher::_in_arg_limit, unaligned 1.4427 +// h ^ | in | 5 1.4428 +// | | args | 4 Holes in incoming args owned by SELF 1.4429 +// | | | | 3 1.4430 +// | | +--------+ 1.4431 +// V | | old out| Empty on Intel, window on Sparc 1.4432 +// | old |preserve| Must be even aligned. 1.4433 +// | SP-+--------+----> Matcher::_old_SP, even aligned 1.4434 +// | | in | 3 area for Intel ret address 1.4435 +// Owned by |preserve| Empty on Sparc. 
1.4436 +// SELF +--------+ 1.4437 +// | | pad2 | 2 pad to align old SP 1.4438 +// | +--------+ 1 1.4439 +// | | locks | 0 1.4440 +// | +--------+----> OptoReg::stack0(), even aligned 1.4441 +// | | pad1 | 11 pad to align new SP 1.4442 +// | +--------+ 1.4443 +// | | | 10 1.4444 +// | | spills | 9 spills 1.4445 +// V | | 8 (pad0 slot for callee) 1.4446 +// -----------+--------+----> Matcher::_out_arg_limit, unaligned 1.4447 +// ^ | out | 7 1.4448 +// | | args | 6 Holes in outgoing args owned by CALLEE 1.4449 +// Owned by +--------+ 1.4450 +// CALLEE | new out| 6 Empty on Intel, window on Sparc 1.4451 +// | new |preserve| Must be even-aligned. 1.4452 +// | SP-+--------+----> Matcher::_new_SP, even aligned 1.4453 +// | | | 1.4454 +// 1.4455 +// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is 1.4456 +// known from SELF's arguments and the Java calling convention. 1.4457 +// Region 6-7 is determined per call site. 1.4458 +// Note 2: If the calling convention leaves holes in the incoming argument 1.4459 +// area, those holes are owned by SELF. Holes in the outgoing area 1.4460 +// are owned by the CALLEE. Holes should not be necessary in the 1.4461 +// incoming area, as the Java calling convention is completely under 1.4462 +// the control of the AD file. Doubles can be sorted and packed to 1.4463 +// avoid holes. Holes in the outgoing arguments may be necessary for 1.4464 +// varargs C calling conventions. 1.4465 +// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is 1.4466 +// even aligned with pad0 as needed. 1.4467 +// Region 6 is even aligned. Region 6-7 is NOT even aligned; 1.4468 +// region 6-11 is even aligned; it may be padded out more so that 1.4469 +// the region from SP to FP meets the minimum stack alignment. 
1.4470 + 1.4471 +frame %{ 1.4472 + // What direction does stack grow in (assumed to be same for C & Java) 1.4473 + stack_direction(TOWARDS_LOW); 1.4474 + 1.4475 + // These three registers define part of the calling convention 1.4476 + // between compiled code and the interpreter. 1.4477 + inline_cache_reg(EAX); // Inline Cache Register 1.4478 + interpreter_method_oop_reg(EBX); // Method Oop Register when calling interpreter 1.4479 + 1.4480 + // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] 1.4481 + cisc_spilling_operand_name(indOffset32); 1.4482 + 1.4483 + // Number of stack slots consumed by locking an object 1.4484 + sync_stack_slots(1); 1.4485 + 1.4486 + // Compiled code's Frame Pointer 1.4487 + frame_pointer(ESP); 1.4488 + // Interpreter stores its frame pointer in a register which is 1.4489 + // stored to the stack by I2CAdaptors. 1.4490 + // I2CAdaptors convert from interpreted java to compiled java. 1.4491 + interpreter_frame_pointer(EBP); 1.4492 + 1.4493 + // Stack alignment requirement 1.4494 + // Alignment size in bytes (128-bit -> 16 bytes) 1.4495 + stack_alignment(StackAlignmentInBytes); 1.4496 + 1.4497 + // Number of stack slots between incoming argument block and the start of 1.4498 + // a new frame. The PROLOG must add this many slots to the stack. The 1.4499 + // EPILOG must remove this many slots. Intel needs one slot for 1.4500 + // return address and one for rbp, (must save rbp) 1.4501 + in_preserve_stack_slots(2+VerifyStackAtCalls); 1.4502 + 1.4503 + // Number of outgoing stack slots killed above the out_preserve_stack_slots 1.4504 + // for calls to C. Supports the var-args backing area for register parms. 1.4505 + varargs_C_out_slots_killed(0); 1.4506 + 1.4507 + // The after-PROLOG location of the return address. Location of 1.4508 + // return address specifies a type (REG or STACK) and a number 1.4509 + // representing the register number (i.e. - use a register name) or 1.4510 + // stack slot. 
1.4511 + // Ret Addr is on stack in slot 0 if no locks or verification or alignment. 1.4512 + // Otherwise, it is above the locks and verification slot and alignment word 1.4513 + return_addr(STACK - 1 + 1.4514 + round_to(1+VerifyStackAtCalls+ 1.4515 + Compile::current()->fixed_slots(), 1.4516 + (StackAlignmentInBytes/wordSize))); 1.4517 + 1.4518 + // Body of function which returns an integer array locating 1.4519 + // arguments either in registers or in stack slots. Passed an array 1.4520 + // of ideal registers called "sig" and a "length" count. Stack-slot 1.4521 + // offsets are based on outgoing arguments, i.e. a CALLER setting up 1.4522 + // arguments for a CALLEE. Incoming stack arguments are 1.4523 + // automatically biased by the preserve_stack_slots field above. 1.4524 + calling_convention %{ 1.4525 + // No difference between ingoing/outgoing just pass false 1.4526 + SharedRuntime::java_calling_convention(sig_bt, regs, length, false); 1.4527 + %} 1.4528 + 1.4529 + 1.4530 + // Body of function which returns an integer array locating 1.4531 + // arguments either in registers or in stack slots. Passed an array 1.4532 + // of ideal registers called "sig" and a "length" count. Stack-slot 1.4533 + // offsets are based on outgoing arguments, i.e. a CALLER setting up 1.4534 + // arguments for a CALLEE. Incoming stack arguments are 1.4535 + // automatically biased by the preserve_stack_slots field above. 
1.4536 + c_calling_convention %{ 1.4537 + // This is obviously always outgoing 1.4538 + (void) SharedRuntime::c_calling_convention(sig_bt, regs, length); 1.4539 + %} 1.4540 + 1.4541 + // Location of C & interpreter return values 1.4542 + c_return_value %{ 1.4543 + assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); 1.4544 + static int lo[Op_RegL+1] = { 0, 0, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num }; 1.4545 + static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num }; 1.4546 + 1.4547 + // in SSE2+ mode we want to keep the FPU stack clean so pretend 1.4548 + // that C functions return float and double results in XMM0. 1.4549 + if( ideal_reg == Op_RegD && UseSSE>=2 ) 1.4550 + return OptoRegPair(XMM0b_num,XMM0a_num); 1.4551 + if( ideal_reg == Op_RegF && UseSSE>=2 ) 1.4552 + return OptoRegPair(OptoReg::Bad,XMM0a_num); 1.4553 + 1.4554 + return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); 1.4555 + %} 1.4556 + 1.4557 + // Location of return values 1.4558 + return_value %{ 1.4559 + assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); 1.4560 + static int lo[Op_RegL+1] = { 0, 0, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num }; 1.4561 + static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num }; 1.4562 + if( ideal_reg == Op_RegD && UseSSE>=2 ) 1.4563 + return OptoRegPair(XMM0b_num,XMM0a_num); 1.4564 + if( ideal_reg == Op_RegF && UseSSE>=1 ) 1.4565 + return OptoRegPair(OptoReg::Bad,XMM0a_num); 1.4566 + return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); 1.4567 + %} 1.4568 + 1.4569 +%} 1.4570 + 1.4571 +//----------ATTRIBUTES--------------------------------------------------------- 1.4572 +//----------Operand Attributes------------------------------------------------- 1.4573 +op_attrib op_cost(0); // Required cost attribute 1.4574 + 1.4575 +//----------Instruction Attributes--------------------------------------------- 1.4576 
+ins_attrib ins_cost(100); // Required cost attribute 1.4577 +ins_attrib ins_size(8); // Required size attribute (in bits) 1.4578 +ins_attrib ins_pc_relative(0); // Required PC Relative flag 1.4579 +ins_attrib ins_short_branch(0); // Required flag: is this instruction a 1.4580 + // non-matching short branch variant of some 1.4581 + // long branch? 1.4582 +ins_attrib ins_alignment(1); // Required alignment attribute (must be a power of 2) 1.4583 + // specifies the alignment that some part of the instruction (not 1.4584 + // necessarily the start) requires. If > 1, a compute_padding() 1.4585 + // function must be provided for the instruction 1.4586 + 1.4587 +//----------OPERANDS----------------------------------------------------------- 1.4588 +// Operand definitions must precede instruction definitions for correct parsing 1.4589 +// in the ADLC because operands constitute user defined types which are used in 1.4590 +// instruction definitions. 1.4591 + 1.4592 +//----------Simple Operands---------------------------------------------------- 1.4593 +// Immediate Operands 1.4594 +// Integer Immediate (any ConI; no predicate) 1.4595 +operand immI() %{ 1.4596 + match(ConI); 1.4597 + 1.4598 + op_cost(10); 1.4599 + format %{ %} 1.4600 + interface(CONST_INTER); 1.4601 +%} 1.4602 + 1.4603 +// Constant for test vs zero (exactly 0) 1.4604 +operand immI0() %{ 1.4605 + predicate(n->get_int() == 0); 1.4606 + match(ConI); 1.4607 + 1.4608 + op_cost(0); 1.4609 + format %{ %} 1.4610 + interface(CONST_INTER); 1.4611 +%} 1.4612 + 1.4613 +// Constant for increment (exactly 1) 1.4614 +operand immI1() %{ 1.4615 + predicate(n->get_int() == 1); 1.4616 + match(ConI); 1.4617 + 1.4618 + op_cost(0); 1.4619 + format %{ %} 1.4620 + interface(CONST_INTER); 1.4621 +%} 1.4622 + 1.4623 +// Constant for decrement (exactly -1) 1.4624 +operand immI_M1() %{ 1.4625 + predicate(n->get_int() == -1); 1.4626 + match(ConI); 1.4627 + 1.4628 + op_cost(0); 1.4629 + format %{ %} 1.4630 + interface(CONST_INTER); 1.4631 +%} 1.4632 + 1.4633 +// Valid scale values for addressing 
modes 1.4634 +operand immI2() %{ 1.4635 + predicate(0 <= n->get_int() && (n->get_int() <= 3)); 1.4636 + match(ConI); 1.4637 + 1.4638 + format %{ %} 1.4639 + interface(CONST_INTER); 1.4640 +%} 1.4641 + 1.4642 +operand immI8() %{ 1.4643 + predicate((-128 <= n->get_int()) && (n->get_int() <= 127)); 1.4644 + match(ConI); 1.4645 + 1.4646 + op_cost(5); 1.4647 + format %{ %} 1.4648 + interface(CONST_INTER); 1.4649 +%} 1.4650 + 1.4651 +operand immI16() %{ 1.4652 + predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767)); 1.4653 + match(ConI); 1.4654 + 1.4655 + op_cost(10); 1.4656 + format %{ %} 1.4657 + interface(CONST_INTER); 1.4658 +%} 1.4659 + 1.4660 +// Constant for long shifts (exactly 32) 1.4661 +operand immI_32() %{ 1.4662 + predicate( n->get_int() == 32 ); 1.4663 + match(ConI); 1.4664 + 1.4665 + op_cost(0); 1.4666 + format %{ %} 1.4667 + interface(CONST_INTER); 1.4668 +%} 1.4669 + 1.4670 +operand immI_1_31() %{ 1.4671 + predicate( n->get_int() >= 1 && n->get_int() <= 31 ); 1.4672 + match(ConI); 1.4673 + 1.4674 + op_cost(0); 1.4675 + format %{ %} 1.4676 + interface(CONST_INTER); 1.4677 +%} 1.4678 + 1.4679 +operand immI_32_63() %{ 1.4680 + predicate( n->get_int() >= 32 && n->get_int() <= 63 ); 1.4681 + match(ConI); 1.4682 + op_cost(0); 1.4683 + 1.4684 + format %{ %} 1.4685 + interface(CONST_INTER); 1.4686 +%} 1.4687 + 1.4688 +// Pointer Immediate 1.4689 +operand immP() %{ 1.4690 + match(ConP); 1.4691 + 1.4692 + op_cost(10); 1.4693 + format %{ %} 1.4694 + interface(CONST_INTER); 1.4695 +%} 1.4696 + 1.4697 +// NULL Pointer Immediate 1.4698 +operand immP0() %{ 1.4699 + predicate( n->get_ptr() == 0 ); 1.4700 + match(ConP); 1.4701 + op_cost(0); 1.4702 + 1.4703 + format %{ %} 1.4704 + interface(CONST_INTER); 1.4705 +%} 1.4706 + 1.4707 +// Long Immediate 1.4708 +operand immL() %{ 1.4709 + match(ConL); 1.4710 + 1.4711 + op_cost(20); 1.4712 + format %{ %} 1.4713 + interface(CONST_INTER); 1.4714 +%} 1.4715 + 1.4716 +// Long Immediate zero 1.4717 +operand immL0() %{ 1.4718 + 
predicate( n->get_long() == 0L ); 1.4719 + match(ConL); 1.4720 + op_cost(0); 1.4721 + 1.4722 + format %{ %} 1.4723 + interface(CONST_INTER); 1.4724 +%} 1.4725 + 1.4726 +// Long immediate from 0 to 127. 1.4727 +// Used for a shorter form of long mul by 10. 1.4728 +operand immL_127() %{ 1.4729 + predicate((0 <= n->get_long()) && (n->get_long() <= 127)); 1.4730 + match(ConL); 1.4731 + op_cost(0); 1.4732 + 1.4733 + format %{ %} 1.4734 + interface(CONST_INTER); 1.4735 +%} 1.4736 + 1.4737 +// Long Immediate: low 32-bit mask 1.4738 +operand immL_32bits() %{ 1.4739 + predicate(n->get_long() == 0xFFFFFFFFL); 1.4740 + match(ConL); 1.4741 + op_cost(0); 1.4742 + 1.4743 + format %{ %} 1.4744 + interface(CONST_INTER); 1.4745 +%} 1.4746 + 1.4747 +// Long Immediate: value unchanged by a round trip through int (fits in 32 bits) 1.4748 +operand immL32() %{ 1.4749 + predicate(n->get_long() == (int)(n->get_long())); 1.4750 + match(ConL); 1.4751 + op_cost(20); 1.4752 + 1.4753 + format %{ %} 1.4754 + interface(CONST_INTER); 1.4755 +%} 1.4756 + 1.4757 +//Double Immediate zero 1.4758 +operand immD0() %{ 1.4759 + // Do additional (and counter-intuitive) test against NaN to work around VC++ 1.4760 + // bug that generates code such that NaNs compare equal to 0.0 1.4761 + predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) ); 1.4762 + match(ConD); 1.4763 + 1.4764 + op_cost(5); 1.4765 + format %{ %} 1.4766 + interface(CONST_INTER); 1.4767 +%} 1.4768 + 1.4769 +// Double Immediate one (only when UseSSE<=1) 1.4770 +operand immD1() %{ 1.4771 + predicate( UseSSE<=1 && n->getd() == 1.0 ); 1.4772 + match(ConD); 1.4773 + 1.4774 + op_cost(5); 1.4775 + format %{ %} 1.4776 + interface(CONST_INTER); 1.4777 +%} 1.4778 + 1.4779 +// Double Immediate (only when UseSSE<=1) 1.4780 +operand immD() %{ 1.4781 + predicate(UseSSE<=1); 1.4782 + match(ConD); 1.4783 + 1.4784 + op_cost(5); 1.4785 + format %{ %} 1.4786 + interface(CONST_INTER); 1.4787 +%} 1.4788 + 1.4789 +operand immXD() %{ 1.4790 + predicate(UseSSE>=2); 1.4791 + match(ConD); 1.4792 + 1.4793 + op_cost(5); 1.4794 + format %{ %} 1.4795 
+ interface(CONST_INTER); 1.4796 +%} 1.4797 + 1.4798 +// Double Immediate zero (only when UseSSE>=2) 1.4799 +operand immXD0() %{ 1.4800 + // Unlike immD0 there is no explicit NaN test here: the predicate compares 1.4801 + // jlong_cast(value) with 0 (presumably the raw bit pattern, so that neither NaN 1.4802 + // nor -0.0 matches -- NOTE(review): confirm jlong_cast semantics). 1.4803 + predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 ); 1.4804 + match(ConD); 1.4805 + 1.4806 + format %{ %} 1.4807 + interface(CONST_INTER); 1.4808 +%} 1.4809 + 1.4810 +// Float Immediate zero (only when UseSSE == 0) 1.4811 +operand immF0() %{ 1.4812 + predicate( UseSSE == 0 && n->getf() == 0.0 ); 1.4813 + match(ConF); 1.4814 + 1.4815 + op_cost(5); 1.4816 + format %{ %} 1.4817 + interface(CONST_INTER); 1.4818 +%} 1.4819 + 1.4820 +// Float Immediate (only when UseSSE == 0) 1.4821 +operand immF() %{ 1.4822 + predicate( UseSSE == 0 ); 1.4823 + match(ConF); 1.4824 + 1.4825 + op_cost(5); 1.4826 + format %{ %} 1.4827 + interface(CONST_INTER); 1.4828 +%} 1.4829 + 1.4830 +// Float Immediate (only when UseSSE >= 1) 1.4831 +operand immXF() %{ 1.4832 + predicate(UseSSE >= 1); 1.4833 + match(ConF); 1.4834 + 1.4835 + op_cost(5); 1.4836 + format %{ %} 1.4837 + interface(CONST_INTER); 1.4838 +%} 1.4839 + 1.4840 +// Float Immediate zero. 
Zero and not -0.0 1.4841 +operand immXF0() %{ 1.4842 + predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 ); 1.4843 + match(ConF); 1.4844 + 1.4845 + op_cost(5); 1.4846 + format %{ %} 1.4847 + interface(CONST_INTER); 1.4848 +%} 1.4849 + 1.4850 +// Immediates for special shifts (sign extend) 1.4851 + 1.4852 +// Shift-count constants 16 and 24 1.4853 +operand immI_16() %{ 1.4854 + predicate( n->get_int() == 16 ); 1.4855 + match(ConI); 1.4856 + 1.4857 + format %{ %} 1.4858 + interface(CONST_INTER); 1.4859 +%} 1.4860 + 1.4861 +operand immI_24() %{ 1.4862 + predicate( n->get_int() == 24 ); 1.4863 + match(ConI); 1.4864 + 1.4865 + format %{ %} 1.4866 + interface(CONST_INTER); 1.4867 +%} 1.4868 + 1.4869 +// Constant for byte-wide masking 1.4870 +operand immI_255() %{ 1.4871 + predicate( n->get_int() == 255 ); 1.4872 + match(ConI); 1.4873 + 1.4874 + format %{ %} 1.4875 + interface(CONST_INTER); 1.4876 +%} 1.4877 + 1.4878 +// Register Operands 1.4879 +// Integer Register 1.4880 +operand eRegI() %{ 1.4881 + constraint(ALLOC_IN_RC(e_reg)); 1.4882 + match(RegI); 1.4883 + match(xRegI); 1.4884 + match(eAXRegI); 1.4885 + match(eBXRegI); 1.4886 + match(eCXRegI); 1.4887 + match(eDXRegI); 1.4888 + match(eDIRegI); 1.4889 + match(eSIRegI); 1.4890 + 1.4891 + format %{ %} 1.4892 + interface(REG_INTER); 1.4893 +%} 1.4894 + 1.4895 +// Subset of Integer Register 1.4896 +operand xRegI(eRegI reg) %{ 1.4897 + constraint(ALLOC_IN_RC(x_reg)); 1.4898 + match(reg); 1.4899 + match(eAXRegI); 1.4900 + match(eBXRegI); 1.4901 + match(eCXRegI); 1.4902 + match(eDXRegI); 1.4903 + 1.4904 + format %{ %} 1.4905 + interface(REG_INTER); 1.4906 +%} 1.4907 + 1.4908 +// Special Registers 1.4909 +operand eAXRegI(xRegI reg) %{ 1.4910 + constraint(ALLOC_IN_RC(eax_reg)); 1.4911 + match(reg); 1.4912 + match(eRegI); 1.4913 + 1.4914 + format %{ "EAX" %} 1.4915 + interface(REG_INTER); 1.4916 +%} 1.4917 + 1.4918 +// Special Registers 1.4919 +operand eBXRegI(xRegI reg) %{ 1.4920 + constraint(ALLOC_IN_RC(ebx_reg)); 1.4921 + 
match(reg); 1.4922 + match(eRegI); 1.4923 + 1.4924 + format %{ "EBX" %} 1.4925 + interface(REG_INTER); 1.4926 +%} 1.4927 + 1.4928 +operand eCXRegI(xRegI reg) %{ 1.4929 + constraint(ALLOC_IN_RC(ecx_reg)); 1.4930 + match(reg); 1.4931 + match(eRegI); 1.4932 + 1.4933 + format %{ "ECX" %} 1.4934 + interface(REG_INTER); 1.4935 +%} 1.4936 + 1.4937 +operand eDXRegI(xRegI reg) %{ 1.4938 + constraint(ALLOC_IN_RC(edx_reg)); 1.4939 + match(reg); 1.4940 + match(eRegI); 1.4941 + 1.4942 + format %{ "EDX" %} 1.4943 + interface(REG_INTER); 1.4944 +%} 1.4945 + 1.4946 +operand eDIRegI(xRegI reg) %{ 1.4947 + constraint(ALLOC_IN_RC(edi_reg)); 1.4948 + match(reg); 1.4949 + match(eRegI); 1.4950 + 1.4951 + format %{ "EDI" %} 1.4952 + interface(REG_INTER); 1.4953 +%} 1.4954 + 1.4955 +operand naxRegI() %{ 1.4956 + constraint(ALLOC_IN_RC(nax_reg)); 1.4957 + match(RegI); 1.4958 + match(eCXRegI); 1.4959 + match(eDXRegI); 1.4960 + match(eSIRegI); 1.4961 + match(eDIRegI); 1.4962 + 1.4963 + format %{ %} 1.4964 + interface(REG_INTER); 1.4965 +%} 1.4966 + 1.4967 +operand nadxRegI() %{ 1.4968 + constraint(ALLOC_IN_RC(nadx_reg)); 1.4969 + match(RegI); 1.4970 + match(eBXRegI); 1.4971 + match(eCXRegI); 1.4972 + match(eSIRegI); 1.4973 + match(eDIRegI); 1.4974 + 1.4975 + format %{ %} 1.4976 + interface(REG_INTER); 1.4977 +%} 1.4978 + 1.4979 +operand ncxRegI() %{ 1.4980 + constraint(ALLOC_IN_RC(ncx_reg)); 1.4981 + match(RegI); 1.4982 + match(eAXRegI); 1.4983 + match(eDXRegI); 1.4984 + match(eSIRegI); 1.4985 + match(eDIRegI); 1.4986 + 1.4987 + format %{ %} 1.4988 + interface(REG_INTER); 1.4989 +%} 1.4990 + 1.4991 +// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg 1.4992 +// // 1.4993 +operand eSIRegI(xRegI reg) %{ 1.4994 + constraint(ALLOC_IN_RC(esi_reg)); 1.4995 + match(reg); 1.4996 + match(eRegI); 1.4997 + 1.4998 + format %{ "ESI" %} 1.4999 + interface(REG_INTER); 1.5000 +%} 1.5001 + 1.5002 +// Pointer Register 1.5003 +operand anyRegP() %{ 1.5004 + 
constraint(ALLOC_IN_RC(any_reg)); 1.5005 + match(RegP); 1.5006 + match(eAXRegP); 1.5007 + match(eBXRegP); 1.5008 + match(eCXRegP); 1.5009 + match(eDIRegP); 1.5010 + match(eRegP); 1.5011 + 1.5012 + format %{ %} 1.5013 + interface(REG_INTER); 1.5014 +%} 1.5015 + 1.5016 +operand eRegP() %{ 1.5017 + constraint(ALLOC_IN_RC(e_reg)); 1.5018 + match(RegP); 1.5019 + match(eAXRegP); 1.5020 + match(eBXRegP); 1.5021 + match(eCXRegP); 1.5022 + match(eDIRegP); 1.5023 + 1.5024 + format %{ %} 1.5025 + interface(REG_INTER); 1.5026 +%} 1.5027 + 1.5028 +// On windows95, EBP is not safe to use for implicit null tests. 1.5029 +operand eRegP_no_EBP() %{ 1.5030 + constraint(ALLOC_IN_RC(e_reg_no_rbp)); 1.5031 + match(RegP); 1.5032 + match(eAXRegP); 1.5033 + match(eBXRegP); 1.5034 + match(eCXRegP); 1.5035 + match(eDIRegP); 1.5036 + 1.5037 + op_cost(100); 1.5038 + format %{ %} 1.5039 + interface(REG_INTER); 1.5040 +%} 1.5041 + 1.5042 +operand naxRegP() %{ 1.5043 + constraint(ALLOC_IN_RC(nax_reg)); 1.5044 + match(RegP); 1.5045 + match(eBXRegP); 1.5046 + match(eDXRegP); 1.5047 + match(eCXRegP); 1.5048 + match(eSIRegP); 1.5049 + match(eDIRegP); 1.5050 + 1.5051 + format %{ %} 1.5052 + interface(REG_INTER); 1.5053 +%} 1.5054 + 1.5055 +operand nabxRegP() %{ 1.5056 + constraint(ALLOC_IN_RC(nabx_reg)); 1.5057 + match(RegP); 1.5058 + match(eCXRegP); 1.5059 + match(eDXRegP); 1.5060 + match(eSIRegP); 1.5061 + match(eDIRegP); 1.5062 + 1.5063 + format %{ %} 1.5064 + interface(REG_INTER); 1.5065 +%} 1.5066 + 1.5067 +operand pRegP() %{ 1.5068 + constraint(ALLOC_IN_RC(p_reg)); 1.5069 + match(RegP); 1.5070 + match(eBXRegP); 1.5071 + match(eDXRegP); 1.5072 + match(eSIRegP); 1.5073 + match(eDIRegP); 1.5074 + 1.5075 + format %{ %} 1.5076 + interface(REG_INTER); 1.5077 +%} 1.5078 + 1.5079 +// Special Registers 1.5080 +// Return a pointer value 1.5081 +operand eAXRegP(eRegP reg) %{ 1.5082 + constraint(ALLOC_IN_RC(eax_reg)); 1.5083 + match(reg); 1.5084 + format %{ "EAX" %} 1.5085 + interface(REG_INTER); 1.5086 
+%} 1.5087 + 1.5088 +// Used in AtomicAdd 1.5089 +operand eBXRegP(eRegP reg) %{ 1.5090 + constraint(ALLOC_IN_RC(ebx_reg)); 1.5091 + match(reg); 1.5092 + format %{ "EBX" %} 1.5093 + interface(REG_INTER); 1.5094 +%} 1.5095 + 1.5096 +// Tail-call (interprocedural jump) to interpreter 1.5097 +operand eCXRegP(eRegP reg) %{ 1.5098 + constraint(ALLOC_IN_RC(ecx_reg)); 1.5099 + match(reg); 1.5100 + format %{ "ECX" %} 1.5101 + interface(REG_INTER); 1.5102 +%} 1.5103 + 1.5104 +operand eSIRegP(eRegP reg) %{ 1.5105 + constraint(ALLOC_IN_RC(esi_reg)); 1.5106 + match(reg); 1.5107 + format %{ "ESI" %} 1.5108 + interface(REG_INTER); 1.5109 +%} 1.5110 + 1.5111 +// Used in rep stosw 1.5112 +operand eDIRegP(eRegP reg) %{ 1.5113 + constraint(ALLOC_IN_RC(edi_reg)); 1.5114 + match(reg); 1.5115 + format %{ "EDI" %} 1.5116 + interface(REG_INTER); 1.5117 +%} 1.5118 + 1.5119 +operand eBPRegP() %{ 1.5120 + constraint(ALLOC_IN_RC(ebp_reg)); 1.5121 + match(RegP); 1.5122 + format %{ "EBP" %} 1.5123 + interface(REG_INTER); 1.5124 +%} 1.5125 + 1.5126 +operand eRegL() %{ 1.5127 + constraint(ALLOC_IN_RC(long_reg)); 1.5128 + match(RegL); 1.5129 + match(eADXRegL); 1.5130 + 1.5131 + format %{ %} 1.5132 + interface(REG_INTER); 1.5133 +%} 1.5134 + 1.5135 +operand eADXRegL( eRegL reg ) %{ 1.5136 + constraint(ALLOC_IN_RC(eadx_reg)); 1.5137 + match(reg); 1.5138 + 1.5139 + format %{ "EDX:EAX" %} 1.5140 + interface(REG_INTER); 1.5141 +%} 1.5142 + 1.5143 +operand eBCXRegL( eRegL reg ) %{ 1.5144 + constraint(ALLOC_IN_RC(ebcx_reg)); 1.5145 + match(reg); 1.5146 + 1.5147 + format %{ "EBX:ECX" %} 1.5148 + interface(REG_INTER); 1.5149 +%} 1.5150 + 1.5151 +// Special case for integer high multiply 1.5152 +operand eADXRegL_low_only() %{ 1.5153 + constraint(ALLOC_IN_RC(eadx_reg)); 1.5154 + match(RegL); 1.5155 + 1.5156 + format %{ "EAX" %} 1.5157 + interface(REG_INTER); 1.5158 +%} 1.5159 + 1.5160 +// Flags register, used as output of compare instructions 1.5161 +operand eFlagsReg() %{ 1.5162 + 
constraint(ALLOC_IN_RC(int_flags)); 1.5163 + match(RegFlags); 1.5164 + 1.5165 + format %{ "EFLAGS" %} 1.5166 + interface(REG_INTER); 1.5167 +%} 1.5168 + 1.5169 +// Flags register, used as output of FLOATING POINT compare instructions 1.5170 +operand eFlagsRegU() %{ 1.5171 + constraint(ALLOC_IN_RC(int_flags)); 1.5172 + match(RegFlags); 1.5173 + 1.5174 + format %{ "EFLAGS_U" %} 1.5175 + interface(REG_INTER); 1.5176 +%} 1.5177 + 1.5178 +// Condition Code Register used by long compare 1.5179 +operand flagsReg_long_LTGE() %{ 1.5180 + constraint(ALLOC_IN_RC(int_flags)); 1.5181 + match(RegFlags); 1.5182 + format %{ "FLAGS_LTGE" %} 1.5183 + interface(REG_INTER); 1.5184 +%} 1.5185 +operand flagsReg_long_EQNE() %{ 1.5186 + constraint(ALLOC_IN_RC(int_flags)); 1.5187 + match(RegFlags); 1.5188 + format %{ "FLAGS_EQNE" %} 1.5189 + interface(REG_INTER); 1.5190 +%} 1.5191 +operand flagsReg_long_LEGT() %{ 1.5192 + constraint(ALLOC_IN_RC(int_flags)); 1.5193 + match(RegFlags); 1.5194 + format %{ "FLAGS_LEGT" %} 1.5195 + interface(REG_INTER); 1.5196 +%} 1.5197 + 1.5198 +// Double register operands (only when UseSSE < 2) 1.5199 +operand regD() %{ 1.5200 + predicate( UseSSE < 2 ); 1.5201 + constraint(ALLOC_IN_RC(dbl_reg)); 1.5202 + match(RegD); 1.5203 + match(regDPR1); 1.5204 + match(regDPR2); 1.5205 + format %{ %} 1.5206 + interface(REG_INTER); 1.5207 +%} 1.5208 + 1.5209 +operand regDPR1(regD reg) %{ 1.5210 + predicate( UseSSE < 2 ); 1.5211 + constraint(ALLOC_IN_RC(dbl_reg0)); 1.5212 + match(reg); 1.5213 + format %{ "FPR1" %} 1.5214 + interface(REG_INTER); 1.5215 +%} 1.5216 + 1.5217 +operand regDPR2(regD reg) %{ 1.5218 + predicate( UseSSE < 2 ); 1.5219 + constraint(ALLOC_IN_RC(dbl_reg1)); 1.5220 + match(reg); 1.5221 + format %{ "FPR2" %} 1.5222 + interface(REG_INTER); 1.5223 +%} 1.5224 + 1.5225 +operand regnotDPR1(regD reg) %{ 1.5226 + predicate( UseSSE < 2 ); 1.5227 + constraint(ALLOC_IN_RC(dbl_notreg0)); 1.5228 + match(reg); 1.5229 + format %{ %} 1.5230 + interface(REG_INTER); 1.5231 +%} 1.5232 + 1.5233 +// 
XMM Double register operands 1.5234 +operand regXD() %{ 1.5235 + predicate( UseSSE>=2 ); 1.5236 + constraint(ALLOC_IN_RC(xdb_reg)); 1.5237 + match(RegD); 1.5238 + match(regXD6); 1.5239 + match(regXD7); 1.5240 + format %{ %} 1.5241 + interface(REG_INTER); 1.5242 +%} 1.5243 + 1.5244 +// XMM6 double register operands 1.5245 +operand regXD6(regXD reg) %{ 1.5246 + predicate( UseSSE>=2 ); 1.5247 + constraint(ALLOC_IN_RC(xdb_reg6)); 1.5248 + match(reg); 1.5249 + format %{ "XMM6" %} 1.5250 + interface(REG_INTER); 1.5251 +%} 1.5252 + 1.5253 +// XMM7 double register operands 1.5254 +operand regXD7(regXD reg) %{ 1.5255 + predicate( UseSSE>=2 ); 1.5256 + constraint(ALLOC_IN_RC(xdb_reg7)); 1.5257 + match(reg); 1.5258 + format %{ "XMM7" %} 1.5259 + interface(REG_INTER); 1.5260 +%} 1.5261 + 1.5262 +// Float register operands 1.5263 +operand regF() %{ 1.5264 + predicate( UseSSE < 2 ); 1.5265 + constraint(ALLOC_IN_RC(flt_reg)); 1.5266 + match(RegF); 1.5267 + match(regFPR1); 1.5268 + format %{ %} 1.5269 + interface(REG_INTER); 1.5270 +%} 1.5271 + 1.5272 +// Float register operand restricted to class flt_reg0 (formats as FPR1) 1.5273 +operand regFPR1(regF reg) %{ 1.5274 + predicate( UseSSE < 2 ); 1.5275 + constraint(ALLOC_IN_RC(flt_reg0)); 1.5276 + match(reg); 1.5277 + format %{ "FPR1" %} 1.5278 + interface(REG_INTER); 1.5279 +%} 1.5280 + 1.5281 +// XMM Float register operands (only when UseSSE>=1) 1.5282 +operand regX() %{ 1.5283 + predicate( UseSSE>=1 ); 1.5284 + constraint(ALLOC_IN_RC(xmm_reg)); 1.5285 + match(RegF); 1.5286 + format %{ %} 1.5287 + interface(REG_INTER); 1.5288 +%} 1.5289 + 1.5290 + 1.5291 +//----------Memory Operands---------------------------------------------------- 1.5292 +// Direct Memory Operand 1.5293 +operand direct(immP addr) %{ 1.5294 + match(addr); 1.5295 + 1.5296 + format %{ "[$addr]" %} 1.5297 + interface(MEMORY_INTER) %{ 1.5298 + base(0xFFFFFFFF); 1.5299 + index(0x4); 1.5300 + scale(0x0); 1.5301 + disp($addr); 1.5302 + %} 1.5303 +%} 1.5304 + 1.5305 +// Indirect Memory Operand 1.5306 +operand indirect(eRegP reg) %{ 1.5307 + 
constraint(ALLOC_IN_RC(e_reg)); 1.5308 + match(reg); 1.5309 + 1.5310 + format %{ "[$reg]" %} 1.5311 + interface(MEMORY_INTER) %{ 1.5312 + base($reg); 1.5313 + index(0x4); 1.5314 + scale(0x0); 1.5315 + disp(0x0); 1.5316 + %} 1.5317 +%} 1.5318 + 1.5319 +// Indirect Memory Plus Short Offset Operand 1.5320 +operand indOffset8(eRegP reg, immI8 off) %{ 1.5321 + match(AddP reg off); 1.5322 + 1.5323 + format %{ "[$reg + $off]" %} 1.5324 + interface(MEMORY_INTER) %{ 1.5325 + base($reg); 1.5326 + index(0x4); 1.5327 + scale(0x0); 1.5328 + disp($off); 1.5329 + %} 1.5330 +%} 1.5331 + 1.5332 +// Indirect Memory Plus Long Offset Operand 1.5333 +operand indOffset32(eRegP reg, immI off) %{ 1.5334 + match(AddP reg off); 1.5335 + 1.5336 + format %{ "[$reg + $off]" %} 1.5337 + interface(MEMORY_INTER) %{ 1.5338 + base($reg); 1.5339 + index(0x4); 1.5340 + scale(0x0); 1.5341 + disp($off); 1.5342 + %} 1.5343 +%} 1.5344 + 1.5345 +// Indirect Memory Plus Long Offset Operand (pointer immediate as offset, integer register as base) 1.5346 +operand indOffset32X(eRegI reg, immP off) %{ 1.5347 + match(AddP off reg); 1.5348 + 1.5349 + format %{ "[$reg + $off]" %} 1.5350 + interface(MEMORY_INTER) %{ 1.5351 + base($reg); 1.5352 + index(0x4); 1.5353 + scale(0x0); 1.5354 + disp($off); 1.5355 + %} 1.5356 +%} 1.5357 + 1.5358 +// Indirect Memory Plus Index Register Plus Offset Operand 1.5359 +operand indIndexOffset(eRegP reg, eRegI ireg, immI off) %{ 1.5360 + match(AddP (AddP reg ireg) off); 1.5361 + 1.5362 + op_cost(10); 1.5363 + format %{"[$reg + $off + $ireg]" %} 1.5364 + interface(MEMORY_INTER) %{ 1.5365 + base($reg); 1.5366 + index($ireg); 1.5367 + scale(0x0); 1.5368 + disp($off); 1.5369 + %} 1.5370 +%} 1.5371 + 1.5372 +// Indirect Memory Plus Index Register Operand (no offset: disp is 0) 1.5373 +operand indIndex(eRegP reg, eRegI ireg) %{ 1.5374 + match(AddP reg ireg); 1.5375 + 1.5376 + op_cost(10); 1.5377 + format %{"[$reg + $ireg]" %} 1.5378 + interface(MEMORY_INTER) %{ 1.5379 + base($reg); 1.5380 + index($ireg); 1.5381 + scale(0x0); 1.5382 + disp(0x0); 1.5383 + 
%} 1.5384 +%} 1.5385 + 1.5386 +// // ------------------------------------------------------------------------- 1.5387 +// // 486 architecture doesn't support "scale * index + offset" without a base 1.5388 +// // ------------------------------------------------------------------------- 1.5389 +// // Scaled Memory Operands 1.5390 +// // Indirect Memory Times Scale Plus Offset Operand 1.5391 +// operand indScaleOffset(immP off, eRegI ireg, immI2 scale) %{ 1.5392 +// match(AddP off (LShiftI ireg scale)); 1.5393 +// 1.5394 +// op_cost(10); 1.5395 +// format %{"[$off + $ireg << $scale]" %} 1.5396 +// interface(MEMORY_INTER) %{ 1.5397 +// base(0x4); 1.5398 +// index($ireg); 1.5399 +// scale($scale); 1.5400 +// disp($off); 1.5401 +// %} 1.5402 +// %} 1.5403 + 1.5404 +// Indirect Memory Times Scale Plus Index Register 1.5405 +operand indIndexScale(eRegP reg, eRegI ireg, immI2 scale) %{ 1.5406 + match(AddP reg (LShiftI ireg scale)); 1.5407 + 1.5408 + op_cost(10); 1.5409 + format %{"[$reg + $ireg << $scale]" %} 1.5410 + interface(MEMORY_INTER) %{ 1.5411 + base($reg); 1.5412 + index($ireg); 1.5413 + scale($scale); 1.5414 + disp(0x0); 1.5415 + %} 1.5416 +%} 1.5417 + 1.5418 +// Indirect Memory Times Scale Plus Index Register Plus Offset Operand 1.5419 +operand indIndexScaleOffset(eRegP reg, immI off, eRegI ireg, immI2 scale) %{ 1.5420 + match(AddP (AddP reg (LShiftI ireg scale)) off); 1.5421 + 1.5422 + op_cost(10); 1.5423 + format %{"[$reg + $off + $ireg << $scale]" %} 1.5424 + interface(MEMORY_INTER) %{ 1.5425 + base($reg); 1.5426 + index($ireg); 1.5427 + scale($scale); 1.5428 + disp($off); 1.5429 + %} 1.5430 +%} 1.5431 + 1.5432 +//----------Load Long Memory Operands------------------------------------------ 1.5433 +// The load-long idiom will use its address expression again after loading 1.5434 +// the first word of the long. 
If the load-long destination overlaps with 1.5435 +// registers used in the addressing expression, the 2nd half will be loaded 1.5436 +// from a clobbered address. Fix this by requiring that load-long use 1.5437 +// address registers that do not overlap with the load-long target. 1.5438 + 1.5439 +// load-long support: address register restricted to register class esi_reg 1.5440 +operand load_long_RegP() %{ 1.5441 + constraint(ALLOC_IN_RC(esi_reg)); 1.5442 + match(RegP); 1.5443 + match(eSIRegP); 1.5444 + op_cost(100); 1.5445 + format %{ %} 1.5446 + interface(REG_INTER); 1.5447 +%} 1.5448 + 1.5449 +// Indirect Memory Operand Long 1.5450 +operand load_long_indirect(load_long_RegP reg) %{ 1.5451 + constraint(ALLOC_IN_RC(esi_reg)); 1.5452 + match(reg); 1.5453 + 1.5454 + format %{ "[$reg]" %} 1.5455 + interface(MEMORY_INTER) %{ 1.5456 + base($reg); 1.5457 + index(0x4); 1.5458 + scale(0x0); 1.5459 + disp(0x0); 1.5460 + %} 1.5461 +%} 1.5462 + 1.5463 +// Indirect Memory Plus Long Offset Operand (load-long variant) 1.5464 +operand load_long_indOffset32(load_long_RegP reg, immI off) %{ 1.5465 + match(AddP reg off); 1.5466 + 1.5467 + format %{ "[$reg + $off]" %} 1.5468 + interface(MEMORY_INTER) %{ 1.5469 + base($reg); 1.5470 + index(0x4); 1.5471 + scale(0x0); 1.5472 + disp($off); 1.5473 + %} 1.5474 +%} 1.5475 + 1.5476 +opclass load_long_memory(load_long_indirect, load_long_indOffset32); 1.5477 + 1.5478 + 1.5479 +//----------Special Memory Operands-------------------------------------------- 1.5480 +// Stack Slot Operand - This operand is used for loading and storing temporary 1.5481 +// values on the stack where a match requires a value to 1.5482 +// flow through memory. 
1.5483 +operand stackSlotP(sRegP reg) %{ 1.5484 + constraint(ALLOC_IN_RC(stack_slots)); 1.5485 + // No match rule because this operand is only generated in matching 1.5486 + format %{ "[$reg]" %} 1.5487 + interface(MEMORY_INTER) %{ 1.5488 + base(0x4); // ESP 1.5489 + index(0x4); // No Index 1.5490 + scale(0x0); // No Scale 1.5491 + disp($reg); // Stack Offset 1.5492 + %} 1.5493 +%} 1.5494 + 1.5495 +operand stackSlotI(sRegI reg) %{ 1.5496 + constraint(ALLOC_IN_RC(stack_slots)); 1.5497 + // No match rule because this operand is only generated in matching 1.5498 + format %{ "[$reg]" %} 1.5499 + interface(MEMORY_INTER) %{ 1.5500 + base(0x4); // ESP 1.5501 + index(0x4); // No Index 1.5502 + scale(0x0); // No Scale 1.5503 + disp($reg); // Stack Offset 1.5504 + %} 1.5505 +%} 1.5506 + 1.5507 +operand stackSlotF(sRegF reg) %{ 1.5508 + constraint(ALLOC_IN_RC(stack_slots)); 1.5509 + // No match rule because this operand is only generated in matching 1.5510 + format %{ "[$reg]" %} 1.5511 + interface(MEMORY_INTER) %{ 1.5512 + base(0x4); // ESP 1.5513 + index(0x4); // No Index 1.5514 + scale(0x0); // No Scale 1.5515 + disp($reg); // Stack Offset 1.5516 + %} 1.5517 +%} 1.5518 + 1.5519 +operand stackSlotD(sRegD reg) %{ 1.5520 + constraint(ALLOC_IN_RC(stack_slots)); 1.5521 + // No match rule because this operand is only generated in matching 1.5522 + format %{ "[$reg]" %} 1.5523 + interface(MEMORY_INTER) %{ 1.5524 + base(0x4); // ESP 1.5525 + index(0x4); // No Index 1.5526 + scale(0x0); // No Scale 1.5527 + disp($reg); // Stack Offset 1.5528 + %} 1.5529 +%} 1.5530 + 1.5531 +operand stackSlotL(sRegL reg) %{ 1.5532 + constraint(ALLOC_IN_RC(stack_slots)); 1.5533 + // No match rule because this operand is only generated in matching 1.5534 + format %{ "[$reg]" %} 1.5535 + interface(MEMORY_INTER) %{ 1.5536 + base(0x4); // ESP 1.5537 + index(0x4); // No Index 1.5538 + scale(0x0); // No Scale 1.5539 + disp($reg); // Stack Offset 1.5540 + %} 1.5541 +%} 1.5542 + 1.5543 +//----------Memory 
Operands - Win95 Implicit Null Variants---------------- 1.5544 +// Indirect Memory Operand 1.5545 +operand indirect_win95_safe(eRegP_no_EBP reg) 1.5546 +%{ 1.5547 + constraint(ALLOC_IN_RC(e_reg)); 1.5548 + match(reg); 1.5549 + 1.5550 + op_cost(100); 1.5551 + format %{ "[$reg]" %} 1.5552 + interface(MEMORY_INTER) %{ 1.5553 + base($reg); 1.5554 + index(0x4); 1.5555 + scale(0x0); 1.5556 + disp(0x0); 1.5557 + %} 1.5558 +%} 1.5559 + 1.5560 +// Indirect Memory Plus Short Offset Operand 1.5561 +operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off) 1.5562 +%{ 1.5563 + match(AddP reg off); 1.5564 + 1.5565 + op_cost(100); 1.5566 + format %{ "[$reg + $off]" %} 1.5567 + interface(MEMORY_INTER) %{ 1.5568 + base($reg); 1.5569 + index(0x4); 1.5570 + scale(0x0); 1.5571 + disp($off); 1.5572 + %} 1.5573 +%} 1.5574 + 1.5575 +// Indirect Memory Plus Long Offset Operand 1.5576 +operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off) 1.5577 +%{ 1.5578 + match(AddP reg off); 1.5579 + 1.5580 + op_cost(100); 1.5581 + format %{ "[$reg + $off]" %} 1.5582 + interface(MEMORY_INTER) %{ 1.5583 + base($reg); 1.5584 + index(0x4); 1.5585 + scale(0x0); 1.5586 + disp($off); 1.5587 + %} 1.5588 +%} 1.5589 + 1.5590 +// Indirect Memory Plus Index Register Plus Offset Operand 1.5591 +operand indIndexOffset_win95_safe(eRegP_no_EBP reg, eRegI ireg, immI off) 1.5592 +%{ 1.5593 + match(AddP (AddP reg ireg) off); 1.5594 + 1.5595 + op_cost(100); 1.5596 + format %{"[$reg + $off + $ireg]" %} 1.5597 + interface(MEMORY_INTER) %{ 1.5598 + base($reg); 1.5599 + index($ireg); 1.5600 + scale(0x0); 1.5601 + disp($off); 1.5602 + %} 1.5603 +%} 1.5604 + 1.5605 +// Indirect Memory Times Scale Plus Index Register 1.5606 +operand indIndexScale_win95_safe(eRegP_no_EBP reg, eRegI ireg, immI2 scale) 1.5607 +%{ 1.5608 + match(AddP reg (LShiftI ireg scale)); 1.5609 + 1.5610 + op_cost(100); 1.5611 + format %{"[$reg + $ireg << $scale]" %} 1.5612 + interface(MEMORY_INTER) %{ 1.5613 + base($reg); 1.5614 + index($ireg); 1.5615 + 
scale($scale); 1.5616 + disp(0x0); 1.5617 + %} 1.5618 +%} 1.5619 + 1.5620 +// Indirect Memory Times Scale Plus Index Register Plus Offset Operand 1.5621 +operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, eRegI ireg, immI2 scale) 1.5622 +%{ 1.5623 + match(AddP (AddP reg (LShiftI ireg scale)) off); 1.5624 + 1.5625 + op_cost(100); 1.5626 + format %{"[$reg + $off + $ireg << $scale]" %} 1.5627 + interface(MEMORY_INTER) %{ 1.5628 + base($reg); 1.5629 + index($ireg); 1.5630 + scale($scale); 1.5631 + disp($off); 1.5632 + %} 1.5633 +%} 1.5634 + 1.5635 +//----------Conditional Branch Operands---------------------------------------- 1.5636 +// Comparison Op - This is the operation of the comparison, and is limited to 1.5637 +// the following set of codes: 1.5638 +// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) 1.5639 +// 1.5640 +// Other attributes of the comparison, such as unsignedness, are specified 1.5641 +// by the comparison instruction that sets a condition code flags register. 1.5642 +// That result is represented by a flags operand whose subtype is appropriate 1.5643 +// to the unsignedness (etc.) of the comparison. 1.5644 +// 1.5645 +// Later, the instruction which matches both the Comparison Op (a Bool) and 1.5646 +// the flags (produced by the Cmp) specifies the coding of the comparison op 1.5647 +// by matching a specific subtype of Bool operand below, such as cmpOpU. 1.5648 + 1.5649 +// Comparison Code 1.5650 +operand cmpOp() %{ 1.5651 + match(Bool); 1.5652 + 1.5653 + format %{ "" %} 1.5654 + interface(COND_INTER) %{ 1.5655 + equal(0x4); 1.5656 + not_equal(0x5); 1.5657 + less(0xC); 1.5658 + greater_equal(0xD); 1.5659 + less_equal(0xE); 1.5660 + greater(0xF); 1.5661 + %} 1.5662 +%} 1.5663 + 1.5664 +// Comparison Code, unsigned compare. Used by FP also, with 1.5665 +// C2 (unordered) turned into GT or LT already. The other bits 1.5666 +// C0 and C3 are turned into Carry & Zero flags. 
1.5667 +operand cmpOpU() %{ 1.5668 + match(Bool); 1.5669 + 1.5670 + format %{ "" %} 1.5671 + interface(COND_INTER) %{ 1.5672 + equal(0x4); 1.5673 + not_equal(0x5); 1.5674 + less(0x2); 1.5675 + greater_equal(0x3); 1.5676 + less_equal(0x6); 1.5677 + greater(0x7); 1.5678 + %} 1.5679 +%} 1.5680 + 1.5681 +// Comparison Code for FP conditional move 1.5682 +operand cmpOp_fcmov() %{ 1.5683 + match(Bool); 1.5684 + 1.5685 + format %{ "" %} 1.5686 + interface(COND_INTER) %{ 1.5687 + equal (0x0C8); 1.5688 + not_equal (0x1C8); 1.5689 + less (0x0C0); 1.5690 + greater_equal(0x1C0); 1.5691 + less_equal (0x0D0); 1.5692 + greater (0x1D0); 1.5693 + %} 1.5694 +%} 1.5695 + 1.5696 +// Comparison Code used in long compares (less/greater and less_equal/greater_equal encodings are swapped relative to cmpOp) 1.5697 +operand cmpOp_commute() %{ 1.5698 + match(Bool); 1.5699 + 1.5700 + format %{ "" %} 1.5701 + interface(COND_INTER) %{ 1.5702 + equal(0x4); 1.5703 + not_equal(0x5); 1.5704 + less(0xF); 1.5705 + greater_equal(0xE); 1.5706 + less_equal(0xD); 1.5707 + greater(0xC); 1.5708 + %} 1.5709 +%} 1.5710 + 1.5711 +//----------OPERAND CLASSES---------------------------------------------------- 1.5712 +// Operand Classes are groups of operands that are used to simplify 1.5713 +// instruction definitions by not requiring the AD writer to specify separate 1.5714 +// instructions for every form of operand when the instruction accepts 1.5715 +// multiple operand types with the same basic encoding and format. The classic 1.5716 +// case of this is memory operands. 1.5717 + 1.5718 +opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset, 1.5719 + indIndex, indIndexScale, indIndexScaleOffset); 1.5720 + 1.5721 +// Long memory operations are encoded in 2 instructions and a +4 offset. 1.5722 +// This means some kind of offset is always required and you cannot use 1.5723 +// an oop as the offset (done when working on static globals). 
1.5724 +opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset, 1.5725 + indIndex, indIndexScale, indIndexScaleOffset); 1.5726 +// (same operand list as opclass memory, without indOffset32X) 1.5727 + 1.5728 +//----------PIPELINE----------------------------------------------------------- 1.5729 +// Rules which define the behavior of the target architecture's pipeline. 1.5730 +pipeline %{ 1.5731 + 1.5732 +//----------ATTRIBUTES--------------------------------------------------------- 1.5733 +attributes %{ 1.5734 + variable_size_instructions; // Variable size instructions 1.5735 + max_instructions_per_bundle = 3; // Up to 3 instructions per bundle 1.5736 + instruction_unit_size = 1; // Instruction sizes are in units of 1 byte 1.5737 + instruction_fetch_unit_size = 16; // The processor fetches one line 1.5738 + instruction_fetch_units = 1; // of 16 bytes 1.5739 + 1.5740 + // List of nop instructions 1.5741 + nops( MachNop ); 1.5742 +%} 1.5743 + 1.5744 +//----------RESOURCES---------------------------------------------------------- 1.5745 +// Resources are the functional units available to the machine 1.5746 + 1.5747 +// Generic P2/P3 pipeline 1.5748 +// 3 decoders, only D0 handles big operands; a "bundle" is the limit of 1.5749 +// 3 instructions decoded per cycle. 1.5750 +// 2 load/store ops per cycle, 1 branch, 1 FPU, 1.5751 +// 2 ALU ops, only ALU0 handles mul/div instructions. 1.5752 +resources( D0, D1, D2, DECODE = D0 | D1 | D2, 1.5753 + MS0, MS1, MEM = MS0 | MS1, 1.5754 + BR, FPU, 1.5755 + ALU0, ALU1, ALU = ALU0 | ALU1 ); 1.5756 + 1.5757 +//----------PIPELINE DESCRIPTION----------------------------------------------- 1.5758 +// Pipeline Description specifies the stages in the machine's pipeline 1.5759 + 1.5760 +// Generic P2/P3 pipeline 1.5761 +pipe_desc(S0, S1, S2, S3, S4, S5); 1.5762 + 1.5763 +//----------PIPELINE CLASSES--------------------------------------------------- 1.5764 +// Pipeline Classes describe the stages in which input and output are 1.5765 +// referenced by the hardware pipeline.
1.5766 + 1.5767 +// Naming convention: ialu or fpu 1.5768 +// Then: _reg 1.5769 +// Then: _reg if there is a 2nd register 1.5770 +// Then: _long if it's a pair of instructions implementing a long 1.5771 +// Then: _fat if it requires the big decoder 1.5772 +// Or: _mem if it requires the big decoder and a memory unit. 1.5773 + 1.5774 +// Integer ALU reg operation 1.5775 +pipe_class ialu_reg(eRegI dst) %{ 1.5776 + single_instruction; 1.5777 + dst : S4(write); 1.5778 + dst : S3(read); 1.5779 + DECODE : S0; // any decoder 1.5780 + ALU : S3; // any alu 1.5781 +%} 1.5782 + 1.5783 +// Long ALU reg operation 1.5784 +pipe_class ialu_reg_long(eRegL dst) %{ 1.5785 + instruction_count(2); 1.5786 + dst : S4(write); 1.5787 + dst : S3(read); 1.5788 + DECODE : S0(2); // any 2 decoders 1.5789 + ALU : S3(2); // both alus 1.5790 +%} 1.5791 + 1.5792 +// Integer ALU reg operation using big decoder 1.5793 +pipe_class ialu_reg_fat(eRegI dst) %{ 1.5794 + single_instruction; 1.5795 + dst : S4(write); 1.5796 + dst : S3(read); 1.5797 + D0 : S0; // big decoder only 1.5798 + ALU : S3; // any alu 1.5799 +%} 1.5800 + 1.5801 +// Long ALU reg operation using big decoder 1.5802 +pipe_class ialu_reg_long_fat(eRegL dst) %{ 1.5803 + instruction_count(2); 1.5804 + dst : S4(write); 1.5805 + dst : S3(read); 1.5806 + D0 : S0(2); // big decoder only; twice 1.5807 + ALU : S3(2); // any 2 alus 1.5808 +%} 1.5809 + 1.5810 +// Integer ALU reg-reg operation 1.5811 +pipe_class ialu_reg_reg(eRegI dst, eRegI src) %{ 1.5812 + single_instruction; 1.5813 + dst : S4(write); 1.5814 + src : S3(read); 1.5815 + DECODE : S0; // any decoder 1.5816 + ALU : S3; // any alu 1.5817 +%} 1.5818 + 1.5819 +// Long ALU reg-reg operation 1.5820 +pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{ 1.5821 + instruction_count(2); 1.5822 + dst : S4(write); 1.5823 + src : S3(read); 1.5824 + DECODE : S0(2); // any 2 decoders 1.5825 + ALU : S3(2); // both alus 1.5826 +%} 1.5827 + 1.5828 +// Integer ALU reg-reg operation using big decoder (src is declared as a memory operand) 1.5829 +pipe_class
ialu_reg_reg_fat(eRegI dst, memory src) %{ 1.5830 + single_instruction; 1.5831 + dst : S4(write); 1.5832 + src : S3(read); 1.5833 + D0 : S0; // big decoder only 1.5834 + ALU : S3; // any alu 1.5835 +%} 1.5836 + 1.5837 +// Long ALU reg-reg operation using big decoder 1.5838 +pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{ 1.5839 + instruction_count(2); 1.5840 + dst : S4(write); 1.5841 + src : S3(read); 1.5842 + D0 : S0(2); // big decoder only; twice 1.5843 + ALU : S3(2); // both alus 1.5844 +%} 1.5845 + 1.5846 +// Integer ALU reg-mem operation 1.5847 +pipe_class ialu_reg_mem(eRegI dst, memory mem) %{ 1.5848 + single_instruction; 1.5849 + dst : S5(write); 1.5850 + mem : S3(read); 1.5851 + D0 : S0; // big decoder only 1.5852 + ALU : S4; // any alu 1.5853 + MEM : S3; // any mem 1.5854 +%} 1.5855 + 1.5856 +// Long ALU reg-mem operation 1.5857 +pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{ 1.5858 + instruction_count(2); 1.5859 + dst : S5(write); 1.5860 + mem : S3(read); 1.5861 + D0 : S0(2); // big decoder only; twice 1.5862 + ALU : S4(2); // any 2 alus 1.5863 + MEM : S3(2); // both mems 1.5864 +%} 1.5865 + 1.5866 +// Integer mem operation (prefetch) 1.5867 +pipe_class ialu_mem(memory mem) 1.5868 +%{ 1.5869 + single_instruction; 1.5870 + mem : S3(read); 1.5871 + D0 : S0; // big decoder only 1.5872 + MEM : S3; // any mem 1.5873 +%} 1.5874 + 1.5875 +// Integer Store to Memory 1.5876 +pipe_class ialu_mem_reg(memory mem, eRegI src) %{ 1.5877 + single_instruction; 1.5878 + mem : S3(read); 1.5879 + src : S5(read); 1.5880 + D0 : S0; // big decoder only 1.5881 + ALU : S4; // any alu 1.5882 + MEM : S3; 1.5883 +%} 1.5884 + 1.5885 +// Long Store to Memory 1.5886 +pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{ 1.5887 + instruction_count(2); 1.5888 + mem : S3(read); 1.5889 + src : S5(read); 1.5890 + D0 : S0(2); // big decoder only; twice 1.5891 + ALU : S4(2); // any 2 alus 1.5892 + MEM : S3(2); // Both mems 1.5893 +%} 1.5894 + 1.5895 +// Integer Store to Memory (immediate form: memory operand only)
1.5896 +pipe_class ialu_mem_imm(memory mem) %{ 1.5897 + single_instruction; 1.5898 + mem : S3(read); 1.5899 + D0 : S0; // big decoder only 1.5900 + ALU : S4; // any alu 1.5901 + MEM : S3; 1.5902 +%} 1.5903 + 1.5904 +// Integer ALU0 reg-reg operation 1.5905 +pipe_class ialu_reg_reg_alu0(eRegI dst, eRegI src) %{ 1.5906 + single_instruction; 1.5907 + dst : S4(write); 1.5908 + src : S3(read); 1.5909 + D0 : S0; // Big decoder only 1.5910 + ALU0 : S3; // only alu0 1.5911 +%} 1.5912 + 1.5913 +// Integer ALU0 reg-mem operation 1.5914 +pipe_class ialu_reg_mem_alu0(eRegI dst, memory mem) %{ 1.5915 + single_instruction; 1.5916 + dst : S5(write); 1.5917 + mem : S3(read); 1.5918 + D0 : S0; // big decoder only 1.5919 + ALU0 : S4; // ALU0 only 1.5920 + MEM : S3; // any mem 1.5921 +%} 1.5922 + 1.5923 +// Integer ALU reg-reg operation whose result is written to the flags operand (cr) 1.5924 +pipe_class ialu_cr_reg_reg(eFlagsReg cr, eRegI src1, eRegI src2) %{ 1.5925 + single_instruction; 1.5926 + cr : S4(write); 1.5927 + src1 : S3(read); 1.5928 + src2 : S3(read); 1.5929 + DECODE : S0; // any decoder 1.5930 + ALU : S3; // any alu 1.5931 +%} 1.5932 + 1.5933 +// Integer ALU reg-imm operation whose result is written to the flags operand (cr) 1.5934 +pipe_class ialu_cr_reg_imm(eFlagsReg cr, eRegI src1) %{ 1.5935 + single_instruction; 1.5936 + cr : S4(write); 1.5937 + src1 : S3(read); 1.5938 + DECODE : S0; // any decoder 1.5939 + ALU : S3; // any alu 1.5940 +%} 1.5941 + 1.5942 +// Integer ALU reg-mem operation whose result is written to the flags operand (cr) 1.5943 +pipe_class ialu_cr_reg_mem(eFlagsReg cr, eRegI src1, memory src2) %{ 1.5944 + single_instruction; 1.5945 + cr : S4(write); 1.5946 + src1 : S3(read); 1.5947 + src2 : S3(read); 1.5948 + D0 : S0; // big decoder only 1.5949 + ALU : S4; // any alu 1.5950 + MEM : S3; 1.5951 +%} 1.5952 + 1.5953 +// Conditional move reg-reg (4-instruction sequence; p, q and y are all read) 1.5954 +pipe_class pipe_cmplt( eRegI p, eRegI q, eRegI y ) %{ 1.5955 + instruction_count(4); 1.5956 + y : S4(read); 1.5957 + q : S3(read); 1.5958 + p : S3(read); 1.5959 + DECODE : S0(4); // any decoder 1.5960 +%} 1.5961 + 1.5962 +// Conditional move reg-reg
1.5963 +pipe_class pipe_cmov_reg( eRegI dst, eRegI src, eFlagsReg cr ) %{ 1.5964 + single_instruction; 1.5965 + dst : S4(write); 1.5966 + src : S3(read); 1.5967 + cr : S3(read); 1.5968 + DECODE : S0; // any decoder 1.5969 +%} 1.5970 + 1.5971 +// Conditional move reg-mem 1.5972 +pipe_class pipe_cmov_mem( eFlagsReg cr, eRegI dst, memory src) %{ 1.5973 + single_instruction; 1.5974 + dst : S4(write); 1.5975 + src : S3(read); 1.5976 + cr : S3(read); 1.5977 + DECODE : S0; // any decoder 1.5978 + MEM : S3; 1.5979 +%} 1.5980 + 1.5981 +// Conditional move reg-reg long 1.5982 +pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{ 1.5983 + single_instruction; 1.5984 + dst : S4(write); 1.5985 + src : S3(read); 1.5986 + cr : S3(read); 1.5987 + DECODE : S0(2); // any 2 decoders 1.5988 +%} 1.5989 + 1.5990 +// Conditional move double reg-reg 1.5991 +pipe_class pipe_cmovD_reg( eFlagsReg cr, regDPR1 dst, regD src) %{ 1.5992 + single_instruction; 1.5993 + dst : S4(write); 1.5994 + src : S3(read); 1.5995 + cr : S3(read); 1.5996 + DECODE : S0; // any decoder 1.5997 +%} 1.5998 + 1.5999 +// Float reg operation (single register operand, read only) 1.6000 +pipe_class fpu_reg(regD dst) %{ 1.6001 + instruction_count(2); 1.6002 + dst : S3(read); 1.6003 + DECODE : S0(2); // any 2 decoders 1.6004 + FPU : S3; 1.6005 +%} 1.6006 + 1.6007 +// Float reg-reg operation 1.6008 +pipe_class fpu_reg_reg(regD dst, regD src) %{ 1.6009 + instruction_count(2); 1.6010 + dst : S4(write); 1.6011 + src : S3(read); 1.6012 + DECODE : S0(2); // any 2 decoders 1.6013 + FPU : S3; 1.6014 +%} 1.6015 + 1.6016 +// Float reg-reg-reg operation 1.6017 +pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2) %{ 1.6018 + instruction_count(3); 1.6019 + dst : S4(write); 1.6020 + src1 : S3(read); 1.6021 + src2 : S3(read); 1.6022 + DECODE : S0(3); // any 3 decoders 1.6023 + FPU : S3(2); 1.6024 +%} 1.6025 + 1.6026 +// Float reg-reg-reg-reg operation 1.6027 +pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ 1.6028 +
instruction_count(4); 1.6029 + dst : S4(write); 1.6030 + src1 : S3(read); 1.6031 + src2 : S3(read); 1.6032 + src3 : S3(read); 1.6033 + DECODE : S0(4); // any decoders, 4 in total 1.6034 + FPU : S3(2); 1.6035 +%} 1.6036 + 1.6037 +// Float reg-mem-reg-reg operation 1.6038 +pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3) %{ 1.6039 + instruction_count(4); 1.6040 + dst : S4(write); 1.6041 + src1 : S3(read); 1.6042 + src2 : S3(read); 1.6043 + src3 : S3(read); 1.6044 + DECODE : S1(3); // any 3 decoders 1.6045 + D0 : S0; // Big decoder only 1.6046 + FPU : S3(2); 1.6047 + MEM : S3; 1.6048 +%} 1.6049 + 1.6050 +// Float reg-mem operation 1.6051 +pipe_class fpu_reg_mem(regD dst, memory mem) %{ 1.6052 + instruction_count(2); 1.6053 + dst : S5(write); 1.6054 + mem : S3(read); 1.6055 + D0 : S0; // big decoder only 1.6056 + DECODE : S1; // any decoder for FPU POP 1.6057 + FPU : S4; 1.6058 + MEM : S3; // any mem 1.6059 +%} 1.6060 + 1.6061 +// Float reg-reg-mem operation 1.6062 +pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem) %{ 1.6063 + instruction_count(3); 1.6064 + dst : S5(write); 1.6065 + src1 : S3(read); 1.6066 + mem : S3(read); 1.6067 + D0 : S0; // big decoder only 1.6068 + DECODE : S1(2); // any decoder for FPU POP 1.6069 + FPU : S4; 1.6070 + MEM : S3; // any mem 1.6071 +%} 1.6072 + 1.6073 +// Float mem-reg operation 1.6074 +pipe_class fpu_mem_reg(memory mem, regD src) %{ 1.6075 + instruction_count(2); 1.6076 + src : S5(read); 1.6077 + mem : S3(read); 1.6078 + DECODE : S0; // any decoder for FPU PUSH 1.6079 + D0 : S1; // big decoder only 1.6080 + FPU : S4; 1.6081 + MEM : S3; // any mem 1.6082 +%} 1.6083 + 1.6084 +pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2) %{ 1.6085 + instruction_count(3); 1.6086 + src1 : S3(read); 1.6087 + src2 : S3(read); 1.6088 + mem : S3(read); 1.6089 + DECODE : S0(2); // any decoder for FPU PUSH 1.6090 + D0 : S1; // big decoder only 1.6091 + FPU : S4; 1.6092 + MEM : S3; // any mem 1.6093 +%} 1.6094 + 1.6095 +pipe_class
fpu_mem_reg_mem(memory mem, regD src1, memory src2) %{ 1.6096 + instruction_count(3); 1.6097 + src1 : S3(read); 1.6098 + src2 : S3(read); 1.6099 + mem : S4(read); 1.6100 + DECODE : S0; // any decoder for FPU PUSH 1.6101 + D0 : S0(2); // big decoder only 1.6102 + FPU : S4; 1.6103 + MEM : S3(2); // any mem 1.6104 +%} 1.6105 + 1.6106 +pipe_class fpu_mem_mem(memory dst, memory src1) %{ 1.6107 + instruction_count(2); 1.6108 + src1 : S3(read); 1.6109 + dst : S4(read); 1.6110 + D0 : S0(2); // big decoder only 1.6111 + MEM : S3(2); // any mem 1.6112 +%} 1.6113 + 1.6114 +pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{ 1.6115 + instruction_count(3); 1.6116 + src1 : S3(read); 1.6117 + src2 : S3(read); 1.6118 + dst : S4(read); 1.6119 + D0 : S0(3); // big decoder only 1.6120 + FPU : S4; 1.6121 + MEM : S3(3); // any mem 1.6122 +%} 1.6123 + 1.6124 +pipe_class fpu_mem_reg_con(memory mem, regD src1) %{ 1.6125 + instruction_count(3); 1.6126 + src1 : S4(read); 1.6127 + mem : S4(read); 1.6128 + DECODE : S0; // any decoder for FPU PUSH 1.6129 + D0 : S0(2); // big decoder only 1.6130 + FPU : S4; 1.6131 + MEM : S3(2); // any mem 1.6132 +%} 1.6133 + 1.6134 +// Float load constant 1.6135 +pipe_class fpu_reg_con(regD dst) %{ 1.6136 + instruction_count(2); 1.6137 + dst : S5(write); 1.6138 + D0 : S0; // big decoder only for the load 1.6139 + DECODE : S1; // any decoder for FPU POP 1.6140 + FPU : S4; 1.6141 + MEM : S3; // any mem 1.6142 +%} 1.6143 + 1.6144 +// Float load constant 1.6145 +pipe_class fpu_reg_reg_con(regD dst, regD src) %{ 1.6146 + instruction_count(3); 1.6147 + dst : S5(write); 1.6148 + src : S3(read); 1.6149 + D0 : S0; // big decoder only for the load 1.6150 + DECODE : S1(2); // any decoder for FPU POP 1.6151 + FPU : S4; 1.6152 + MEM : S3; // any mem 1.6153 +%} 1.6154 + 1.6155 +// Unconditional branch 1.6156 +pipe_class pipe_jmp( label labl ) %{ 1.6157 + single_instruction; 1.6158 + BR : S3; 1.6159 +%} 1.6160 + 1.6161 +// Conditional branch 1.6162
+pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{ 1.6163 + single_instruction; 1.6164 + cr : S1(read); 1.6165 + BR : S3; 1.6166 +%} 1.6167 + 1.6168 +// Allocation idiom 1.6169 +pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{ 1.6170 + instruction_count(1); force_serialization; 1.6171 + fixed_latency(6); 1.6172 + heap_ptr : S3(read); 1.6173 + DECODE : S0(3); 1.6174 + D0 : S2; 1.6175 + MEM : S3; 1.6176 + ALU : S3(2); 1.6177 + dst : S5(write); 1.6178 + BR : S5; 1.6179 +%} 1.6180 + 1.6181 +// Generic big/slow expanded idiom 1.6182 +pipe_class pipe_slow( ) %{ 1.6183 + instruction_count(10); multiple_bundles; force_serialization; 1.6184 + fixed_latency(100); 1.6185 + D0 : S0(2); 1.6186 + MEM : S3(2); 1.6187 +%} 1.6188 + 1.6189 +// The real do-nothing guy 1.6190 +pipe_class empty( ) %{ 1.6191 + instruction_count(0); 1.6192 +%} 1.6193 + 1.6194 +// Define the class for the Nop node 1.6195 +define %{ 1.6196 + MachNop = empty; 1.6197 +%} 1.6198 + 1.6199 +%} 1.6200 + 1.6201 +//----------INSTRUCTIONS------------------------------------------------------- 1.6202 +// 1.6203 +// match -- States which machine-independent subtree may be replaced 1.6204 +// by this instruction. 1.6205 +// ins_cost -- The estimated cost of this instruction is used by instruction 1.6206 +// selection to identify a minimum cost tree of machine 1.6207 +// instructions that matches a tree of machine-independent 1.6208 +// instructions. 1.6209 +// format -- A string providing the disassembly for this instruction. 1.6210 +// The value of an instruction's operand may be inserted 1.6211 +// by referring to it with a '$' prefix. 1.6212 +// opcode -- Three instruction opcodes may be provided. These are referred 1.6213 +// to within an encode class as $primary, $secondary, and $tertiary 1.6214 +// respectively.
The primary opcode is commonly used to 1.6215 +// indicate the type of machine instruction, while secondary 1.6216 +// and tertiary are often used for prefix options or addressing 1.6217 +// modes. 1.6218 +// ins_encode -- A list of encode classes with parameters. The encode class 1.6219 +// name must have been defined in an 'enc_class' specification 1.6220 +// in the encode section of the architecture description. 1.6221 + 1.6222 +//----------BSWAP-Instruction-------------------------------------------------- 1.6223 +instruct bytes_reverse_int(eRegI dst) %{ 1.6224 + match(Set dst (ReverseBytesI dst)); 1.6225 + 1.6226 + format %{ "BSWAP $dst" %} 1.6227 + opcode(0x0F, 0xC8); 1.6228 + ins_encode( OpcP, OpcSReg(dst) ); 1.6229 + ins_pipe( ialu_reg ); 1.6230 +%} 1.6231 +// Reverse bytes of a long: BSWAP each half, then exchange the two halves (per the format below) 1.6232 +instruct bytes_reverse_long(eRegL dst) %{ 1.6233 + match(Set dst (ReverseBytesL dst)); 1.6234 + 1.6235 + format %{ "BSWAP $dst.lo\n\t" 1.6236 + "BSWAP $dst.hi\n\t" 1.6237 + "XCHG $dst.lo $dst.hi" %} 1.6238 + 1.6239 + ins_cost(125); 1.6240 + ins_encode( bswap_long_bytes(dst) ); 1.6241 + ins_pipe( ialu_reg_reg); 1.6242 +%} 1.6243 + 1.6244 + 1.6245 +//----------Load/Store/Move Instructions--------------------------------------- 1.6246 +//----------Load Instructions-------------------------------------------------- 1.6247 +// Load Byte (8bit signed) 1.6248 +instruct loadB(xRegI dst, memory mem) %{ 1.6249 + match(Set dst (LoadB mem)); 1.6250 + 1.6251 + ins_cost(125); 1.6252 + format %{ "MOVSX8 $dst,$mem" %} 1.6253 + opcode(0xBE, 0x0F); 1.6254 + ins_encode( OpcS, OpcP, RegMem(dst,mem)); 1.6255 + ins_pipe( ialu_reg_mem ); 1.6256 +%} 1.6257 + 1.6258 +// Load Byte (8bit UNsigned): matches a LoadB whose result is ANDed with the immI_255 bytemask operand 1.6259 +instruct loadUB(xRegI dst, memory mem, immI_255 bytemask) %{ 1.6260 + match(Set dst (AndI (LoadB mem) bytemask)); 1.6261 + 1.6262 + ins_cost(125); 1.6263 + format %{ "MOVZX8 $dst,$mem" %} 1.6264 + opcode(0xB6, 0x0F); 1.6265 + ins_encode( OpcS, OpcP, RegMem(dst,mem)); 1.6266 + ins_pipe( ialu_reg_mem ); 1.6267 +%} 1.6268 +
1.6269 +// Load Char (16bit unsigned) 1.6270 +instruct loadC(eRegI dst, memory mem) %{ 1.6271 + match(Set dst (LoadC mem)); 1.6272 + 1.6273 + ins_cost(125); 1.6274 + format %{ "MOVZX $dst,$mem" %} 1.6275 + opcode(0xB7, 0x0F); 1.6276 + ins_encode( OpcS, OpcP, RegMem(dst,mem)); 1.6277 + ins_pipe( ialu_reg_mem ); 1.6278 +%} 1.6279 + 1.6280 +// Load Integer 1.6281 +instruct loadI(eRegI dst, memory mem) %{ 1.6282 + match(Set dst (LoadI mem)); 1.6283 + 1.6284 + ins_cost(125); 1.6285 + format %{ "MOV $dst,$mem" %} 1.6286 + opcode(0x8B); 1.6287 + ins_encode( OpcP, RegMem(dst,mem)); 1.6288 + ins_pipe( ialu_reg_mem ); 1.6289 +%} 1.6290 + 1.6291 +// Load Long. Cannot clobber address while loading, so restrict address 1.6292 +// register to ESI (presumably enforced by load_long_memory -- confirm). Only matches when atomic access is not required. 1.6293 +instruct loadL(eRegL dst, load_long_memory mem) %{ 1.6294 + predicate(!((LoadLNode*)n)->require_atomic_access()); 1.6295 + match(Set dst (LoadL mem)); 1.6296 + 1.6297 + ins_cost(250); 1.6298 + format %{ "MOV $dst.lo,$mem\n\t" 1.6299 + "MOV $dst.hi,$mem+4" %} 1.6300 + opcode(0x8B, 0x8B); 1.6301 + ins_encode( OpcP, RegMem(dst,mem), OpcS, RegMem_Hi(dst,mem)); 1.6302 + ins_pipe( ialu_reg_long_mem ); 1.6303 +%} 1.6304 + 1.6305 +// Volatile Load Long. Must be atomic, so do 64-bit FILD 1.6306 +// then store it down to the stack and reload on the int 1.6307 +// side.
1.6308 +instruct loadL_volatile(stackSlotL dst, memory mem) %{ 1.6309 + predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access()); 1.6310 + match(Set dst (LoadL mem)); 1.6311 + 1.6312 + ins_cost(200); 1.6313 + format %{ "FILD $mem\t# Atomic volatile long load\n\t" 1.6314 + "FISTp $dst" %} 1.6315 + ins_encode(enc_loadL_volatile(mem,dst)); 1.6316 + ins_pipe( fpu_reg_mem ); 1.6317 +%} 1.6318 +// Volatile Load Long through an XMM temp (UseSSE>=2); the result goes to a stack slot 1.6319 +instruct loadLX_volatile(stackSlotL dst, memory mem, regXD tmp) %{ 1.6320 + predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 1.6321 + match(Set dst (LoadL mem)); 1.6322 + effect(TEMP tmp); 1.6323 + ins_cost(180); 1.6324 + format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 1.6325 + "MOVSD $dst,$tmp" %} 1.6326 + ins_encode(enc_loadLX_volatile(mem, dst, tmp)); 1.6327 + ins_pipe( pipe_slow ); 1.6328 +%} 1.6329 +// Volatile Load Long through an XMM temp (UseSSE>=2); the result goes to an eRegL (lo/hi halves) 1.6330 +instruct loadLX_reg_volatile(eRegL dst, memory mem, regXD tmp) %{ 1.6331 + predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 1.6332 + match(Set dst (LoadL mem)); 1.6333 + effect(TEMP tmp); 1.6334 + ins_cost(160); 1.6335 + format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 1.6336 + "MOVD $dst.lo,$tmp\n\t" 1.6337 + "PSRLQ $tmp,32\n\t" 1.6338 + "MOVD $dst.hi,$tmp" %} 1.6339 + ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp)); 1.6340 + ins_pipe( pipe_slow ); 1.6341 +%} 1.6342 + 1.6343 +// Load Range 1.6344 +instruct loadRange(eRegI dst, memory mem) %{ 1.6345 + match(Set dst (LoadRange mem)); 1.6346 + 1.6347 + ins_cost(125); 1.6348 + format %{ "MOV $dst,$mem" %} 1.6349 + opcode(0x8B); 1.6350 + ins_encode( OpcP, RegMem(dst,mem)); 1.6351 + ins_pipe( ialu_reg_mem ); 1.6352 +%} 1.6353 + 1.6354 + 1.6355 +// Load Pointer 1.6356 +instruct loadP(eRegP dst, memory mem) %{ 1.6357 + match(Set dst (LoadP mem)); 1.6358 + 1.6359 + ins_cost(125); 1.6360 + format %{ "MOV $dst,$mem" %} 1.6361 + opcode(0x8B); 1.6362 + ins_encode( OpcP, RegMem(dst,mem)); 1.6363 + ins_pipe( ialu_reg_mem ); 1.6364 +%} 1.6365 + 1.6366 +//
Load Klass Pointer 1.6367 +instruct loadKlass(eRegP dst, memory mem) %{ 1.6368 + match(Set dst (LoadKlass mem)); 1.6369 + 1.6370 + ins_cost(125); 1.6371 + format %{ "MOV $dst,$mem" %} 1.6372 + opcode(0x8B); 1.6373 + ins_encode( OpcP, RegMem(dst,mem)); 1.6374 + ins_pipe( ialu_reg_mem ); 1.6375 +%} 1.6376 + 1.6377 +// Load Short (16bit signed) 1.6378 +instruct loadS(eRegI dst, memory mem) %{ 1.6379 + match(Set dst (LoadS mem)); 1.6380 + 1.6381 + ins_cost(125); 1.6382 + format %{ "MOVSX $dst,$mem" %} 1.6383 + opcode(0xBF, 0x0F); 1.6384 + ins_encode( OpcS, OpcP, RegMem(dst,mem)); 1.6385 + ins_pipe( ialu_reg_mem ); 1.6386 +%} 1.6387 + 1.6388 +// Load Double 1.6389 +instruct loadD(regD dst, memory mem) %{ 1.6390 + predicate(UseSSE<=1); 1.6391 + match(Set dst (LoadD mem)); 1.6392 + 1.6393 + ins_cost(150); 1.6394 + format %{ "FLD_D ST,$mem\n\t" 1.6395 + "FSTP $dst" %} 1.6396 + opcode(0xDD); /* DD /0 */ 1.6397 + ins_encode( OpcP, RMopc_Mem(0x00,mem), 1.6398 + Pop_Reg_D(dst) ); 1.6399 + ins_pipe( fpu_reg_mem ); 1.6400 +%} 1.6401 + 1.6402 +// Load Double to XMM 1.6403 +instruct loadXD(regXD dst, memory mem) %{ 1.6404 + predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); 1.6405 + match(Set dst (LoadD mem)); 1.6406 + ins_cost(145); 1.6407 + format %{ "MOVSD $dst,$mem" %} 1.6408 + ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem)); 1.6409 + ins_pipe( pipe_slow ); 1.6410 +%} 1.6411 +// Load Double to XMM when UseXmmLoadAndClearUpper is off (MOVLPD form) 1.6412 +instruct loadXD_partial(regXD dst, memory mem) %{ 1.6413 + predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 1.6414 + match(Set dst (LoadD mem)); 1.6415 + ins_cost(145); 1.6416 + format %{ "MOVLPD $dst,$mem" %} 1.6417 + ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,mem)); 1.6418 + ins_pipe( pipe_slow ); 1.6419 +%} 1.6420 + 1.6421 +// Load to XMM register (single-precision floating point) 1.6422 +// MOVSS instruction 1.6423 +instruct loadX(regX dst, memory mem) %{ 1.6424 + predicate(UseSSE>=1); 1.6425 + match(Set dst (LoadF mem)); 1.6426 + ins_cost(145);
1.6427 + format %{ "MOVSS $dst,$mem" %} 1.6428 + ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem)); 1.6429 + ins_pipe( pipe_slow ); 1.6430 +%} 1.6431 + 1.6432 +// Load Float 1.6433 +instruct loadF(regF dst, memory mem) %{ 1.6434 + predicate(UseSSE==0); 1.6435 + match(Set dst (LoadF mem)); 1.6436 + 1.6437 + ins_cost(150); 1.6438 + format %{ "FLD_S ST,$mem\n\t" 1.6439 + "FSTP $dst" %} 1.6440 + opcode(0xD9); /* D9 /0 */ 1.6441 + ins_encode( OpcP, RMopc_Mem(0x00,mem), 1.6442 + Pop_Reg_F(dst) ); 1.6443 + ins_pipe( fpu_reg_mem ); 1.6444 +%} 1.6445 + 1.6446 +// Load Aligned Packed Byte to XMM register 1.6447 +instruct loadA8B(regXD dst, memory mem) %{ 1.6448 + predicate(UseSSE>=1); 1.6449 + match(Set dst (Load8B mem)); 1.6450 + ins_cost(125); 1.6451 + format %{ "MOVQ $dst,$mem\t! packed8B" %} 1.6452 + ins_encode( movq_ld(dst, mem)); 1.6453 + ins_pipe( pipe_slow ); 1.6454 +%} 1.6455 + 1.6456 +// Load Aligned Packed Short to XMM register 1.6457 +instruct loadA4S(regXD dst, memory mem) %{ 1.6458 + predicate(UseSSE>=1); 1.6459 + match(Set dst (Load4S mem)); 1.6460 + ins_cost(125); 1.6461 + format %{ "MOVQ $dst,$mem\t! packed4S" %} 1.6462 + ins_encode( movq_ld(dst, mem)); 1.6463 + ins_pipe( pipe_slow ); 1.6464 +%} 1.6465 + 1.6466 +// Load Aligned Packed Char to XMM register 1.6467 +instruct loadA4C(regXD dst, memory mem) %{ 1.6468 + predicate(UseSSE>=1); 1.6469 + match(Set dst (Load4C mem)); 1.6470 + ins_cost(125); 1.6471 + format %{ "MOVQ $dst,$mem\t! packed4C" %} 1.6472 + ins_encode( movq_ld(dst, mem)); 1.6473 + ins_pipe( pipe_slow ); 1.6474 +%} 1.6475 + 1.6476 +// Load Aligned Packed Integer to XMM register 1.6477 +instruct load2IU(regXD dst, memory mem) %{ 1.6478 + predicate(UseSSE>=1); 1.6479 + match(Set dst (Load2I mem)); 1.6480 + ins_cost(125); 1.6481 + format %{ "MOVQ $dst,$mem\t!
packed2I" %} 1.6482 + ins_encode( movq_ld(dst, mem)); 1.6483 + ins_pipe( pipe_slow ); 1.6484 +%} 1.6485 + 1.6486 +// Load Aligned Packed Single to XMM 1.6487 +instruct loadA2F(regXD dst, memory mem) %{ 1.6488 + predicate(UseSSE>=1); 1.6489 + match(Set dst (Load2F mem)); 1.6490 + ins_cost(145); 1.6491 + format %{ "MOVQ $dst,$mem\t! packed2F" %} 1.6492 + ins_encode( movq_ld(dst, mem)); 1.6493 + ins_pipe( pipe_slow ); 1.6494 +%} 1.6495 + 1.6496 +// Load Effective Address 1.6497 +instruct leaP8(eRegP dst, indOffset8 mem) %{ 1.6498 + match(Set dst mem); 1.6499 + 1.6500 + ins_cost(110); 1.6501 + format %{ "LEA $dst,$mem" %} 1.6502 + opcode(0x8D); 1.6503 + ins_encode( OpcP, RegMem(dst,mem)); 1.6504 + ins_pipe( ialu_reg_reg_fat ); 1.6505 +%} 1.6506 + 1.6507 +instruct leaP32(eRegP dst, indOffset32 mem) %{ 1.6508 + match(Set dst mem); 1.6509 + 1.6510 + ins_cost(110); 1.6511 + format %{ "LEA $dst,$mem" %} 1.6512 + opcode(0x8D); 1.6513 + ins_encode( OpcP, RegMem(dst,mem)); 1.6514 + ins_pipe( ialu_reg_reg_fat ); 1.6515 +%} 1.6516 + 1.6517 +instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{ 1.6518 + match(Set dst mem); 1.6519 + 1.6520 + ins_cost(110); 1.6521 + format %{ "LEA $dst,$mem" %} 1.6522 + opcode(0x8D); 1.6523 + ins_encode( OpcP, RegMem(dst,mem)); 1.6524 + ins_pipe( ialu_reg_reg_fat ); 1.6525 +%} 1.6526 + 1.6527 +instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{ 1.6528 + match(Set dst mem); 1.6529 + 1.6530 + ins_cost(110); 1.6531 + format %{ "LEA $dst,$mem" %} 1.6532 + opcode(0x8D); 1.6533 + ins_encode( OpcP, RegMem(dst,mem)); 1.6534 + ins_pipe( ialu_reg_reg_fat ); 1.6535 +%} 1.6536 + 1.6537 +instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{ 1.6538 + match(Set dst mem); 1.6539 + 1.6540 + ins_cost(110); 1.6541 + format %{ "LEA $dst,$mem" %} 1.6542 + opcode(0x8D); 1.6543 + ins_encode( OpcP, RegMem(dst,mem)); 1.6544 + ins_pipe( ialu_reg_reg_fat ); 1.6545 +%} 1.6546 + 1.6547 +// Load Constant 1.6548 +instruct loadConI(eRegI dst, immI src) %{ 1.6549 +
match(Set dst src); 1.6550 + 1.6551 + format %{ "MOV $dst,$src" %} 1.6552 + ins_encode( LdImmI(dst, src) ); 1.6553 + ins_pipe( ialu_reg_fat ); 1.6554 +%} 1.6555 + 1.6556 +// Load Constant zero (XOR of dst with itself; declares the flags register as killed) 1.6557 +instruct loadConI0(eRegI dst, immI0 src, eFlagsReg cr) %{ 1.6558 + match(Set dst src); 1.6559 + effect(KILL cr); 1.6560 + 1.6561 + ins_cost(50); 1.6562 + format %{ "XOR $dst,$dst" %} 1.6563 + opcode(0x33); /* + rd */ 1.6564 + ins_encode( OpcP, RegReg( dst, dst ) ); 1.6565 + ins_pipe( ialu_reg ); 1.6566 +%} 1.6567 + 1.6568 +instruct loadConP(eRegP dst, immP src) %{ 1.6569 + match(Set dst src); 1.6570 + 1.6571 + format %{ "MOV $dst,$src" %} 1.6572 + opcode(0xB8); /* + rd */ 1.6573 + ins_encode( LdImmP(dst, src) ); 1.6574 + ins_pipe( ialu_reg_fat ); 1.6575 +%} 1.6576 + 1.6577 +instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{ 1.6578 + match(Set dst src); 1.6579 + effect(KILL cr); 1.6580 + ins_cost(200); 1.6581 + format %{ "MOV $dst.lo,$src.lo\n\t" 1.6582 + "MOV $dst.hi,$src.hi" %} 1.6583 + opcode(0xB8); 1.6584 + ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) ); 1.6585 + ins_pipe( ialu_reg_long_fat ); 1.6586 +%} 1.6587 + 1.6588 +instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{ 1.6589 + match(Set dst src); 1.6590 + effect(KILL cr); 1.6591 + ins_cost(150); 1.6592 + format %{ "XOR $dst.lo,$dst.lo\n\t" 1.6593 + "XOR $dst.hi,$dst.hi" %} 1.6594 + opcode(0x33,0x33); 1.6595 + ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) ); 1.6596 + ins_pipe( ialu_reg_long ); 1.6597 +%} 1.6598 + 1.6599 +// The instruction usage is guarded by predicate in operand immF(). 1.6600 +instruct loadConF(regF dst, immF src) %{ 1.6601 + match(Set dst src); 1.6602 + ins_cost(125); 1.6603 + 1.6604 + format %{ "FLD_S ST,$src\n\t" 1.6605 + "FSTP $dst" %} 1.6606 + opcode(0xD9, 0x00); /* D9 /0 */ 1.6607 + ins_encode(LdImmF(src), Pop_Reg_F(dst) ); 1.6608 + ins_pipe( fpu_reg_con ); 1.6609 +%} 1.6610 + 1.6611 +// The instruction usage is guarded by predicate in operand immXF().
1.6612 +instruct loadConX(regX dst, immXF con) %{ 1.6613 + match(Set dst con); 1.6614 + ins_cost(125); 1.6615 + format %{ "MOVSS $dst,[$con]" %} 1.6616 + ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), LdImmX(dst, con)); 1.6617 + ins_pipe( pipe_slow ); 1.6618 +%} 1.6619 +// Load float 0.0 into an XMM register by XORPS of the register with itself. 1.6620 +// The instruction usage is guarded by predicate in operand immXF0(). 1.6621 +instruct loadConX0(regX dst, immXF0 src) %{ 1.6622 + match(Set dst src); 1.6623 + ins_cost(100); 1.6624 + format %{ "XORPS $dst,$dst\t# float 0.0" %} 1.6625 + ins_encode( Opcode(0x0F), Opcode(0x57), RegReg(dst,dst)); 1.6626 + ins_pipe( pipe_slow ); 1.6627 +%} 1.6628 +// Load double constant with FLD_D followed by FSTP into dst. 1.6629 +// The instruction usage is guarded by predicate in operand immD(). 1.6630 +instruct loadConD(regD dst, immD src) %{ 1.6631 + match(Set dst src); 1.6632 + ins_cost(125); 1.6633 + 1.6634 + format %{ "FLD_D ST,$src\n\t" 1.6635 + "FSTP $dst" %} 1.6636 + ins_encode(LdImmD(src), Pop_Reg_D(dst) ); 1.6637 + ins_pipe( fpu_reg_con ); 1.6638 +%} 1.6639 +// Load double constant into an XMM register (MOVSD from the constant's address). 1.6640 +// The instruction usage is guarded by predicate in operand immXD(). 1.6641 +instruct loadConXD(regXD dst, immXD con) %{ 1.6642 + match(Set dst con); 1.6643 + ins_cost(125); 1.6644 + format %{ "MOVSD $dst,[$con]" %} 1.6645 + ins_encode(load_conXD(dst, con)); 1.6646 + ins_pipe( pipe_slow ); 1.6647 +%} 1.6648 + 1.6649 +// The instruction usage is guarded by predicate in operand immXD0().
instruct loadConXD0(regXD dst, immXD0 src) %{
  match(Set dst src);
  ins_cost(100);
  // Double 0.0 is produced by XOR-ing the register with itself (66 0F 57).
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x57), RegReg(dst,dst));
  ins_pipe( pipe_slow );
%}

// Load Stack Slot (int)
instruct loadSSI(eRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot (long): two 32-bit moves, low word then high word.
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot (pointer)
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot (x87 float): push from memory, then pop into $dst.
instruct loadSSF(regF dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_F(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot (x87 double): push from memory, then pop into $dst.
instruct loadSSD(regD dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_D(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Prefetch instructions.
// Must be safe to execute with invalid address (cannot fault).
//
// Read prefetches are selected by UseSSE / 3DNow! support / ReadPrefetchInstr;
// write prefetches by UseSSE / 3DNow! support / AllocatePrefetchInstr.

// No SSE and no 3DNow!: nothing to emit.
instruct prefetchr0( memory mem ) %{
  predicate(UseSSE==0 && !VM_Version::supports_3dnow());
  match(PrefetchRead mem);
  ins_cost(0);
  size(0);
  format %{ "PREFETCHR (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct prefetchr( memory mem ) %{
  // Parenthesized to make the existing && / || grouping explicit.
  predicate((UseSSE==0 && VM_Version::supports_3dnow()) || ReadPrefetchInstr==3);
  match(PrefetchRead mem);
  ins_cost(100);

  format %{ "PREFETCHR $mem\t! Prefetch into level 1 cache for read" %}
  opcode(0x0F, 0x0d);     /* Opcode 0F 0d /0 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
  ins_pipe(ialu_mem);
%}

instruct prefetchrNTA( memory mem ) %{
  predicate(UseSSE>=1 && ReadPrefetchInstr==0);
  match(PrefetchRead mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for read" %}
  opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
  ins_pipe(ialu_mem);
%}

instruct prefetchrT0( memory mem ) %{
  predicate(UseSSE>=1 && ReadPrefetchInstr==1);
  match(PrefetchRead mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch into L1 and L2 caches for read" %}
  opcode(0x0F, 0x18);     /* Opcode 0F 18 /1 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
  ins_pipe(ialu_mem);
%}

instruct prefetchrT2( memory mem ) %{
  predicate(UseSSE>=1 && ReadPrefetchInstr==2);
  match(PrefetchRead mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch into L2 cache for read" %}
  opcode(0x0F, 0x18);     /* Opcode 0F 18 /3 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x03,mem));
  ins_pipe(ialu_mem);
%}

// No SSE and no 3DNow!: nothing to emit.
instruct prefetchw0( memory mem ) %{
  predicate(UseSSE==0 && !VM_Version::supports_3dnow());
  match(PrefetchWrite mem);
  ins_cost(0);
  size(0);
  // Named PREFETCHW so debug output distinguishes it from prefetchr0.
  format %{ "PREFETCHW (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct prefetchw( memory mem ) %{
  // Parenthesized to make the existing && / || grouping explicit.
  predicate((UseSSE==0 && VM_Version::supports_3dnow()) || AllocatePrefetchInstr==3);
  match( PrefetchWrite mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch into L1 cache and mark modified" %}
  opcode(0x0F, 0x0D);     /* Opcode 0F 0D /1 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
  ins_pipe(ialu_mem);
%}

instruct prefetchwNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchWrite mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for write" %}
  opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
  ins_pipe(ialu_mem);
%}

instruct prefetchwT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchWrite mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch into L1 and L2 caches for write" %}
  opcode(0x0F, 0x18);     /* Opcode 0F 18 /1 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
  ins_pipe(ialu_mem);
%}

instruct prefetchwT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchWrite mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch into L2 cache for write" %}
  opcode(0x0F, 0x18);     /* Opcode 0F 18 /3 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x03,mem));
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short
instruct storeC(memory mem, eRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  // The secondary byte 0x66 is listed first in ins_encode so that it
  // precedes the 0x89 MOV opcode in the emitted bytes.
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, eRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long (only when atomic access is NOT required): two 32-bit moves.
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Volatile Store Long. Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST. Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
// x87 flavour (UseSSE<=1): probe the address with a CMP, then FILD/FISTP the
// 64-bit value. The CMP is why the flags register is listed as killed.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect(KILL cr);
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode(OpcP, RegMem(EAX, mem), enc_storeL_volatile(mem, src));
  ins_pipe(fpu_reg_mem);
%}

// SSE2 flavour, source in a stack slot: bounce the 64 bits through an XMM
// temporary with two MOVSDs, after the same address probe.
instruct storeLX_volatile(memory mem, stackSlotL src, regXD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect(TEMP tmp, KILL cr);
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode(OpcP, RegMem(EAX, mem), enc_storeLX_volatile(mem, src, tmp));
  ins_pipe(pipe_slow);
%}

// SSE2 flavour, source in a register pair: assemble the two halves in XMM
// temporaries (MOVD + MOVD + PUNPCKLDQ), then store with a single MOVSD.
instruct storeLX_reg_volatile(memory mem, eRegL src, regXD tmp2, regXD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect(TEMP tmp2, TEMP tmp, KILL cr);
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode(OpcP, RegMem(EAX, mem), enc_storeLX_reg_volatile(mem, src, tmp, tmp2));
  ins_pipe(pipe_slow);
%}

// Pointer store from a register; used for unknown oops and raw pointers.
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode(OpcP, RegMem(src, mem));
  ins_pipe(ialu_mem_reg);
%}

// 32-bit immediate store.
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode(OpcP, RMopc_Mem(0x00, mem), Con32(src));
  ins_pipe(ialu_mem_imm);
%}

// 16-bit (short/char) immediate store; only selected when UseStoreImmI16 is on.
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode(SizePrefix, OpcP, RMopc_Mem(0x00, mem), Con16(src));
  ins_pipe(ialu_mem_imm);
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode(OpcP, RMopc_Mem(0x00, mem), Con32(src));
  ins_pipe(ialu_mem_imm);
%}

// Byte immediate store.
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode(OpcP, RMopc_Mem(0x00, mem), Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}

// Packed stores from an XMM register. All four packed variants (8B, 4C, 2I
// here and 2F further down) share the same MOVQ encoding, movq_st.

// Aligned packed bytes.
instruct storeA8B(memory mem, regXD src) %{
  predicate(UseSSE>=1);
  match(Set mem (Store8B mem src));
  ins_cost(145);
  format %{ "MOVQ $mem,$src\t! packed8B" %}
  ins_encode(movq_st(mem, src));
  ins_pipe(pipe_slow);
%}

// Aligned packed chars/shorts.
instruct storeA4C(memory mem, regXD src) %{
  predicate(UseSSE>=1);
  match(Set mem (Store4C mem src));
  ins_cost(145);
  format %{ "MOVQ $mem,$src\t! packed4C" %}
  ins_encode(movq_st(mem, src));
  ins_pipe(pipe_slow);
%}

// Aligned packed ints.
instruct storeA2I(memory mem, regXD src) %{
  predicate(UseSSE>=1);
  match(Set mem (Store2I mem src));
  ins_cost(145);
  format %{ "MOVQ $mem,$src\t! packed2I" %}
  ins_encode(movq_st(mem, src));
  ins_pipe(pipe_slow);
%}

// CMS card-mark store: same byte-immediate encoding as storeImmB, but
// matched on StoreCM.
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode(OpcP, RMopc_Mem(0x00, mem), Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}

// Double store from the x87 stack top (UseSSE<=1).
instruct storeD(memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD);       /* DD /2 */
  ins_encode(enc_FP_store(mem, src));
  ins_pipe(fpu_mem_reg);
%}

// The x87 double store rounds by itself, so a RoundDouble feeding a StoreD
// uses the same encoding as the plain store.
instruct storeD_rounded(memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD);       /* DD /2 */
  ins_encode(enc_FP_store(mem, src));
  ins_pipe(fpu_mem_reg);
%}

// Double store from an XMM register: MOVSD (F2 0F 11), requires UseSSE>=2.
instruct storeXD(memory mem, regXD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode(Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src, mem));
  ins_pipe(pipe_slow);
%}

// Float store from an XMM register: MOVSS (F3 0F 11), requires UseSSE>=1.
instruct storeX(memory mem, regX src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode(Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, mem));
  ins_pipe(pipe_slow);
%}

// Aligned packed single floats from an XMM register.
instruct storeA2F(memory mem, regXD src) %{
  predicate(UseSSE>=1);
  match(Set mem (Store2F mem src));
  ins_cost(145);
  format %{ "MOVQ $mem,$src\t! packed2F" %}
  ins_encode(movq_st(mem, src));
  ins_pipe(pipe_slow);
%}

// Float store from the x87 stack top (UseSSE==0).
instruct storeF(memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9);       /* D9 /2 */
  ins_encode(enc_FP_store(mem, src));
  ins_pipe(fpu_mem_reg);
%}

// The x87 float store rounds by itself, so a RoundFloat feeding a StoreF
// uses the same encoding as the plain store.
instruct storeF_rounded(memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9);       /* D9 /2 */
  ins_encode(enc_FP_store(mem, src));
  ins_pipe(fpu_mem_reg);
%}

// Same idea for a double narrowed to float: the ConvD2F is absorbed into the
// single-precision store of the x87 double register.
instruct storeF_Drounded(memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9);       /* D9 /2 */
  ins_encode(enc_FP_store(mem, src));
  ins_pipe(fpu_mem_reg);
%}

// Float immediate stored as its 32-bit pattern with an integer MOV; costed
// well below the FPU-register store (50 vs. 100).
// Selection of this rule is restricted by the predicate in operand immF().
instruct storeF_imm(memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode(OpcP, RMopc_Mem(0x00, mem), Con32F_as_bits(src));
  ins_pipe(ialu_mem_imm);
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immXF().
instruct storeX_imm( memory mem, immXF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32XF_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, eRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Pointer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot: two 32-bit moves, low word then high word.
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors
//
// Each barrier kind has a general rule (cost 400) and a zero-cost rule with an
// empty encoding that is chosen when its Matcher predicate holds.

instruct membar_acquire() %{
  match(MemBarAcquire);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire" %}
  ins_encode( enc_membar_acquire );
  ins_pipe(pipe_slow);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquire);
  predicate(Matcher::prior_fast_lock(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release() %{
  match(MemBarRelease);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release" %}
  ins_encode( enc_membar_release );
  ins_pipe(pipe_slow);
%}

instruct membar_release_lock() %{
  match(MemBarRelease);
  predicate(Matcher::post_fast_unlock(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Unlike the acquire/release rules, this one declares no size(0).
instruct membar_volatile() %{
  match(MemBarVolatile);
  ins_cost(400);

  format %{ "MEMBAR-volatile" %}
  ins_encode( enc_membar_volatile );
  ins_pipe(pipe_slow);
%}

instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------
// Int -> pointer reinterpretation. Both operands are constrained to the EAX
// register classes, and the encoding is empty.
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

// Pointer -> int reinterpretation: a plain register copy.
instruct castP2X(eRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

//----------Conditional Move---------------------------------------------------
// Conditional move: int, register source, signed flags/condition.
instruct cmovI_reg(eRegI dst, eRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move: int, register source, unsigned flags/condition.
instruct cmovI_regU( eRegI dst, eRegI src, eFlagsRegU cr, cmpOpU cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move: int, memory source (LoadI folded in), signed.
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, eRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move: int, memory source (LoadI folded in), unsigned.
instruct cmovI_memu(cmpOpU cop, eFlagsRegU cr, eRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move: pointer, register source, signed.
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note:  a CMoveP is generated for  stubs and native wrappers
//        regardless of whether we are on a P6, so we
//        emulate a cmov here
// This rule has no predicate and a higher cost (300) than cmovP_reg (200).
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move: pointer, register source, unsigned.
instruct cmovP_regU(eRegP dst, eRegP src, eFlagsRegU cr, cmpOpU cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand.  ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// x87 conditional move of a double, driven by the unsigned flags register.
// The destination is constrained to regDPR1; only used while doubles live
// on the x87 stack (UseSSE<=1).
instruct fcmovD_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode(enc_cmov_d(cop, src));
  ins_pipe(pipe_cmovD_reg);
%}

// Float counterpart of the rule above. It reuses the same enc_cmov_d encoding
// and pipeline class; only used while floats live on the x87 stack
// (UseSSE==0).
instruct fcmovF_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regF src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode(enc_cmov_d(cop, src));
  ins_pipe(pipe_cmovD_reg);
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  // Emulated with a branch around an x87 push + store-and-pop.
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
            "skip:" %}
  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_D(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovD_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# float\n"
            "skip:" %}
  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_F(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovD_reg );
%}

// No CMOVE with SSE/SSE2
instruct fcmovX_regS(cmpOp cop, eFlagsReg cr, regX dst, regX src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2
instruct fcmovXD_regS(cmpOp cop, eFlagsReg cr, regXD dst, regXD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  // This rule moves a double (CMoveD / movdbl), so the label says "double".
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# double\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// unsigned version
instruct fcmovX_regU(cmpOpU cop, eFlagsRegU cr, regX dst, regX src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// unsigned version
instruct fcmovXD_regU(cmpOpU cop, eFlagsRegU cr, regXD dst, regXD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  // This rule moves a double (CMoveD / movdbl), so the label says "double".
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# double\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// Conditional move of a long: one CMOV per 32-bit half, same condition.
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Unsigned-flags variant of cmovL_reg.
instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------
// Integer Addition Instructions
instruct addI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC $dst" %}
  opcode(0x40); /* 40+rd: one-byte INC r32 */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// LEA form of int add-immediate: separate destination, and no flags kill is
// declared (contrast with the ADD rules above).
instruct leaI_eReg_immI(eRegI dst, eRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer counterpart of leaI_eReg_immI.
instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

instruct decI_eReg(eRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC $dst" %}
  opcode(0x48); /* 48+rd: one-byte DEC r32 */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

instruct addP_eReg(eRegP dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81,0x00); /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add with the second operand loaded from memory (LoadI folded in).
instruct addI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Read-modify-write add: load, add and store on the same memory operand.
instruct addI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "ADD $dst,$src" %}
  opcode(0x01);  /* Opcode 01 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Add Memory with Immediate
instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x81);               /* Opcode 81 /0 id */
  ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// Increment memory. Note: no UseIncDec predicate here, unlike incI_eReg.
instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "INC $dst" %}
  opcode(0xFF);               /* Opcode FF /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,dst));
  ins_pipe( ialu_mem_imm );
%}

// Decrement memory. Note: no UseIncDec predicate here, unlike decI_eReg.
instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "DEC $dst" %}
  opcode(0xFF);               /* Opcode FF /1 */
  ins_encode( OpcP, RMopc_Mem(0x01,dst));
  ins_pipe( ialu_mem_imm );
%}


// The cast rules below match in place on $dst and have empty encodings.
instruct checkCastPP( eRegP dst ) %{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "#checkcastPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

instruct castPP( eRegP dst ) %{
  match(Set dst (CastPP dst));
  format %{ "#castPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

instruct castII( eRegI dst ) %{
match(Set dst (CastII dst)); 1.7666 + format %{ "#castII of $dst" %} 1.7667 + ins_encode( /*empty encoding*/ ); 1.7668 + ins_cost(0); 1.7669 + ins_pipe( empty ); 1.7670 +%} 1.7671 + 1.7672 + 1.7673 +// Load-locked - same as a regular pointer load when used with compare-swap 1.7674 +instruct loadPLocked(eRegP dst, memory mem) %{ 1.7675 + match(Set dst (LoadPLocked mem)); 1.7676 + 1.7677 + ins_cost(125); 1.7678 + format %{ "MOV $dst,$mem\t# Load ptr. locked" %} 1.7679 + opcode(0x8B); 1.7680 + ins_encode( OpcP, RegMem(dst,mem)); 1.7681 + ins_pipe( ialu_reg_mem ); 1.7682 +%} 1.7683 + 1.7684 +// LoadLong-locked - same as a volatile long load when used with compare-swap 1.7685 +instruct loadLLocked(stackSlotL dst, load_long_memory mem) %{ 1.7686 + predicate(UseSSE<=1); 1.7687 + match(Set dst (LoadLLocked mem)); 1.7688 + 1.7689 + ins_cost(200); 1.7690 + format %{ "FILD $mem\t# Atomic volatile long load\n\t" 1.7691 + "FISTp $dst" %} 1.7692 + ins_encode(enc_loadL_volatile(mem,dst)); 1.7693 + ins_pipe( fpu_reg_mem ); 1.7694 +%} 1.7695 + 1.7696 +instruct loadLX_Locked(stackSlotL dst, load_long_memory mem, regXD tmp) %{ 1.7697 + predicate(UseSSE>=2); 1.7698 + match(Set dst (LoadLLocked mem)); 1.7699 + effect(TEMP tmp); 1.7700 + ins_cost(180); 1.7701 + format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 1.7702 + "MOVSD $dst,$tmp" %} 1.7703 + ins_encode(enc_loadLX_volatile(mem, dst, tmp)); 1.7704 + ins_pipe( pipe_slow ); 1.7705 +%} 1.7706 + 1.7707 +instruct loadLX_reg_Locked(eRegL dst, load_long_memory mem, regXD tmp) %{ 1.7708 + predicate(UseSSE>=2); 1.7709 + match(Set dst (LoadLLocked mem)); 1.7710 + effect(TEMP tmp); 1.7711 + ins_cost(160); 1.7712 + format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 1.7713 + "MOVD $dst.lo,$tmp\n\t" 1.7714 + "PSRLQ $tmp,32\n\t" 1.7715 + "MOVD $dst.hi,$tmp" %} 1.7716 + ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp)); 1.7717 + ins_pipe( pipe_slow ); 1.7718 +%} 1.7719 + 1.7720 +// Conditional-store of the updated heap-top. 
1.7721 +// Used during allocation of the shared heap. 1.7722 +// Sets flags (EQ) on success. Implemented with a LOCK-prefixed CMPXCHG (opcode bytes 0F B1) on Intel. 1.7723 +instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{ 1.7724 + match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); 1.7725 + // EAX is killed if there is contention, but then it's also unused. 1.7726 + // In the common case of no contention, EAX holds the new oop address. 1.7727 + format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %} 1.7728 + ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) ); 1.7729 + ins_pipe( pipe_cmpxchg ); 1.7730 +%} 1.7731 + 1.7732 +// Conditional-store of a long value 1.7733 +// Leaves 1 in $res on success and 0 on failure (MOV 0 / JNE / MOV 1 after the exchange). Implemented with a CMPXCHG8 on Intel. 1.7734 +// mem_ptr can actually be in either ESI or EDI (this rule declares it as eSIRegP) 1.7735 +instruct storeLConditional( eRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 1.7736 + match(Set res (StoreLConditional mem_ptr (Binary oldval newval))); 1.7737 + effect(KILL cr); 1.7738 + // EDX:EAX is killed if there is contention, but then it's also unused. 1.7739 + // In the common case of no contention, the compare succeeds and EDX:EAX still holds $oldval (a long, not an oop). 1.7740 + format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 1.7741 + "MOV $res,0\n\t" 1.7742 + "JNE,s fail\n\t" 1.7743 + "MOV $res,1\n" 1.7744 + "fail:" %} 1.7745 + ins_encode( enc_cmpxchg8(mem_ptr), 1.7746 + enc_flags_ne_to_boolean(res) ); 1.7747 + ins_pipe( pipe_cmpxchg ); 1.7748 +%} 1.7749 + 1.7750 +// Conditional-store of a long value 1.7751 +// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel. 
1.7752 +// mem_ptr can actually be in either ESI or EDI (this rule declares it as eSIRegP) 1.7753 +instruct storeLConditional_flags( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr, immI0 zero ) %{ 1.7754 + match(Set cr (CmpI (StoreLConditional mem_ptr (Binary oldval newval)) zero)); 1.7755 + // EDX:EAX is killed if there is contention, but then it's also unused. 1.7756 + // In the common case of no contention, the compare succeeds and EDX:EAX still holds $oldval (a long, not an oop). 1.7757 + format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 1.7758 + ins_encode( enc_cmpxchg8(mem_ptr) ); 1.7759 + ins_pipe( pipe_cmpxchg ); 1.7760 +%} 1.7761 + 1.7762 +// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them 1.7763 +// Each variant below leaves 1 in $res on success and 0 on failure, and declares $oldval killed. 1.7764 +instruct compareAndSwapL( eRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 1.7765 + match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); 1.7766 + effect(KILL cr, KILL oldval); 1.7767 + format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 1.7768 + "MOV $res,0\n\t" 1.7769 + "JNE,s fail\n\t" 1.7770 + "MOV $res,1\n" 1.7771 + "fail:" %} 1.7772 + ins_encode( enc_cmpxchg8(mem_ptr), 1.7773 + enc_flags_ne_to_boolean(res) ); 1.7774 + ins_pipe( pipe_cmpxchg ); 1.7775 +%} 1.7776 + 1.7777 +instruct compareAndSwapP( eRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{ 1.7778 + match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); 1.7779 + effect(KILL cr, KILL oldval); 1.7780 + format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 1.7781 + "MOV $res,0\n\t" 1.7782 + "JNE,s fail\n\t" 1.7783 + "MOV $res,1\n" 1.7784 + "fail:" %} 1.7785 + ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); 1.7786 + ins_pipe( pipe_cmpxchg ); 1.7787 +%} 1.7788 + 
1.7789 +instruct compareAndSwapI( eRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 1.7790 + match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); 1.7791 + effect(KILL cr, KILL oldval); 1.7792 + format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 1.7793 + "MOV $res,0\n\t" 1.7794 + "JNE,s fail\n\t" 1.7795 + "MOV $res,1\n" 1.7796 + "fail:" %} 1.7797 + ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); 1.7798 + ins_pipe( pipe_cmpxchg ); 1.7799 +%} 1.7800 + 1.7801 +//----------Subtraction Instructions------------------------------------------- 1.7802 +// Integer Subtraction Instructions 1.7803 +instruct subI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{ 1.7804 + match(Set dst (SubI dst src)); 1.7805 + effect(KILL cr); 1.7806 + 1.7807 + size(2); 1.7808 + format %{ "SUB $dst,$src" %} 1.7809 + opcode(0x2B); 1.7810 + ins_encode( OpcP, RegReg( dst, src) ); 1.7811 + ins_pipe( ialu_reg_reg ); 1.7812 +%} 1.7813 + 1.7814 +instruct subI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{ 1.7815 + match(Set dst (SubI dst src)); 1.7816 + effect(KILL cr); 1.7817 + 1.7818 + format %{ "SUB $dst,$src" %} 1.7819 + opcode(0x81,0x05); /* Opcode 81 /5 */ 1.7820 + // ins_encode( RegImm( dst, src) ); 1.7821 + ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 1.7822 + ins_pipe( ialu_reg ); 1.7823 +%} 1.7824 + 1.7825 +instruct subI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{ 1.7826 + match(Set dst (SubI dst (LoadI src))); 1.7827 + effect(KILL cr); 1.7828 + 1.7829 + ins_cost(125); 1.7830 + format %{ "SUB $dst,$src" %} 1.7831 + opcode(0x2B); 1.7832 + ins_encode( OpcP, RegMem( dst, src) ); 1.7833 + ins_pipe( ialu_reg_mem ); 1.7834 +%} 1.7835 + 1.7836 +instruct subI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{ 1.7837 + match(Set dst (StoreI dst (SubI (LoadI dst) src))); 1.7838 + effect(KILL cr); 1.7839 + 1.7840 + ins_cost(150); 1.7841 + format %{ "SUB $dst,$src" %} 1.7842 + opcode(0x29); /* Opcode 29 /r */ 1.7843 + ins_encode( OpcP, RegMem( src, dst ) ); 1.7844 + ins_pipe( ialu_mem_reg ); 1.7845 +%} 1.7846 + 
1.7847 +// Subtract from a pointer 1.7848 +instruct subP_eReg(eRegP dst, eRegI src, immI0 zero, eFlagsReg cr) %{ 1.7849 + match(Set dst (AddP dst (SubI zero src))); 1.7850 + effect(KILL cr); 1.7851 + 1.7852 + size(2); 1.7853 + format %{ "SUB $dst,$src" %} 1.7854 + opcode(0x2B); 1.7855 + ins_encode( OpcP, RegReg( dst, src) ); 1.7856 + ins_pipe( ialu_reg_reg ); 1.7857 +%} 1.7858 + 1.7859 +instruct negI_eReg(eRegI dst, immI0 zero, eFlagsReg cr) %{ 1.7860 + match(Set dst (SubI zero dst)); 1.7861 + effect(KILL cr); 1.7862 + 1.7863 + size(2); 1.7864 + format %{ "NEG $dst" %} 1.7865 + opcode(0xF7,0x03); // Opcode F7 /3 1.7866 + ins_encode( OpcP, RegOpc( dst ) ); 1.7867 + ins_pipe( ialu_reg ); 1.7868 +%} 1.7869 + 1.7870 + 1.7871 +//----------Multiplication/Division Instructions------------------------------- 1.7872 +// Integer Multiplication Instructions 1.7873 +// Multiply Register 1.7874 +instruct mulI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{ 1.7875 + match(Set dst (MulI dst src)); 1.7876 + effect(KILL cr); 1.7877 + 1.7878 + size(3); 1.7879 + ins_cost(300); 1.7880 + format %{ "IMUL $dst,$src" %} 1.7881 + opcode(0xAF, 0x0F); 1.7882 + ins_encode( OpcS, OpcP, RegReg( dst, src) ); 1.7883 + ins_pipe( ialu_reg_reg_alu0 ); 1.7884 +%} 1.7885 + 1.7886 +// Multiply 32-bit Immediate 1.7887 +instruct mulI_eReg_imm(eRegI dst, eRegI src, immI imm, eFlagsReg cr) %{ 1.7888 + match(Set dst (MulI src imm)); 1.7889 + effect(KILL cr); 1.7890 + 1.7891 + ins_cost(300); 1.7892 + format %{ "IMUL $dst,$src,$imm" %} 1.7893 + opcode(0x69); /* 69 /r id */ 1.7894 + ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) ); 1.7895 + ins_pipe( ialu_reg_reg_alu0 ); 1.7896 +%} 1.7897 + 1.7898 +instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{ 1.7899 + match(Set dst src); 1.7900 + effect(KILL cr); 1.7901 + 1.7902 + // Note that this is artificially increased to make it more expensive than loadConL 1.7903 + ins_cost(250); 1.7904 + format %{ "MOV EAX,$src\t// low word only" %} 1.7905 + opcode(0xB8); 1.7906 + ins_encode( LdImmL_Lo(dst, src) ); 1.7907 + ins_pipe( ialu_reg_fat ); 1.7908 +%} 1.7909 + 
1.7910 +// Multiply by 32-bit Immediate, taking the shifted high order results 1.7911 +// (special case for shift by 32) 1.7912 +instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{ 1.7913 + match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 1.7914 + predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 1.7915 + _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 1.7916 + _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 1.7917 + effect(USE src1, KILL cr); 1.7918 + 1.7919 + // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 1.7920 + ins_cost(0*100 + 1*400 - 150); 1.7921 + format %{ "IMUL EDX:EAX,$src1" %} 1.7922 + ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 1.7923 + ins_pipe( pipe_slow ); 1.7924 +%} 1.7925 + 1.7926 +// Multiply by 32-bit Immediate, taking the shifted high order results 1.7927 +instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{ 1.7928 + match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 1.7929 + predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 1.7930 + _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 1.7931 + _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 1.7932 + effect(USE src1, KILL cr); 1.7933 + 1.7934 + // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 1.7935 + ins_cost(1*100 + 1*400 - 150); 1.7936 + format %{ "IMUL EDX:EAX,$src1\n\t" 1.7937 + "SAR EDX,$cnt-32" %} 1.7938 + ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 1.7939 + ins_pipe( pipe_slow ); 1.7940 +%} 1.7941 + 
1.7942 +// Multiply Memory 32-bit Immediate 1.7943 +instruct mulI_mem_imm(eRegI dst, memory src, immI imm, eFlagsReg cr) %{ 1.7944 + match(Set dst (MulI (LoadI src) imm)); 1.7945 + effect(KILL cr); 1.7946 + 1.7947 + ins_cost(300); 1.7948 + format %{ "IMUL $dst,$src,$imm" %} 1.7949 + opcode(0x69); /* 69 /r id */ 1.7950 + ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) ); 1.7951 + ins_pipe( ialu_reg_mem_alu0 ); 1.7952 +%} 1.7953 + 1.7954 +// Multiply Memory 1.7955 +instruct mulI(eRegI dst, memory src, eFlagsReg cr) %{ 1.7956 + match(Set dst (MulI dst (LoadI src))); 1.7957 + effect(KILL cr); 1.7958 + 1.7959 + ins_cost(350); 1.7960 + format %{ "IMUL $dst,$src" %} 1.7961 + opcode(0xAF, 0x0F); 1.7962 + ins_encode( OpcS, OpcP, RegMem( dst, src) ); 1.7963 + ins_pipe( ialu_reg_mem_alu0 ); 1.7964 +%} 1.7965 + 1.7966 +// Multiply Register Int to Long 1.7967 +instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{ 1.7968 + // Basic Idea: long = (long)int * (long)int 1.7969 + match(Set dst (MulL (ConvI2L src) (ConvI2L src1))); 1.7970 + effect(DEF dst, USE src, USE src1, KILL flags); 1.7971 + 1.7972 + ins_cost(300); 1.7973 + format %{ "IMUL $dst,$src1" %} 1.7974 + 1.7975 + ins_encode( long_int_multiply( dst, src1 ) ); 1.7976 + ins_pipe( ialu_reg_reg_alu0 ); 1.7977 +%} 1.7978 + 1.7979 +instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{ 1.7980 + // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 1.7981 + match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask))); 1.7982 + effect(KILL flags); 1.7983 + 1.7984 + ins_cost(300); 1.7985 + format %{ "MUL $dst,$src1" %} 1.7986 + 1.7987 + ins_encode( long_uint_multiply(dst, src1) ); 1.7988 + ins_pipe( ialu_reg_reg_alu0 ); 1.7989 +%} 1.7990 + 
1.7991 +// Multiply Register Long 1.7992 +instruct mulL_eReg(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{ 1.7993 + match(Set dst (MulL dst src)); 1.7994 + effect(KILL cr, TEMP tmp); 1.7995 + ins_cost(4*100+3*400); 1.7996 +// Basic idea: lo(result) = lo(x_lo * y_lo) 1.7997 +// hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 1.7998 + format %{ "MOV $tmp,$src.lo\n\t" 1.7999 + "IMUL $tmp,EDX\n\t" 1.8000 + "MOV EDX,$src.hi\n\t" 1.8001 + "IMUL EDX,EAX\n\t" 1.8002 + "ADD $tmp,EDX\n\t" 1.8003 + "MUL EDX:EAX,$src.lo\n\t" 1.8004 + "ADD EDX,$tmp" %} 1.8005 + ins_encode( long_multiply( dst, src, tmp ) ); 1.8006 + ins_pipe( pipe_slow ); 1.8007 +%} 1.8008 + 1.8009 +// Multiply Register Long by small constant 1.8010 +instruct mulL_eReg_con(eADXRegL dst, immL_127 src, eRegI tmp, eFlagsReg cr) %{ 1.8011 + match(Set dst (MulL dst src)); 1.8012 + effect(KILL cr, TEMP tmp); 1.8013 + ins_cost(2*100+2*400); 1.8014 + size(12); 1.8015 +// Basic idea: lo(result) = lo(src * EAX) 1.8016 +// hi(result) = hi(src * EAX) + lo(src * EDX) 1.8017 + format %{ "IMUL $tmp,EDX,$src\n\t" 1.8018 + "MOV EDX,$src\n\t" 1.8019 + "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t" 1.8020 + "ADD EDX,$tmp" %} 1.8021 + ins_encode( long_multiply_con( dst, src, tmp ) ); 1.8022 + ins_pipe( pipe_slow ); 1.8023 +%} 1.8024 + 1.8025 +// Integer DIV with Register (the format shows a guard for EAX==0x80000000 with ECX==-1 ahead of the IDIV) 1.8026 +instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ 1.8027 + match(Set rax (DivI rax div)); 1.8028 + effect(KILL rdx, KILL cr); 1.8029 + size(26); 1.8030 + ins_cost(30*100+10*100); 1.8031 + format %{ "CMP EAX,0x80000000\n\t" 1.8032 + "JNE,s normal\n\t" 1.8033 + "XOR EDX,EDX\n\t" 1.8034 + "CMP ECX,-1\n\t" 1.8035 + "JE,s done\n" 1.8036 + "normal: CDQ\n\t" 1.8037 + "IDIV $div\n\t" 1.8038 + "done:" %} 1.8039 + opcode(0xF7, 0x7); /* Opcode F7 /7 */ 1.8040 + ins_encode( cdq_enc, OpcP, RegOpc(div) ); 1.8041 + ins_pipe( ialu_reg_reg_alu0 ); 1.8042 +%} 1.8043 + 
1.8044 +// Divide Register Long 1.8045 +instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{ 1.8046 + match(Set dst (DivL src1 src2)); 1.8047 + effect( KILL cr, KILL cx, KILL bx ); 1.8048 + ins_cost(10000); 1.8049 + format %{ "PUSH $src1.hi\n\t" 1.8050 + "PUSH $src1.lo\n\t" 1.8051 + "PUSH $src2.hi\n\t" 1.8052 + "PUSH $src2.lo\n\t" 1.8053 + "CALL SharedRuntime::ldiv\n\t" 1.8054 + "ADD ESP,16" %} 1.8055 + ins_encode( long_div(src1,src2) ); 1.8056 + ins_pipe( pipe_slow ); 1.8057 +%} 1.8058 + 1.8059 +// Integer DIVMOD with Register, both quotient and mod results 1.8060 +instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ 1.8061 + match(DivModI rax div); 1.8062 + effect(KILL cr); 1.8063 + size(26); 1.8064 + ins_cost(30*100+10*100); 1.8065 + format %{ "CMP EAX,0x80000000\n\t" 1.8066 + "JNE,s normal\n\t" 1.8067 + "XOR EDX,EDX\n\t" 1.8068 + "CMP ECX,-1\n\t" 1.8069 + "JE,s done\n" 1.8070 + "normal: CDQ\n\t" 1.8071 + "IDIV $div\n\t" 1.8072 + "done:" %} 1.8073 + opcode(0xF7, 0x7); /* Opcode F7 /7 */ 1.8074 + ins_encode( cdq_enc, OpcP, RegOpc(div) ); 1.8075 + ins_pipe( pipe_slow ); 1.8076 +%} 1.8077 + 1.8078 +// Integer MOD with Register (same cdq_enc/IDIV encoding and size(26) as divI_eReg; the result operand is rdx) 1.8079 +instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{ 1.8080 + match(Set rdx (ModI rax div)); 1.8081 + effect(KILL rax, KILL cr); 1.8082 + 1.8083 + size(26); 1.8084 + ins_cost(300); 1.8085 + format %{ "CMP EAX,0x80000000\n\t" "JNE,s normal\n\t" "XOR EDX,EDX\n\t" "CMP ECX,-1\n\t" "JE,s done\n" "normal: CDQ\n\t" 1.8086 + "IDIV $div\n\t" "done:" %} 1.8087 + opcode(0xF7, 0x7); /* Opcode F7 /7 */ 1.8088 + ins_encode( cdq_enc, OpcP, RegOpc(div) ); 1.8089 + ins_pipe( ialu_reg_reg_alu0 ); 1.8090 +%} 1.8091 + 1.8092 +// Remainder Register Long 1.8093 +instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{ 1.8094 + match(Set dst (ModL src1 src2)); 1.8095 + effect( KILL cr, KILL cx, KILL bx ); 1.8096 + ins_cost(10000); 1.8097 + format %{ "PUSH $src1.hi\n\t" 1.8098 + "PUSH $src1.lo\n\t" 1.8099 + "PUSH $src2.hi\n\t" 1.8100 + "PUSH $src2.lo\n\t" 1.8101 + "CALL SharedRuntime::lrem\n\t" 1.8102 + "ADD ESP,16" %} 1.8103 + ins_encode( long_mod(src1,src2) ); 1.8104 + ins_pipe( pipe_slow ); 1.8105 +%} 1.8106 + 1.8107 +// Integer Shift 
Instructions 1.8108 +// Shift Left by one (D1 /4 form; the count is implied, so no immediate byte is encoded) 1.8109 +instruct shlI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{ 1.8110 + match(Set dst (LShiftI dst shift)); 1.8111 + effect(KILL cr); 1.8112 + 1.8113 + size(2); 1.8114 + format %{ "SHL $dst,$shift" %} 1.8115 + opcode(0xD1, 0x4); /* D1 /4 */ 1.8116 + ins_encode( OpcP, RegOpc( dst ) ); 1.8117 + ins_pipe( ialu_reg ); 1.8118 +%} 1.8119 + 1.8120 +// Shift Left by 8-bit immediate 1.8121 +instruct salI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{ 1.8122 + match(Set dst (LShiftI dst shift)); 1.8123 + effect(KILL cr); 1.8124 + 1.8125 + size(3); 1.8126 + format %{ "SHL $dst,$shift" %} 1.8127 + opcode(0xC1, 0x4); /* C1 /4 ib */ 1.8128 + ins_encode( RegOpcImm( dst, shift) ); 1.8129 + ins_pipe( ialu_reg ); 1.8130 +%} 1.8131 + 1.8132 +// Shift Left by variable (the count operand is declared as eCXRegI; D3 /4 form) 1.8133 +instruct salI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{ 1.8134 + match(Set dst (LShiftI dst shift)); 1.8135 + effect(KILL cr); 1.8136 + 1.8137 + size(2); 1.8138 + format %{ "SHL $dst,$shift" %} 1.8139 + opcode(0xD3, 0x4); /* D3 /4 */ 1.8140 + ins_encode( OpcP, RegOpc( dst ) ); 1.8141 + ins_pipe( ialu_reg_reg ); 1.8142 +%} 1.8143 + 1.8144 +// Arithmetic shift right by one (register operand) 1.8145 +instruct sarI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{ 1.8146 + match(Set dst (RShiftI dst shift)); 1.8147 + effect(KILL cr); 1.8148 + 1.8149 + size(2); 1.8150 + format %{ "SAR $dst,$shift" %} 1.8151 + opcode(0xD1, 0x7); /* D1 /7 */ 1.8152 + ins_encode( OpcP, RegOpc( dst ) ); 1.8153 + ins_pipe( ialu_reg ); 1.8154 +%} 1.8155 + 1.8156 +// Arithmetic shift right by one, applied in place to a memory operand (load, shift and store matched as one rule) 1.8157 +instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{ 1.8158 + match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); 1.8159 + effect(KILL cr); 1.8160 + format %{ "SAR $dst,$shift" %} 1.8161 + opcode(0xD1, 0x7); /* D1 /7 */ 1.8162 + ins_encode( OpcP, RMopc_Mem(secondary,dst) ); 1.8163 + ins_pipe( ialu_mem_imm ); 1.8164 +%} 1.8165 + 1.8166 +// Arithmetic Shift Right by 8-bit immediate 1.8167 
+instruct sarI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{ 1.8168 + match(Set dst (RShiftI dst shift)); 1.8169 + effect(KILL cr); 1.8170 + 1.8171 + size(3); 1.8172 + format %{ "SAR $dst,$shift" %} 1.8173 + opcode(0xC1, 0x7); /* C1 /7 ib */ 1.8174 + ins_encode( RegOpcImm( dst, shift ) ); 1.8175 + ins_pipe( ialu_reg ); /* register form: same pipe class as salI_eReg_imm and shrI_eReg_imm */ 1.8176 +%} 1.8177 + 1.8178 +// Arithmetic Shift Right by 8-bit immediate, applied in place to a memory operand 1.8179 +instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{ 1.8180 + match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); 1.8181 + effect(KILL cr); 1.8182 + 1.8183 + format %{ "SAR $dst,$shift" %} 1.8184 + opcode(0xC1, 0x7); /* C1 /7 ib */ 1.8185 + ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) ); 1.8186 + ins_pipe( ialu_mem_imm ); 1.8187 +%} 1.8188 + 1.8189 +// Arithmetic Shift Right by variable 1.8190 +instruct sarI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{ 1.8191 + match(Set dst (RShiftI dst shift)); 1.8192 + effect(KILL cr); 1.8193 + 1.8194 + size(2); 1.8195 + format %{ "SAR $dst,$shift" %} 1.8196 + opcode(0xD3, 0x7); /* D3 /7 */ 1.8197 + ins_encode( OpcP, RegOpc( dst ) ); 1.8198 + ins_pipe( ialu_reg_reg ); 1.8199 +%} 1.8200 + 1.8201 +// Logical shift right by one 1.8202 +instruct shrI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{ 1.8203 + match(Set dst (URShiftI dst shift)); 1.8204 + effect(KILL cr); 1.8205 + 1.8206 + size(2); 1.8207 + format %{ "SHR $dst,$shift" %} 1.8208 + opcode(0xD1, 0x5); /* D1 /5 */ 1.8209 + ins_encode( OpcP, RegOpc( dst ) ); 1.8210 + ins_pipe( ialu_reg ); 1.8211 +%} 1.8212 + 1.8213 +// Logical Shift Right by 8-bit immediate 1.8214 +instruct shrI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{ 1.8215 + match(Set dst (URShiftI dst shift)); 1.8216 + effect(KILL cr); 1.8217 + 1.8218 + size(3); 1.8219 + format %{ "SHR $dst,$shift" %} 1.8220 + opcode(0xC1, 0x5); /* C1 /5 ib */ 1.8221 + ins_encode( RegOpcImm( dst, shift) ); 1.8222 + ins_pipe( ialu_reg ); 1.8223 +%} 1.8224 + 
1.8225 +// Shift Left by 24, followed by Arithmetic Shift Right by 24 (sign-extends the low byte). 1.8226 +// This idiom is used by the compiler for the i2b bytecode. 1.8227 +instruct i2b(eRegI dst, xRegI src, immI_24 twentyfour, eFlagsReg cr) %{ 1.8228 + match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); 1.8229 + effect(KILL cr); 1.8230 + 1.8231 + size(3); 1.8232 + format %{ "MOVSX $dst,$src :8" %} 1.8233 + opcode(0xBE, 0x0F); 1.8234 + ins_encode( OpcS, OpcP, RegReg( dst, src)); 1.8235 + ins_pipe( ialu_reg_reg ); 1.8236 +%} 1.8237 + 1.8238 +// Shift Left by 16, followed by Arithmetic Shift Right by 16 (sign-extends the low 16 bits). 1.8239 +// This idiom is used by the compiler for the i2s bytecode. 1.8240 +instruct i2s(eRegI dst, xRegI src, immI_16 sixteen, eFlagsReg cr) %{ 1.8241 + match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); 1.8242 + effect(KILL cr); 1.8243 + 1.8244 + size(3); 1.8245 + format %{ "MOVSX $dst,$src :16" %} 1.8246 + opcode(0xBF, 0x0F); 1.8247 + ins_encode( OpcS, OpcP, RegReg( dst, src)); 1.8248 + ins_pipe( ialu_reg_reg ); 1.8249 +%} 1.8250 + 1.8251 + 1.8252 +// Logical Shift Right by variable 1.8253 +instruct shrI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{ 1.8254 + match(Set dst (URShiftI dst shift)); 1.8255 + effect(KILL cr); 1.8256 + 1.8257 + size(2); 1.8258 + format %{ "SHR $dst,$shift" %} 1.8259 + opcode(0xD3, 0x5); /* D3 /5 */ 1.8260 + ins_encode( OpcP, RegOpc( dst ) ); 1.8261 + ins_pipe( ialu_reg_reg ); 1.8262 +%} 1.8263 + 1.8264 + 1.8265 +//----------Logical Instructions----------------------------------------------- 1.8266 +//----------Integer Logical Instructions--------------------------------------- 1.8267 +// And Instructions 1.8268 +// And Register with Register 1.8269 +instruct andI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{ 1.8270 + match(Set dst (AndI dst src)); 1.8271 + effect(KILL cr); 1.8272 + 1.8273 + size(2); 1.8274 + format %{ "AND $dst,$src" %} 1.8275 + opcode(0x23); 1.8276 + ins_encode( OpcP, RegReg( dst, src) ); 1.8277 + ins_pipe( ialu_reg_reg ); 1.8278 +%} 1.8279 + 
1.8280 +// And Register with Immediate 1.8281 +instruct andI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{ 1.8282 + match(Set dst (AndI dst src)); 1.8283 + effect(KILL cr); 1.8284 + 1.8285 + format %{ "AND $dst,$src" %} 1.8286 + opcode(0x81,0x04); /* Opcode 81 /4 */ 1.8287 + // ins_encode( RegImm( dst, src) ); 1.8288 + ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 1.8289 + ins_pipe( ialu_reg ); 1.8290 +%} 1.8291 + 1.8292 +// And Register with Memory 1.8293 +instruct andI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{ 1.8294 + match(Set dst (AndI dst (LoadI src))); 1.8295 + effect(KILL cr); 1.8296 + 1.8297 + ins_cost(125); 1.8298 + format %{ "AND $dst,$src" %} 1.8299 + opcode(0x23); 1.8300 + ins_encode( OpcP, RegMem( dst, src) ); 1.8301 + ins_pipe( ialu_reg_mem ); 1.8302 +%} 1.8303 + 1.8304 +// And Memory with Register 1.8305 +instruct andI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{ 1.8306 + match(Set dst (StoreI dst (AndI (LoadI dst) src))); 1.8307 + effect(KILL cr); 1.8308 + 1.8309 + ins_cost(150); 1.8310 + format %{ "AND $dst,$src" %} 1.8311 + opcode(0x21); /* Opcode 21 /r */ 1.8312 + ins_encode( OpcP, RegMem( src, dst ) ); 1.8313 + ins_pipe( ialu_mem_reg ); 1.8314 +%} 1.8315 + 1.8316 +// And Memory with Immediate 1.8317 +instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 1.8318 + match(Set dst (StoreI dst (AndI (LoadI dst) src))); 1.8319 + effect(KILL cr); 1.8320 + 1.8321 + ins_cost(125); 1.8322 + format %{ "AND $dst,$src" %} 1.8323 + opcode(0x81, 0x4); /* Opcode 81 /4 id */ 1.8324 + // ins_encode( MemImm( dst, src) ); 1.8325 + ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 1.8326 + ins_pipe( ialu_mem_imm ); 1.8327 +%} 1.8328 + 1.8329 +// Or Instructions 1.8330 +// Or Register with Register 1.8331 +instruct orI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{ 1.8332 + match(Set dst (OrI dst src)); 1.8333 + effect(KILL cr); 1.8334 + 1.8335 + size(2); 1.8336 + format %{ "OR $dst,$src" %} 1.8337 + opcode(0x0B); 1.8338 + 
ins_encode( OpcP, RegReg( dst, src) ); 1.8339 + ins_pipe( ialu_reg_reg ); 1.8340 +%} 1.8341 + 1.8342 +// Or Register with Immediate 1.8343 +instruct orI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{ 1.8344 + match(Set dst (OrI dst src)); 1.8345 + effect(KILL cr); 1.8346 + 1.8347 + format %{ "OR $dst,$src" %} 1.8348 + opcode(0x81,0x01); /* Opcode 81 /1 id */ 1.8349 + // ins_encode( RegImm( dst, src) ); 1.8350 + ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 1.8351 + ins_pipe( ialu_reg ); 1.8352 +%} 1.8353 + 1.8354 +// Or Register with Memory 1.8355 +instruct orI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{ 1.8356 + match(Set dst (OrI dst (LoadI src))); 1.8357 + effect(KILL cr); 1.8358 + 1.8359 + ins_cost(125); 1.8360 + format %{ "OR $dst,$src" %} 1.8361 + opcode(0x0B); 1.8362 + ins_encode( OpcP, RegMem( dst, src) ); 1.8363 + ins_pipe( ialu_reg_mem ); 1.8364 +%} 1.8365 + 1.8366 +// Or Memory with Register 1.8367 +instruct orI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{ 1.8368 + match(Set dst (StoreI dst (OrI (LoadI dst) src))); 1.8369 + effect(KILL cr); 1.8370 + 1.8371 + ins_cost(150); 1.8372 + format %{ "OR $dst,$src" %} 1.8373 + opcode(0x09); /* Opcode 09 /r */ 1.8374 + ins_encode( OpcP, RegMem( src, dst ) ); 1.8375 + ins_pipe( ialu_mem_reg ); 1.8376 +%} 1.8377 + 1.8378 +// Or Memory with Immediate 1.8379 +instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 1.8380 + match(Set dst (StoreI dst (OrI (LoadI dst) src))); 1.8381 + effect(KILL cr); 1.8382 + 1.8383 + ins_cost(125); 1.8384 + format %{ "OR $dst,$src" %} 1.8385 + opcode(0x81,0x1); /* Opcode 81 /1 id */ 1.8386 + // ins_encode( MemImm( dst, src) ); 1.8387 + ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 1.8388 + ins_pipe( ialu_mem_imm ); 1.8389 +%} 1.8390 + 1.8391 +// ROL/ROR 1.8392 +// ROL expand 1.8393 +instruct rolI_eReg_imm1(eRegI dst, immI1 shift, eFlagsReg cr) %{ 1.8394 + effect(USE_DEF dst, USE shift, KILL cr); 1.8395 + 1.8396 + format %{ "ROL $dst, 
$shift" %} 1.8397 + opcode(0xD1, 0x0); /* Opcode D1 /0 */ 1.8398 + ins_encode( OpcP, RegOpc( dst )); 1.8399 + ins_pipe( ialu_reg ); 1.8400 +%} 1.8401 + 1.8402 +instruct rolI_eReg_imm8(eRegI dst, immI8 shift, eFlagsReg cr) %{ 1.8403 + effect(USE_DEF dst, USE shift, KILL cr); 1.8404 + 1.8405 + format %{ "ROL $dst, $shift" %} 1.8406 + opcode(0xC1, 0x0); /*Opcode /C1 /0 */ 1.8407 + ins_encode( RegOpcImm(dst, shift) ); 1.8408 + ins_pipe(ialu_reg); 1.8409 +%} 1.8410 + 1.8411 +instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{ 1.8412 + effect(USE_DEF dst, USE shift, KILL cr); 1.8413 + 1.8414 + format %{ "ROL $dst, $shift" %} 1.8415 + opcode(0xD3, 0x0); /* Opcode D3 /0 */ 1.8416 + ins_encode(OpcP, RegOpc(dst)); 1.8417 + ins_pipe( ialu_reg_reg ); 1.8418 +%} 1.8419 +// end of ROL expand 1.8420 + 1.8421 +// ROL 32bit by one once 1.8422 +instruct rolI_eReg_i1(eRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{ 1.8423 + match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); 1.8424 + 1.8425 + expand %{ 1.8426 + rolI_eReg_imm1(dst, lshift, cr); 1.8427 + %} 1.8428 +%} 1.8429 + 1.8430 +// ROL 32bit var by imm8 once 1.8431 +instruct rolI_eReg_i8(eRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{ 1.8432 + predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 1.8433 + match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); 1.8434 + 1.8435 + expand %{ 1.8436 + rolI_eReg_imm8(dst, lshift, cr); 1.8437 + %} 1.8438 +%} 1.8439 + 1.8440 +// ROL 32bit var by var once 1.8441 +instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{ 1.8442 + match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift)))); 1.8443 + 1.8444 + expand %{ 1.8445 + rolI_eReg_CL(dst, shift, cr); 1.8446 + %} 1.8447 +%} 1.8448 + 1.8449 +// ROL 32bit var by var once 1.8450 +instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{ 1.8451 + match(Set dst ( OrI (LShiftI dst shift) 
(URShiftI dst (SubI c32 shift)))); 1.8452 + 1.8453 + expand %{ 1.8454 + rolI_eReg_CL(dst, shift, cr); 1.8455 + %} 1.8456 +%} 1.8457 + 1.8458 +// ROR expand 1.8459 +instruct rorI_eReg_imm1(eRegI dst, immI1 shift, eFlagsReg cr) %{ 1.8460 + effect(USE_DEF dst, USE shift, KILL cr); 1.8461 + 1.8462 + format %{ "ROR $dst, $shift" %} 1.8463 + opcode(0xD1,0x1); /* Opcode D1 /1 */ 1.8464 + ins_encode( OpcP, RegOpc( dst ) ); 1.8465 + ins_pipe( ialu_reg ); 1.8466 +%} 1.8467 + 1.8468 +instruct rorI_eReg_imm8(eRegI dst, immI8 shift, eFlagsReg cr) %{ 1.8469 + effect (USE_DEF dst, USE shift, KILL cr); 1.8470 + 1.8471 + format %{ "ROR $dst, $shift" %} 1.8472 + opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */ 1.8473 + ins_encode( RegOpcImm(dst, shift) ); 1.8474 + ins_pipe( ialu_reg ); 1.8475 +%} 1.8476 + 1.8477 +instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{ 1.8478 + effect(USE_DEF dst, USE shift, KILL cr); 1.8479 + 1.8480 + format %{ "ROR $dst, $shift" %} 1.8481 + opcode(0xD3, 0x1); /* Opcode D3 /1 */ 1.8482 + ins_encode(OpcP, RegOpc(dst)); 1.8483 + ins_pipe( ialu_reg_reg ); 1.8484 +%} 1.8485 +// end of ROR expand 1.8486 + 1.8487 +// ROR right once 1.8488 +instruct rorI_eReg_i1(eRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{ 1.8489 + match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); 1.8490 + 1.8491 + expand %{ 1.8492 + rorI_eReg_imm1(dst, rshift, cr); 1.8493 + %} 1.8494 +%} 1.8495 + 1.8496 +// ROR 32bit by immI8 once 1.8497 +instruct rorI_eReg_i8(eRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{ 1.8498 + predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 1.8499 + match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); 1.8500 + 1.8501 + expand %{ 1.8502 + rorI_eReg_imm8(dst, rshift, cr); 1.8503 + %} 1.8504 +%} 1.8505 + 1.8506 +// ROR 32bit var by var once 1.8507 +instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{ 1.8508 + match(Set dst ( OrI (URShiftI dst shift) 
(LShiftI dst (SubI zero shift)))); 1.8509 + 1.8510 + expand %{ 1.8511 + rorI_eReg_CL(dst, shift, cr); 1.8512 + %} 1.8513 +%} 1.8514 + 1.8515 +// ROR 32bit var by var once 1.8516 +instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{ 1.8517 + match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift)))); 1.8518 + 1.8519 + expand %{ 1.8520 + rorI_eReg_CL(dst, shift, cr); 1.8521 + %} 1.8522 +%} 1.8523 + 1.8524 +// Xor Instructions 1.8525 +// Xor Register with Register 1.8526 +instruct xorI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{ 1.8527 + match(Set dst (XorI dst src)); 1.8528 + effect(KILL cr); 1.8529 + 1.8530 + size(2); 1.8531 + format %{ "XOR $dst,$src" %} 1.8532 + opcode(0x33); 1.8533 + ins_encode( OpcP, RegReg( dst, src) ); 1.8534 + ins_pipe( ialu_reg_reg ); 1.8535 +%} 1.8536 + 1.8537 +// Xor Register with Immediate 1.8538 +instruct xorI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{ 1.8539 + match(Set dst (XorI dst src)); 1.8540 + effect(KILL cr); 1.8541 + 1.8542 + format %{ "XOR $dst,$src" %} 1.8543 + opcode(0x81,0x06); /* Opcode 81 /6 id */ 1.8544 + // ins_encode( RegImm( dst, src) ); 1.8545 + ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 1.8546 + ins_pipe( ialu_reg ); 1.8547 +%} 1.8548 + 1.8549 +// Xor Register with Memory 1.8550 +instruct xorI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{ 1.8551 + match(Set dst (XorI dst (LoadI src))); 1.8552 + effect(KILL cr); 1.8553 + 1.8554 + ins_cost(125); 1.8555 + format %{ "XOR $dst,$src" %} 1.8556 + opcode(0x33); 1.8557 + ins_encode( OpcP, RegMem(dst, src) ); 1.8558 + ins_pipe( ialu_reg_mem ); 1.8559 +%} 1.8560 + 1.8561 +// Xor Memory with Register 1.8562 +instruct xorI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{ 1.8563 + match(Set dst (StoreI dst (XorI (LoadI dst) src))); 1.8564 + effect(KILL cr); 1.8565 + 1.8566 + ins_cost(150); 1.8567 + format %{ "XOR $dst,$src" %} 1.8568 + opcode(0x31); /* Opcode 31 /r */ 1.8569 + ins_encode( OpcP, RegMem( src, dst ) ); 
1.8570 + ins_pipe( ialu_mem_reg ); 1.8571 +%} 1.8572 + 1.8573 +// Xor Memory with Immediate 1.8574 +instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 1.8575 + match(Set dst (StoreI dst (XorI (LoadI dst) src))); 1.8576 + effect(KILL cr); 1.8577 + 1.8578 + ins_cost(125); 1.8579 + format %{ "XOR $dst,$src" %} 1.8580 + opcode(0x81,0x6); /* Opcode 81 /6 id */ 1.8581 + ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 1.8582 + ins_pipe( ialu_mem_imm ); 1.8583 +%} 1.8584 + 1.8585 +//----------Convert Int to Boolean--------------------------------------------- 1.8586 + 1.8587 +instruct movI_nocopy(eRegI dst, eRegI src) %{ 1.8588 + effect( DEF dst, USE src ); 1.8589 + format %{ "MOV $dst,$src" %} 1.8590 + ins_encode( enc_Copy( dst, src) ); 1.8591 + ins_pipe( ialu_reg_reg ); 1.8592 +%} 1.8593 + 1.8594 +instruct ci2b( eRegI dst, eRegI src, eFlagsReg cr ) %{ 1.8595 + effect( USE_DEF dst, USE src, KILL cr ); 1.8596 + 1.8597 + size(4); 1.8598 + format %{ "NEG $dst\n\t" 1.8599 + "ADC $dst,$src" %} 1.8600 + ins_encode( neg_reg(dst), 1.8601 + OpcRegReg(0x13,dst,src) ); 1.8602 + ins_pipe( ialu_reg_reg_long ); 1.8603 +%} 1.8604 + 1.8605 +instruct convI2B( eRegI dst, eRegI src, eFlagsReg cr ) %{ 1.8606 + match(Set dst (Conv2B src)); 1.8607 + 1.8608 + expand %{ 1.8609 + movI_nocopy(dst,src); 1.8610 + ci2b(dst,src,cr); 1.8611 + %} 1.8612 +%} 1.8613 + 1.8614 +instruct movP_nocopy(eRegI dst, eRegP src) %{ 1.8615 + effect( DEF dst, USE src ); 1.8616 + format %{ "MOV $dst,$src" %} 1.8617 + ins_encode( enc_Copy( dst, src) ); 1.8618 + ins_pipe( ialu_reg_reg ); 1.8619 +%} 1.8620 + 1.8621 +instruct cp2b( eRegI dst, eRegP src, eFlagsReg cr ) %{ 1.8622 + effect( USE_DEF dst, USE src, KILL cr ); 1.8623 + format %{ "NEG $dst\n\t" 1.8624 + "ADC $dst,$src" %} 1.8625 + ins_encode( neg_reg(dst), 1.8626 + OpcRegReg(0x13,dst,src) ); 1.8627 + ins_pipe( ialu_reg_reg_long ); 1.8628 +%} 1.8629 + 1.8630 +instruct convP2B( eRegI dst, eRegP src, eFlagsReg cr ) %{ 1.8631 + 
match(Set dst (Conv2B src)); 1.8632 + 1.8633 + expand %{ 1.8634 + movP_nocopy(dst,src); 1.8635 + cp2b(dst,src,cr); 1.8636 + %} 1.8637 +%} 1.8638 + 1.8639 +instruct cmpLTMask( eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr ) %{ 1.8640 + match(Set dst (CmpLTMask p q)); 1.8641 + effect( KILL cr ); 1.8642 + ins_cost(400); 1.8643 + 1.8644 + // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination 1.8645 + format %{ "XOR $dst,$dst\n\t" 1.8646 + "CMP $p,$q\n\t" 1.8647 + "SETlt $dst\n\t" 1.8648 + "NEG $dst" %} 1.8649 + ins_encode( OpcRegReg(0x33,dst,dst), 1.8650 + OpcRegReg(0x3B,p,q), 1.8651 + setLT_reg(dst), neg_reg(dst) ); 1.8652 + ins_pipe( pipe_slow ); 1.8653 +%} 1.8654 + 1.8655 +instruct cmpLTMask0( eRegI dst, immI0 zero, eFlagsReg cr ) %{ 1.8656 + match(Set dst (CmpLTMask dst zero)); 1.8657 + effect( DEF dst, KILL cr ); 1.8658 + ins_cost(100); 1.8659 + 1.8660 + format %{ "SAR $dst,31" %} 1.8661 + opcode(0xC1, 0x7); /* C1 /7 ib */ 1.8662 + ins_encode( RegOpcImm( dst, 0x1F ) ); 1.8663 + ins_pipe( ialu_reg ); 1.8664 +%} 1.8665 + 1.8666 + 1.8667 +instruct cadd_cmpLTMask( ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp, eFlagsReg cr ) %{ 1.8668 + match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q))); 1.8669 + effect( KILL tmp, KILL cr ); 1.8670 + ins_cost(400); 1.8671 + // annoyingly, $tmp has no edges so you cant ask for it in 1.8672 + // any format or encoding 1.8673 + format %{ "SUB $p,$q\n\t" 1.8674 + "SBB ECX,ECX\n\t" 1.8675 + "AND ECX,$y\n\t" 1.8676 + "ADD $p,ECX" %} 1.8677 + ins_encode( enc_cmpLTP(p,q,y,tmp) ); 1.8678 + ins_pipe( pipe_cmplt ); 1.8679 +%} 1.8680 + 1.8681 +/* If I enable this, I encourage spilling in the inner loop of compress. 
1.8682 +instruct cadd_cmpLTMask_mem( ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr ) %{ 1.8683 + match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q))); 1.8684 + effect( USE_KILL tmp, KILL cr ); 1.8685 + ins_cost(400); 1.8686 + 1.8687 + format %{ "SUB $p,$q\n\t" 1.8688 + "SBB ECX,ECX\n\t" 1.8689 + "AND ECX,$y\n\t" 1.8690 + "ADD $p,ECX" %} 1.8691 + ins_encode( enc_cmpLTP_mem(p,q,y,tmp) ); 1.8692 +%} 1.8693 +*/ 1.8694 + 1.8695 +//----------Long Instructions------------------------------------------------ 1.8696 +// Add Long Register with Register 1.8697 +instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 1.8698 + match(Set dst (AddL dst src)); 1.8699 + effect(KILL cr); 1.8700 + ins_cost(200); 1.8701 + format %{ "ADD $dst.lo,$src.lo\n\t" 1.8702 + "ADC $dst.hi,$src.hi" %} 1.8703 + opcode(0x03, 0x13); 1.8704 + ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 1.8705 + ins_pipe( ialu_reg_reg_long ); 1.8706 +%} 1.8707 + 1.8708 +// Add Long Register with Immediate 1.8709 +instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 1.8710 + match(Set dst (AddL dst src)); 1.8711 + effect(KILL cr); 1.8712 + format %{ "ADD $dst.lo,$src.lo\n\t" 1.8713 + "ADC $dst.hi,$src.hi" %} 1.8714 + opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */ 1.8715 + ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 1.8716 + ins_pipe( ialu_reg_long ); 1.8717 +%} 1.8718 + 1.8719 +// Add Long Register with Memory 1.8720 +instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 1.8721 + match(Set dst (AddL dst (LoadL mem))); 1.8722 + effect(KILL cr); 1.8723 + ins_cost(125); 1.8724 + format %{ "ADD $dst.lo,$mem\n\t" 1.8725 + "ADC $dst.hi,$mem+4" %} 1.8726 + opcode(0x03, 0x13); 1.8727 + ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 1.8728 + ins_pipe( ialu_reg_long_mem ); 1.8729 +%} 1.8730 + 1.8731 +// Subtract Long Register with Register. 
1.8732 +instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 1.8733 + match(Set dst (SubL dst src)); 1.8734 + effect(KILL cr); 1.8735 + ins_cost(200); 1.8736 + format %{ "SUB $dst.lo,$src.lo\n\t" 1.8737 + "SBB $dst.hi,$src.hi" %} 1.8738 + opcode(0x2B, 0x1B); 1.8739 + ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 1.8740 + ins_pipe( ialu_reg_reg_long ); 1.8741 +%} 1.8742 + 1.8743 +// Subtract Long Register with Immediate 1.8744 +instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 1.8745 + match(Set dst (SubL dst src)); 1.8746 + effect(KILL cr); 1.8747 + format %{ "SUB $dst.lo,$src.lo\n\t" 1.8748 + "SBB $dst.hi,$src.hi" %} 1.8749 + opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */ 1.8750 + ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 1.8751 + ins_pipe( ialu_reg_long ); 1.8752 +%} 1.8753 + 1.8754 +// Subtract Long Register with Memory 1.8755 +instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 1.8756 + match(Set dst (SubL dst (LoadL mem))); 1.8757 + effect(KILL cr); 1.8758 + ins_cost(125); 1.8759 + format %{ "SUB $dst.lo,$mem\n\t" 1.8760 + "SBB $dst.hi,$mem+4" %} 1.8761 + opcode(0x2B, 0x1B); 1.8762 + ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 1.8763 + ins_pipe( ialu_reg_long_mem ); 1.8764 +%} 1.8765 + 1.8766 +instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{ 1.8767 + match(Set dst (SubL zero dst)); 1.8768 + effect(KILL cr); 1.8769 + ins_cost(300); 1.8770 + format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %} 1.8771 + ins_encode( neg_long(dst) ); 1.8772 + ins_pipe( ialu_reg_reg_long ); 1.8773 +%} 1.8774 + 1.8775 +// And Long Register with Register 1.8776 +instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 1.8777 + match(Set dst (AndL dst src)); 1.8778 + effect(KILL cr); 1.8779 + format %{ "AND $dst.lo,$src.lo\n\t" 1.8780 + "AND $dst.hi,$src.hi" %} 1.8781 + opcode(0x23,0x23); 1.8782 + ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 1.8783 + 
ins_pipe( ialu_reg_reg_long ); 1.8784 +%} 1.8785 + 1.8786 +// And Long Register with Immediate 1.8787 +instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 1.8788 + match(Set dst (AndL dst src)); 1.8789 + effect(KILL cr); 1.8790 + format %{ "AND $dst.lo,$src.lo\n\t" 1.8791 + "AND $dst.hi,$src.hi" %} 1.8792 + opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */ 1.8793 + ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 1.8794 + ins_pipe( ialu_reg_long ); 1.8795 +%} 1.8796 + 1.8797 +// And Long Register with Memory 1.8798 +instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 1.8799 + match(Set dst (AndL dst (LoadL mem))); 1.8800 + effect(KILL cr); 1.8801 + ins_cost(125); 1.8802 + format %{ "AND $dst.lo,$mem\n\t" 1.8803 + "AND $dst.hi,$mem+4" %} 1.8804 + opcode(0x23, 0x23); 1.8805 + ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 1.8806 + ins_pipe( ialu_reg_long_mem ); 1.8807 +%} 1.8808 + 1.8809 +// Or Long Register with Register 1.8810 +instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 1.8811 + match(Set dst (OrL dst src)); 1.8812 + effect(KILL cr); 1.8813 + format %{ "OR $dst.lo,$src.lo\n\t" 1.8814 + "OR $dst.hi,$src.hi" %} 1.8815 + opcode(0x0B,0x0B); 1.8816 + ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 1.8817 + ins_pipe( ialu_reg_reg_long ); 1.8818 +%} 1.8819 + 1.8820 +// Or Long Register with Immediate 1.8821 +instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 1.8822 + match(Set dst (OrL dst src)); 1.8823 + effect(KILL cr); 1.8824 + format %{ "OR $dst.lo,$src.lo\n\t" 1.8825 + "OR $dst.hi,$src.hi" %} 1.8826 + opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */ 1.8827 + ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 1.8828 + ins_pipe( ialu_reg_long ); 1.8829 +%} 1.8830 + 1.8831 +// Or Long Register with Memory 1.8832 +instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 1.8833 + match(Set dst (OrL dst (LoadL mem))); 1.8834 + 
effect(KILL cr); 1.8835 + ins_cost(125); 1.8836 + format %{ "OR $dst.lo,$mem\n\t" 1.8837 + "OR $dst.hi,$mem+4" %} 1.8838 + opcode(0x0B,0x0B); 1.8839 + ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 1.8840 + ins_pipe( ialu_reg_long_mem ); 1.8841 +%} 1.8842 + 1.8843 +// Xor Long Register with Register 1.8844 +instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 1.8845 + match(Set dst (XorL dst src)); 1.8846 + effect(KILL cr); 1.8847 + format %{ "XOR $dst.lo,$src.lo\n\t" 1.8848 + "XOR $dst.hi,$src.hi" %} 1.8849 + opcode(0x33,0x33); 1.8850 + ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 1.8851 + ins_pipe( ialu_reg_reg_long ); 1.8852 +%} 1.8853 + 1.8854 +// Xor Long Register with Immediate 1.8855 +instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 1.8856 + match(Set dst (XorL dst src)); 1.8857 + effect(KILL cr); 1.8858 + format %{ "XOR $dst.lo,$src.lo\n\t" 1.8859 + "XOR $dst.hi,$src.hi" %} 1.8860 + opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */ 1.8861 + ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 1.8862 + ins_pipe( ialu_reg_long ); 1.8863 +%} 1.8864 + 1.8865 +// Xor Long Register with Memory 1.8866 +instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 1.8867 + match(Set dst (XorL dst (LoadL mem))); 1.8868 + effect(KILL cr); 1.8869 + ins_cost(125); 1.8870 + format %{ "XOR $dst.lo,$mem\n\t" 1.8871 + "XOR $dst.hi,$mem+4" %} 1.8872 + opcode(0x33,0x33); 1.8873 + ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 1.8874 + ins_pipe( ialu_reg_long_mem ); 1.8875 +%} 1.8876 + 1.8877 +// Shift Left Long by 1-31 1.8878 +instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 1.8879 + match(Set dst (LShiftL dst cnt)); 1.8880 + effect(KILL cr); 1.8881 + ins_cost(200); 1.8882 + format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t" 1.8883 + "SHL $dst.lo,$cnt" %} 1.8884 + opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */ 1.8885 + ins_encode( move_long_small_shift(dst,cnt) ); 
1.8886 + ins_pipe( ialu_reg_long ); 1.8887 +%} 1.8888 + 1.8889 +// Shift Left Long by 32-63 1.8890 +instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 1.8891 + match(Set dst (LShiftL dst cnt)); 1.8892 + effect(KILL cr); 1.8893 + ins_cost(300); 1.8894 + format %{ "MOV $dst.hi,$dst.lo\n" 1.8895 + "\tSHL $dst.hi,$cnt-32\n" 1.8896 + "\tXOR $dst.lo,$dst.lo" %} 1.8897 + opcode(0xC1, 0x4); /* C1 /4 ib */ 1.8898 + ins_encode( move_long_big_shift_clr(dst,cnt) ); 1.8899 + ins_pipe( ialu_reg_long ); 1.8900 +%} 1.8901 + 1.8902 +// Shift Left Long by variable 1.8903 +instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 1.8904 + match(Set dst (LShiftL dst shift)); 1.8905 + effect(KILL cr); 1.8906 + ins_cost(500+200); 1.8907 + size(17); 1.8908 + format %{ "TEST $shift,32\n\t" 1.8909 + "JEQ,s small\n\t" 1.8910 + "MOV $dst.hi,$dst.lo\n\t" 1.8911 + "XOR $dst.lo,$dst.lo\n" 1.8912 + "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t" 1.8913 + "SHL $dst.lo,$shift" %} 1.8914 + ins_encode( shift_left_long( dst, shift ) ); 1.8915 + ins_pipe( pipe_slow ); 1.8916 +%} 1.8917 + 1.8918 +// Shift Right Long by 1-31 1.8919 +instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 1.8920 + match(Set dst (URShiftL dst cnt)); 1.8921 + effect(KILL cr); 1.8922 + ins_cost(200); 1.8923 + format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 1.8924 + "SHR $dst.hi,$cnt" %} 1.8925 + opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */ 1.8926 + ins_encode( move_long_small_shift(dst,cnt) ); 1.8927 + ins_pipe( ialu_reg_long ); 1.8928 +%} 1.8929 + 1.8930 +// Shift Right Long by 32-63 1.8931 +instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 1.8932 + match(Set dst (URShiftL dst cnt)); 1.8933 + effect(KILL cr); 1.8934 + ins_cost(300); 1.8935 + format %{ "MOV $dst.lo,$dst.hi\n" 1.8936 + "\tSHR $dst.lo,$cnt-32\n" 1.8937 + "\tXOR $dst.hi,$dst.hi" %} 1.8938 + opcode(0xC1, 0x5); /* C1 /5 ib */ 1.8939 + ins_encode( move_long_big_shift_clr(dst,cnt) ); 1.8940 + ins_pipe( 
ialu_reg_long ); 1.8941 +%} 1.8942 + 1.8943 +// Shift Right Long by variable 1.8944 +instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 1.8945 + match(Set dst (URShiftL dst shift)); 1.8946 + effect(KILL cr); 1.8947 + ins_cost(600); 1.8948 + size(17); 1.8949 + format %{ "TEST $shift,32\n\t" 1.8950 + "JEQ,s small\n\t" 1.8951 + "MOV $dst.lo,$dst.hi\n\t" 1.8952 + "XOR $dst.hi,$dst.hi\n" 1.8953 + "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 1.8954 + "SHR $dst.hi,$shift" %} 1.8955 + ins_encode( shift_right_long( dst, shift ) ); 1.8956 + ins_pipe( pipe_slow ); 1.8957 +%} 1.8958 + 1.8959 +// Shift Right arithmetic Long by 1-31 1.8960 +instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 1.8961 + match(Set dst (RShiftL dst cnt)); 1.8962 + effect(KILL cr); 1.8963 + ins_cost(200); 1.8964 + format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 1.8965 + "SAR $dst.hi,$cnt" %} 1.8966 + opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */ 1.8967 + ins_encode( move_long_small_shift(dst,cnt) ); 1.8968 + ins_pipe( ialu_reg_long ); 1.8969 +%} 1.8970 + 1.8971 +// Shift Right arithmetic Long by 32-63 1.8972 +instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 1.8973 + match(Set dst (RShiftL dst cnt)); 1.8974 + effect(KILL cr); 1.8975 + ins_cost(300); 1.8976 + format %{ "MOV $dst.lo,$dst.hi\n" 1.8977 + "\tSAR $dst.lo,$cnt-32\n" 1.8978 + "\tSAR $dst.hi,31" %} 1.8979 + opcode(0xC1, 0x7); /* C1 /7 ib */ 1.8980 + ins_encode( move_long_big_shift_sign(dst,cnt) ); 1.8981 + ins_pipe( ialu_reg_long ); 1.8982 +%} 1.8983 + 1.8984 +// Shift Right arithmetic Long by variable 1.8985 +instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 1.8986 + match(Set dst (RShiftL dst shift)); 1.8987 + effect(KILL cr); 1.8988 + ins_cost(600); 1.8989 + size(18); 1.8990 + format %{ "TEST $shift,32\n\t" 1.8991 + "JEQ,s small\n\t" 1.8992 + "MOV $dst.lo,$dst.hi\n\t" 1.8993 + "SAR $dst.hi,31\n" 1.8994 + "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 1.8995 + "SAR $dst.hi,$shift" %} 1.8996 + ins_encode( 
shift_right_arith_long( dst, shift ) ); 1.8997 + ins_pipe( pipe_slow ); 1.8998 +%} 1.8999 + 1.9000 + 1.9001 +//----------Double Instructions------------------------------------------------ 1.9002 +// Double Math 1.9003 + 1.9004 +// Compare & branch 1.9005 + 1.9006 +// P6 version of float compare, sets condition codes in EFLAGS 1.9007 +instruct cmpD_cc_P6(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{ 1.9008 + predicate(VM_Version::supports_cmov() && UseSSE <=1); 1.9009 + match(Set cr (CmpD src1 src2)); 1.9010 + effect(KILL rax); 1.9011 + ins_cost(150); 1.9012 + format %{ "FLD $src1\n\t" 1.9013 + "FUCOMIP ST,$src2 // P6 instruction\n\t" 1.9014 + "JNP exit\n\t" 1.9015 + "MOV ah,1 // saw a NaN, set CF\n\t" 1.9016 + "SAHF\n" 1.9017 + "exit:\tNOP // avoid branch to branch" %} 1.9018 + opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 1.9019 + ins_encode( Push_Reg_D(src1), 1.9020 + OpcP, RegOpc(src2), 1.9021 + cmpF_P6_fixup ); 1.9022 + ins_pipe( pipe_slow ); 1.9023 +%} 1.9024 + 1.9025 +// Compare & branch 1.9026 +instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{ 1.9027 + predicate(UseSSE<=1); 1.9028 + match(Set cr (CmpD src1 src2)); 1.9029 + effect(KILL rax); 1.9030 + ins_cost(200); 1.9031 + format %{ "FLD $src1\n\t" 1.9032 + "FCOMp $src2\n\t" 1.9033 + "FNSTSW AX\n\t" 1.9034 + "TEST AX,0x400\n\t" 1.9035 + "JZ,s flags\n\t" 1.9036 + "MOV AH,1\t# unordered treat as LT\n" 1.9037 + "flags:\tSAHF" %} 1.9038 + opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 1.9039 + ins_encode( Push_Reg_D(src1), 1.9040 + OpcP, RegOpc(src2), 1.9041 + fpu_flags); 1.9042 + ins_pipe( pipe_slow ); 1.9043 +%} 1.9044 + 1.9045 +// Compare vs zero into -1,0,1 1.9046 +instruct cmpD_0(eRegI dst, regD src1, immD0 zero, eAXRegI rax, eFlagsReg cr) %{ 1.9047 + predicate(UseSSE<=1); 1.9048 + match(Set dst (CmpD3 src1 zero)); 1.9049 + effect(KILL cr, KILL rax); 1.9050 + ins_cost(280); 1.9051 + format %{ "FTSTD $dst,$src1" %} 1.9052 + opcode(0xE4, 0xD9); 1.9053 + ins_encode( Push_Reg_D(src1), 1.9054 
+ OpcS, OpcP, PopFPU, 1.9055 + CmpF_Result(dst)); 1.9056 + ins_pipe( pipe_slow ); 1.9057 +%} 1.9058 + 1.9059 +// Compare into -1,0,1 1.9060 +instruct cmpD_reg(eRegI dst, regD src1, regD src2, eAXRegI rax, eFlagsReg cr) %{ 1.9061 + predicate(UseSSE<=1); 1.9062 + match(Set dst (CmpD3 src1 src2)); 1.9063 + effect(KILL cr, KILL rax); 1.9064 + ins_cost(300); 1.9065 + format %{ "FCMPD $dst,$src1,$src2" %} 1.9066 + opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 1.9067 + ins_encode( Push_Reg_D(src1), 1.9068 + OpcP, RegOpc(src2), 1.9069 + CmpF_Result(dst)); 1.9070 + ins_pipe( pipe_slow ); 1.9071 +%} 1.9072 + 1.9073 +// double compare and set condition codes in EFLAGS by XMM regs 1.9074 +instruct cmpXD_cc(eFlagsRegU cr, regXD dst, regXD src, eAXRegI rax) %{ 1.9075 + predicate(UseSSE>=2); 1.9076 + match(Set cr (CmpD dst src)); 1.9077 + effect(KILL rax); 1.9078 + ins_cost(125); 1.9079 + format %{ "COMISD $dst,$src\n" 1.9080 + "\tJNP exit\n" 1.9081 + "\tMOV ah,1 // saw a NaN, set CF\n" 1.9082 + "\tSAHF\n" 1.9083 + "exit:\tNOP // avoid branch to branch" %} 1.9084 + opcode(0x66, 0x0F, 0x2F); 1.9085 + ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src), cmpF_P6_fixup); 1.9086 + ins_pipe( pipe_slow ); 1.9087 +%} 1.9088 + 1.9089 +// double compare (memory operand) and set condition codes in EFLAGS by XMM regs 1.9090 +instruct cmpXD_ccmem(eFlagsRegU cr, regXD dst, memory src, eAXRegI rax) %{ 1.9091 + predicate(UseSSE>=2); 1.9092 + match(Set cr (CmpD dst (LoadD src))); 1.9093 + effect(KILL rax); 1.9094 + ins_cost(145); 1.9095 + format %{ "COMISD $dst,$src\n" 1.9096 + "\tJNP exit\n" 1.9097 + "\tMOV ah,1 // saw a NaN, set CF\n" 1.9098 + "\tSAHF\n" 1.9099 + "exit:\tNOP // avoid branch to branch" %} 1.9100 + opcode(0x66, 0x0F, 0x2F); 1.9101 + ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src), cmpF_P6_fixup); 1.9102 + ins_pipe( pipe_slow ); 1.9103 +%} 1.9104 + 1.9105 +// Compare into -1,0,1 in XMM 1.9106 +instruct cmpXD_reg(eRegI dst, regXD src1, regXD src2, eFlagsReg cr) %{ 1.9107 + 
predicate(UseSSE>=2); 1.9108 + match(Set dst (CmpD3 src1 src2)); 1.9109 + effect(KILL cr); 1.9110 + ins_cost(255); 1.9111 + format %{ "XOR $dst,$dst\n" 1.9112 + "\tCOMISD $src1,$src2\n" 1.9113 + "\tJP,s nan\n" 1.9114 + "\tJEQ,s exit\n" 1.9115 + "\tJA,s inc\n" 1.9116 + "nan:\tDEC $dst\n" 1.9117 + "\tJMP,s exit\n" 1.9118 + "inc:\tINC $dst\n" 1.9119 + "exit:" 1.9120 + %} 1.9121 + opcode(0x66, 0x0F, 0x2F); 1.9122 + ins_encode(Xor_Reg(dst), OpcP, OpcS, Opcode(tertiary), RegReg(src1, src2), 1.9123 + CmpX_Result(dst)); 1.9124 + ins_pipe( pipe_slow ); 1.9125 +%} 1.9126 + 1.9127 +// Compare into -1,0,1 in XMM and memory 1.9128 +instruct cmpXD_regmem(eRegI dst, regXD src1, memory mem, eFlagsReg cr) %{ 1.9129 + predicate(UseSSE>=2); 1.9130 + match(Set dst (CmpD3 src1 (LoadD mem))); 1.9131 + effect(KILL cr); 1.9132 + ins_cost(275); 1.9133 + format %{ "COMISD $src1,$mem\n" 1.9134 + "\tMOV $dst,0\t\t# do not blow flags\n" 1.9135 + "\tJP,s nan\n" 1.9136 + "\tJEQ,s exit\n" 1.9137 + "\tJA,s inc\n" 1.9138 + "nan:\tDEC $dst\n" 1.9139 + "\tJMP,s exit\n" 1.9140 + "inc:\tINC $dst\n" 1.9141 + "exit:" 1.9142 + %} 1.9143 + opcode(0x66, 0x0F, 0x2F); 1.9144 + ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(src1, mem), 1.9145 + LdImmI(dst,0x0), CmpX_Result(dst)); 1.9146 + ins_pipe( pipe_slow ); 1.9147 +%} 1.9148 + 1.9149 + 1.9150 +instruct subD_reg(regD dst, regD src) %{ 1.9151 + predicate (UseSSE <=1); 1.9152 + match(Set dst (SubD dst src)); 1.9153 + 1.9154 + format %{ "FLD $src\n\t" 1.9155 + "DSUBp $dst,ST" %} 1.9156 + opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 1.9157 + ins_cost(150); 1.9158 + ins_encode( Push_Reg_D(src), 1.9159 + OpcP, RegOpc(dst) ); 1.9160 + ins_pipe( fpu_reg_reg ); 1.9161 +%} 1.9162 + 1.9163 +instruct subD_reg_round(stackSlotD dst, regD src1, regD src2) %{ 1.9164 + predicate (UseSSE <=1); 1.9165 + match(Set dst (RoundDouble (SubD src1 src2))); 1.9166 + ins_cost(250); 1.9167 + 1.9168 + format %{ "FLD $src2\n\t" 1.9169 + "DSUB ST,$src1\n\t" 1.9170 + "FSTP_D $dst\t# D-round" 
%} 1.9171 + opcode(0xD8, 0x5); 1.9172 + ins_encode( Push_Reg_D(src2), 1.9173 + OpcP, RegOpc(src1), Pop_Mem_D(dst) ); 1.9174 + ins_pipe( fpu_mem_reg_reg ); 1.9175 +%} 1.9176 + 1.9177 + 1.9178 +instruct subD_reg_mem(regD dst, memory src) %{ 1.9179 + predicate (UseSSE <=1); 1.9180 + match(Set dst (SubD dst (LoadD src))); 1.9181 + ins_cost(150); 1.9182 + 1.9183 + format %{ "FLD $src\n\t" 1.9184 + "DSUBp $dst,ST" %} 1.9185 + opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 1.9186 + ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 1.9187 + OpcP, RegOpc(dst) ); 1.9188 + ins_pipe( fpu_reg_mem ); 1.9189 +%} 1.9190 + 1.9191 +instruct absD_reg(regDPR1 dst, regDPR1 src) %{ 1.9192 + predicate (UseSSE<=1); 1.9193 + match(Set dst (AbsD src)); 1.9194 + ins_cost(100); 1.9195 + format %{ "FABS" %} 1.9196 + opcode(0xE1, 0xD9); 1.9197 + ins_encode( OpcS, OpcP ); 1.9198 + ins_pipe( fpu_reg_reg ); 1.9199 +%} 1.9200 + 1.9201 +instruct absXD_reg( regXD dst ) %{ 1.9202 + predicate(UseSSE>=2); 1.9203 + match(Set dst (AbsD dst)); 1.9204 + format %{ "ANDPD $dst,[0x7FFFFFFFFFFFFFFF]\t# ABS D by sign masking" %} 1.9205 + ins_encode( AbsXD_encoding(dst)); 1.9206 + ins_pipe( pipe_slow ); 1.9207 +%} 1.9208 + 1.9209 +instruct negD_reg(regDPR1 dst, regDPR1 src) %{ 1.9210 + predicate(UseSSE<=1); 1.9211 + match(Set dst (NegD src)); 1.9212 + ins_cost(100); 1.9213 + format %{ "FCHS" %} 1.9214 + opcode(0xE0, 0xD9); 1.9215 + ins_encode( OpcS, OpcP ); 1.9216 + ins_pipe( fpu_reg_reg ); 1.9217 +%} 1.9218 + 1.9219 +instruct negXD_reg( regXD dst ) %{ 1.9220 + predicate(UseSSE>=2); 1.9221 + match(Set dst (NegD dst)); 1.9222 + format %{ "XORPD $dst,[0x8000000000000000]\t# CHS D by sign flipping" %} 1.9223 + ins_encode %{ 1.9224 + __ xorpd($dst$$XMMRegister, 1.9225 + ExternalAddress((address)double_signflip_pool)); 1.9226 + %} 1.9227 + ins_pipe( pipe_slow ); 1.9228 +%} 1.9229 + 1.9230 +instruct addD_reg(regD dst, regD src) %{ 1.9231 + predicate(UseSSE<=1); 1.9232 + match(Set dst (AddD dst src)); 1.9233 
+ format %{ "FLD $src\n\t" 1.9234 + "DADD $dst,ST" %} 1.9235 + size(4); 1.9236 + ins_cost(150); 1.9237 + opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 1.9238 + ins_encode( Push_Reg_D(src), 1.9239 + OpcP, RegOpc(dst) ); 1.9240 + ins_pipe( fpu_reg_reg ); 1.9241 +%} 1.9242 + 1.9243 + 1.9244 +instruct addD_reg_round(stackSlotD dst, regD src1, regD src2) %{ 1.9245 + predicate(UseSSE<=1); 1.9246 + match(Set dst (RoundDouble (AddD src1 src2))); 1.9247 + ins_cost(250); 1.9248 + 1.9249 + format %{ "FLD $src2\n\t" 1.9250 + "DADD ST,$src1\n\t" 1.9251 + "FSTP_D $dst\t# D-round" %} 1.9252 + opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 1.9253 + ins_encode( Push_Reg_D(src2), 1.9254 + OpcP, RegOpc(src1), Pop_Mem_D(dst) ); 1.9255 + ins_pipe( fpu_mem_reg_reg ); 1.9256 +%} 1.9257 + 1.9258 + 1.9259 +instruct addD_reg_mem(regD dst, memory src) %{ 1.9260 + predicate(UseSSE<=1); 1.9261 + match(Set dst (AddD dst (LoadD src))); 1.9262 + ins_cost(150); 1.9263 + 1.9264 + format %{ "FLD $src\n\t" 1.9265 + "DADDp $dst,ST" %} 1.9266 + opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 1.9267 + ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 1.9268 + OpcP, RegOpc(dst) ); 1.9269 + ins_pipe( fpu_reg_mem ); 1.9270 +%} 1.9271 + 1.9272 +// add-to-memory 1.9273 +instruct addD_mem_reg(memory dst, regD src) %{ 1.9274 + predicate(UseSSE<=1); 1.9275 + match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src)))); 1.9276 + ins_cost(150); 1.9277 + 1.9278 + format %{ "FLD_D $dst\n\t" 1.9279 + "DADD ST,$src\n\t" 1.9280 + "FST_D $dst" %} 1.9281 + opcode(0xDD, 0x0); 1.9282 + ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst), 1.9283 + Opcode(0xD8), RegOpc(src), 1.9284 + set_instruction_start, 1.9285 + Opcode(0xDD), RMopc_Mem(0x03,dst) ); 1.9286 + ins_pipe( fpu_reg_mem ); 1.9287 +%} 1.9288 + 1.9289 +instruct addD_reg_imm1(regD dst, immD1 src) %{ 1.9290 + predicate(UseSSE<=1); 1.9291 + match(Set dst (AddD dst src)); 1.9292 + ins_cost(125); 1.9293 + format %{ "FLD1\n\t" 1.9294 + "DADDp $dst,ST" %} 1.9295 + 
opcode(0xDE, 0x00); 1.9296 + ins_encode( LdImmD(src), 1.9297 + OpcP, RegOpc(dst) ); 1.9298 + ins_pipe( fpu_reg ); 1.9299 +%} 1.9300 + 1.9301 +instruct addD_reg_imm(regD dst, immD src) %{ 1.9302 + predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 1.9303 + match(Set dst (AddD dst src)); 1.9304 + ins_cost(200); 1.9305 + format %{ "FLD_D [$src]\n\t" 1.9306 + "DADDp $dst,ST" %} 1.9307 + opcode(0xDE, 0x00); /* DE /0 */ 1.9308 + ins_encode( LdImmD(src), 1.9309 + OpcP, RegOpc(dst)); 1.9310 + ins_pipe( fpu_reg_mem ); 1.9311 +%} 1.9312 + 1.9313 +instruct addD_reg_imm_round(stackSlotD dst, regD src, immD con) %{ 1.9314 + predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 1.9315 + match(Set dst (RoundDouble (AddD src con))); 1.9316 + ins_cost(200); 1.9317 + format %{ "FLD_D [$con]\n\t" 1.9318 + "DADD ST,$src\n\t" 1.9319 + "FSTP_D $dst\t# D-round" %} 1.9320 + opcode(0xD8, 0x00); /* D8 /0 */ 1.9321 + ins_encode( LdImmD(con), 1.9322 + OpcP, RegOpc(src), Pop_Mem_D(dst)); 1.9323 + ins_pipe( fpu_mem_reg_con ); 1.9324 +%} 1.9325 + 1.9326 +// Add two double precision floating point values in xmm 1.9327 +instruct addXD_reg(regXD dst, regXD src) %{ 1.9328 + predicate(UseSSE>=2); 1.9329 + match(Set dst (AddD dst src)); 1.9330 + format %{ "ADDSD $dst,$src" %} 1.9331 + ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegReg(dst, src)); 1.9332 + ins_pipe( pipe_slow ); 1.9333 +%} 1.9334 + 1.9335 +instruct addXD_imm(regXD dst, immXD con) %{ 1.9336 + predicate(UseSSE>=2); 1.9337 + match(Set dst (AddD dst con)); 1.9338 + format %{ "ADDSD $dst,[$con]" %} 1.9339 + ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), LdImmXD(dst, con) ); 1.9340 + ins_pipe( pipe_slow ); 1.9341 +%} 1.9342 + 1.9343 +instruct addXD_mem(regXD dst, memory mem) %{ 1.9344 + predicate(UseSSE>=2); 1.9345 + match(Set dst (AddD dst (LoadD mem))); 1.9346 + format %{ "ADDSD $dst,$mem" %} 1.9347 + ins_encode( Opcode(0xF2), 
Opcode(0x0F), Opcode(0x58), RegMem(dst,mem)); 1.9348 + ins_pipe( pipe_slow ); 1.9349 +%} 1.9350 + 1.9351 +// Sub two double precision floating point values in xmm 1.9352 +instruct subXD_reg(regXD dst, regXD src) %{ 1.9353 + predicate(UseSSE>=2); 1.9354 + match(Set dst (SubD dst src)); 1.9355 + format %{ "SUBSD $dst,$src" %} 1.9356 + ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src)); 1.9357 + ins_pipe( pipe_slow ); 1.9358 +%} 1.9359 + 1.9360 +instruct subXD_imm(regXD dst, immXD con) %{ 1.9361 + predicate(UseSSE>=2); 1.9362 + match(Set dst (SubD dst con)); 1.9363 + format %{ "SUBSD $dst,[$con]" %} 1.9364 + ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), LdImmXD(dst, con) ); 1.9365 + ins_pipe( pipe_slow ); 1.9366 +%} 1.9367 + 1.9368 +instruct subXD_mem(regXD dst, memory mem) %{ 1.9369 + predicate(UseSSE>=2); 1.9370 + match(Set dst (SubD dst (LoadD mem))); 1.9371 + format %{ "SUBSD $dst,$mem" %} 1.9372 + ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem)); 1.9373 + ins_pipe( pipe_slow ); 1.9374 +%} 1.9375 + 1.9376 +// Mul two double precision floating point values in xmm 1.9377 +instruct mulXD_reg(regXD dst, regXD src) %{ 1.9378 + predicate(UseSSE>=2); 1.9379 + match(Set dst (MulD dst src)); 1.9380 + format %{ "MULSD $dst,$src" %} 1.9381 + ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegReg(dst, src)); 1.9382 + ins_pipe( pipe_slow ); 1.9383 +%} 1.9384 + 1.9385 +instruct mulXD_imm(regXD dst, immXD con) %{ 1.9386 + predicate(UseSSE>=2); 1.9387 + match(Set dst (MulD dst con)); 1.9388 + format %{ "MULSD $dst,[$con]" %} 1.9389 + ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), LdImmXD(dst, con) ); 1.9390 + ins_pipe( pipe_slow ); 1.9391 +%} 1.9392 + 1.9393 +instruct mulXD_mem(regXD dst, memory mem) %{ 1.9394 + predicate(UseSSE>=2); 1.9395 + match(Set dst (MulD dst (LoadD mem))); 1.9396 + format %{ "MULSD $dst,$mem" %} 1.9397 + ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem)); 1.9398 + 
ins_pipe( pipe_slow ); 1.9399 +%} 1.9400 + 1.9401 +// Div two double precision floating point values in xmm 1.9402 +instruct divXD_reg(regXD dst, regXD src) %{ 1.9403 + predicate(UseSSE>=2); 1.9404 + match(Set dst (DivD dst src)); 1.9405 + format %{ "DIVSD $dst,$src" %} 1.9406 + opcode(0xF2, 0x0F, 0x5E); 1.9407 + ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src)); 1.9408 + ins_pipe( pipe_slow ); 1.9409 +%} 1.9410 + 1.9411 +instruct divXD_imm(regXD dst, immXD con) %{ 1.9412 + predicate(UseSSE>=2); 1.9413 + match(Set dst (DivD dst con)); 1.9414 + format %{ "DIVSD $dst,[$con]" %} 1.9415 + ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), LdImmXD(dst, con)); 1.9416 + ins_pipe( pipe_slow ); 1.9417 +%} 1.9418 + 1.9419 +instruct divXD_mem(regXD dst, memory mem) %{ 1.9420 + predicate(UseSSE>=2); 1.9421 + match(Set dst (DivD dst (LoadD mem))); 1.9422 + format %{ "DIVSD $dst,$mem" %} 1.9423 + ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem)); 1.9424 + ins_pipe( pipe_slow ); 1.9425 +%} 1.9426 + 1.9427 + 1.9428 +instruct mulD_reg(regD dst, regD src) %{ 1.9429 + predicate(UseSSE<=1); 1.9430 + match(Set dst (MulD dst src)); 1.9431 + format %{ "FLD $src\n\t" 1.9432 + "DMULp $dst,ST" %} 1.9433 + opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 1.9434 + ins_cost(150); 1.9435 + ins_encode( Push_Reg_D(src), 1.9436 + OpcP, RegOpc(dst) ); 1.9437 + ins_pipe( fpu_reg_reg ); 1.9438 +%} 1.9439 + 1.9440 +// Strict FP instruction biases argument before multiply then 1.9441 +// biases result to avoid double rounding of subnormals. 
1.9442 +// 1.9443 +// scale arg1 by multiplying arg1 by 2^(-15360) 1.9444 +// load arg2 1.9445 +// multiply scaled arg1 by arg2 1.9446 +// rescale product by 2^(15360) 1.9447 +// 1.9448 +instruct strictfp_mulD_reg(regDPR1 dst, regnotDPR1 src) %{ 1.9449 + predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 1.9450 + match(Set dst (MulD dst src)); 1.9451 + ins_cost(1); // Select this instruction for all strict FP double multiplies 1.9452 + 1.9453 + format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 1.9454 + "DMULp $dst,ST\n\t" 1.9455 + "FLD $src\n\t" 1.9456 + "DMULp $dst,ST\n\t" 1.9457 + "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 1.9458 + "DMULp $dst,ST\n\t" %} 1.9459 + opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 1.9460 + ins_encode( strictfp_bias1(dst), 1.9461 + Push_Reg_D(src), 1.9462 + OpcP, RegOpc(dst), 1.9463 + strictfp_bias2(dst) ); 1.9464 + ins_pipe( fpu_reg_reg ); 1.9465 +%} 1.9466 + 1.9467 +instruct mulD_reg_imm(regD dst, immD src) %{ 1.9468 + predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 1.9469 + match(Set dst (MulD dst src)); 1.9470 + ins_cost(200); 1.9471 + format %{ "FLD_D [$src]\n\t" 1.9472 + "DMULp $dst,ST" %} 1.9473 + opcode(0xDE, 0x1); /* DE /1 */ 1.9474 + ins_encode( LdImmD(src), 1.9475 + OpcP, RegOpc(dst) ); 1.9476 + ins_pipe( fpu_reg_mem ); 1.9477 +%} 1.9478 + 1.9479 + 1.9480 +instruct mulD_reg_mem(regD dst, memory src) %{ 1.9481 + predicate( UseSSE<=1 ); 1.9482 + match(Set dst (MulD dst (LoadD src))); 1.9483 + ins_cost(200); 1.9484 + format %{ "FLD_D $src\n\t" 1.9485 + "DMULp $dst,ST" %} 1.9486 + opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ 1.9487 + ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 1.9488 + OpcP, RegOpc(dst) ); 1.9489 + ins_pipe( fpu_reg_mem ); 1.9490 +%} 1.9491 + 1.9492 +// 1.9493 +// Cisc-alternate to reg-reg multiply 1.9494 +instruct mulD_reg_mem_cisc(regD dst, regD src, memory mem) %{ 1.9495 + predicate( 
UseSSE<=1 ); 1.9496 + match(Set dst (MulD src (LoadD mem))); 1.9497 + ins_cost(250); 1.9498 + format %{ "FLD_D $mem\n\t" 1.9499 + "DMUL ST,$src\n\t" 1.9500 + "FSTP_D $dst" %} 1.9501 + opcode(0xD8, 0x1, 0xDD); /* D8 C8+i */ /* LoadD DD /0 (FLD m64fp; D9 /0 would load a 32-bit float) */ 1.9502 + ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), 1.9503 + OpcReg_F(src), 1.9504 + Pop_Reg_D(dst) ); 1.9505 + ins_pipe( fpu_reg_reg_mem ); 1.9506 +%} 1.9507 + 1.9508 + 1.9509 +// MACRO3 -- addD a mulD 1.9510 +// This instruction is a '2-address' instruction in that the result goes 1.9511 +// back to src2. This eliminates a move from the macro; possibly the 1.9512 +// register allocator will have to add it back (and maybe not). 1.9513 +instruct addD_mulD_reg(regD src2, regD src1, regD src0) %{ 1.9514 + predicate( UseSSE<=1 ); 1.9515 + match(Set src2 (AddD (MulD src0 src1) src2)); 1.9516 + format %{ "FLD $src0\t# ===MACRO3d===\n\t" 1.9517 + "DMUL ST,$src1\n\t" 1.9518 + "DADDp $src2,ST" %} 1.9519 + ins_cost(250); 1.9520 + opcode(0xDD); /* LoadD DD /0 */ 1.9521 + ins_encode( Push_Reg_F(src0), 1.9522 + FMul_ST_reg(src1), 1.9523 + FAddP_reg_ST(src2) ); 1.9524 + ins_pipe( fpu_reg_reg_reg ); 1.9525 +%} 1.9526 + 1.9527 + 1.9528 +// MACRO3 -- subD a mulD 1.9529 +instruct subD_mulD_reg(regD src2, regD src1, regD src0) %{ 1.9530 + predicate( UseSSE<=1 ); 1.9531 + match(Set src2 (SubD (MulD src0 src1) src2)); 1.9532 + format %{ "FLD $src0\t# ===MACRO3d===\n\t" 1.9533 + "DMUL ST,$src1\n\t" 1.9534 + "DSUBRp $src2,ST" %} 1.9535 + ins_cost(250); 1.9536 + ins_encode( Push_Reg_F(src0), 1.9537 + FMul_ST_reg(src1), 1.9538 + Opcode(0xDE), Opc_plus(0xE0,src2)); 1.9539 + ins_pipe( fpu_reg_reg_reg ); 1.9540 +%} 1.9541 + 1.9542 + 1.9543 +instruct divD_reg(regD dst, regD src) %{ 1.9544 + predicate( UseSSE<=1 ); 1.9545 + match(Set dst (DivD dst src)); 1.9546 + 1.9547 + format %{ "FLD $src\n\t" 1.9548 + "FDIVp $dst,ST" %} 1.9549 + opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 1.9550 + ins_cost(150); 1.9551 + ins_encode( Push_Reg_D(src), 1.9552 + OpcP, 
RegOpc(dst) ); 1.9553 + ins_pipe( fpu_reg_reg ); 1.9554 +%} 1.9555 + 1.9556 +// Strict FP instruction biases argument before division then 1.9557 +// biases result, to avoid double rounding of subnormals. 1.9558 +// 1.9559 +// scale dividend by multiplying dividend by 2^(-15360) 1.9560 +// load divisor 1.9561 +// divide scaled dividend by divisor 1.9562 +// rescale quotient by 2^(15360) 1.9563 +// 1.9564 +instruct strictfp_divD_reg(regDPR1 dst, regnotDPR1 src) %{ 1.9565 + predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 1.9566 + match(Set dst (DivD dst src)); 1.9567 + ins_cost(1); // Select this instruction for all strict FP double divides 1.9568 + 1.9569 + 1.9570 + format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 1.9571 + "DMULp $dst,ST\n\t" 1.9572 + "FLD $src\n\t" 1.9573 + "FDIVp $dst,ST\n\t" 1.9574 + "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 1.9575 + "DMULp $dst,ST\n\t" %} 1.9576 + opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 1.9577 + ins_encode( strictfp_bias1(dst), 1.9578 + Push_Reg_D(src), 1.9579 + OpcP, RegOpc(dst), 1.9580 + strictfp_bias2(dst) ); 1.9581 + ins_pipe( fpu_reg_reg ); 1.9582 +%} 1.9583 + 1.9584 +instruct divD_reg_round(stackSlotD dst, regD src1, regD src2) %{ 1.9585 + predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) ); 1.9586 + match(Set dst (RoundDouble (DivD src1 src2))); 1.9587 + 1.9588 + format %{ "FLD $src1\n\t" 1.9589 + "FDIV ST,$src2\n\t" 1.9590 + "FSTP_D $dst\t# D-round" %} 1.9591 + opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */ 1.9592 + ins_encode( Push_Reg_D(src1), 1.9593 + OpcP, RegOpc(src2), Pop_Mem_D(dst) ); 1.9594 + ins_pipe( fpu_mem_reg_reg ); 1.9595 +%} 1.9596 + 1.9597 + 1.9598 +instruct modD_reg(regD dst, regD src, eAXRegI rax, eFlagsReg cr) %{ 1.9599 + predicate(UseSSE<=1); 1.9600 + match(Set dst (ModD dst src)); 1.9601 + effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS 1.9602 + 1.9603 + format %{ "DMOD $dst,$src" %} 1.9604 + ins_cost(250); 
1.9605 + ins_encode(Push_Reg_Mod_D(dst, src), 1.9606 + emitModD(), 1.9607 + Push_Result_Mod_D(src), 1.9608 + Pop_Reg_D(dst)); 1.9609 + ins_pipe( pipe_slow ); 1.9610 +%} 1.9611 + 1.9612 +instruct modXD_reg(regXD dst, regXD src0, regXD src1, eAXRegI rax, eFlagsReg cr) %{ 1.9613 + predicate(UseSSE>=2); 1.9614 + match(Set dst (ModD src0 src1)); 1.9615 + effect(KILL rax, KILL cr); 1.9616 + 1.9617 + format %{ "SUB ESP,8\t # DMOD\n" 1.9618 + "\tMOVSD [ESP+0],$src1\n" 1.9619 + "\tFLD_D [ESP+0]\n" 1.9620 + "\tMOVSD [ESP+0],$src0\n" 1.9621 + "\tFLD_D [ESP+0]\n" 1.9622 + "loop:\tFPREM\n" 1.9623 + "\tFWAIT\n" 1.9624 + "\tFNSTSW AX\n" 1.9625 + "\tSAHF\n" 1.9626 + "\tJP loop\n" 1.9627 + "\tFSTP_D [ESP+0]\n" 1.9628 + "\tMOVSD $dst,[ESP+0]\n" 1.9629 + "\tADD ESP,8\n" 1.9630 + "\tFSTP ST0\t # Restore FPU Stack" 1.9631 + %} 1.9632 + ins_cost(250); 1.9633 + ins_encode( Push_ModD_encoding(src0, src1), emitModD(), Push_ResultXD(dst), PopFPU); 1.9634 + ins_pipe( pipe_slow ); 1.9635 +%} 1.9636 + 1.9637 +instruct sinD_reg(regDPR1 dst, regDPR1 src) %{ 1.9638 + predicate (UseSSE<=1); 1.9639 + match(Set dst (SinD src)); 1.9640 + ins_cost(1800); 1.9641 + format %{ "DSIN $dst" %} 1.9642 + opcode(0xD9, 0xFE); 1.9643 + ins_encode( OpcP, OpcS ); 1.9644 + ins_pipe( pipe_slow ); 1.9645 +%} 1.9646 + 1.9647 +instruct sinXD_reg(regXD dst, eFlagsReg cr) %{ 1.9648 + predicate (UseSSE>=2); 1.9649 + match(Set dst (SinD dst)); 1.9650 + effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8" 1.9651 + ins_cost(1800); 1.9652 + format %{ "DSIN $dst" %} 1.9653 + opcode(0xD9, 0xFE); 1.9654 + ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) ); 1.9655 + ins_pipe( pipe_slow ); 1.9656 +%} 1.9657 + 1.9658 +instruct cosD_reg(regDPR1 dst, regDPR1 src) %{ 1.9659 + predicate (UseSSE<=1); 1.9660 + match(Set dst (CosD src)); 1.9661 + ins_cost(1800); 1.9662 + format %{ "DCOS $dst" %} 1.9663 + opcode(0xD9, 0xFF); 1.9664 + ins_encode( OpcP, OpcS ); 1.9665 + ins_pipe( pipe_slow ); 1.9666 +%} 1.9667 + 1.9668 
+instruct cosXD_reg(regXD dst, eFlagsReg cr) %{ 1.9669 + predicate (UseSSE>=2); 1.9670 + match(Set dst (CosD dst)); 1.9671 + effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8" 1.9672 + ins_cost(1800); 1.9673 + format %{ "DCOS $dst" %} 1.9674 + opcode(0xD9, 0xFF); 1.9675 + ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) ); 1.9676 + ins_pipe( pipe_slow ); 1.9677 +%} 1.9678 + 1.9679 +instruct tanD_reg(regDPR1 dst, regDPR1 src) %{ 1.9680 + predicate (UseSSE<=1); 1.9681 + match(Set dst(TanD src)); 1.9682 + format %{ "DTAN $dst" %} 1.9683 + ins_encode( Opcode(0xD9), Opcode(0xF2), // fptan 1.9684 + Opcode(0xDD), Opcode(0xD8)); // fstp st 1.9685 + ins_pipe( pipe_slow ); 1.9686 +%} 1.9687 + 1.9688 +instruct tanXD_reg(regXD dst, eFlagsReg cr) %{ 1.9689 + predicate (UseSSE>=2); 1.9690 + match(Set dst(TanD dst)); 1.9691 + effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8" 1.9692 + format %{ "DTAN $dst" %} 1.9693 + ins_encode( Push_SrcXD(dst), 1.9694 + Opcode(0xD9), Opcode(0xF2), // fptan 1.9695 + Opcode(0xDD), Opcode(0xD8), // fstp st 1.9696 + Push_ResultXD(dst) ); 1.9697 + ins_pipe( pipe_slow ); 1.9698 +%} 1.9699 + 1.9700 +instruct atanD_reg(regD dst, regD src) %{ 1.9701 + predicate (UseSSE<=1); 1.9702 + match(Set dst(AtanD dst src)); 1.9703 + format %{ "DATA $dst,$src" %} 1.9704 + opcode(0xD9, 0xF3); 1.9705 + ins_encode( Push_Reg_D(src), 1.9706 + OpcP, OpcS, RegOpc(dst) ); 1.9707 + ins_pipe( pipe_slow ); 1.9708 +%} 1.9709 + 1.9710 +instruct atanXD_reg(regXD dst, regXD src, eFlagsReg cr) %{ 1.9711 + predicate (UseSSE>=2); 1.9712 + match(Set dst(AtanD dst src)); 1.9713 + effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8" 1.9714 + format %{ "DATA $dst,$src" %} 1.9715 + opcode(0xD9, 0xF3); 1.9716 + ins_encode( Push_SrcXD(src), 1.9717 + OpcP, OpcS, Push_ResultXD(dst) ); 1.9718 + ins_pipe( pipe_slow ); 1.9719 +%} 1.9720 + 1.9721 +instruct sqrtD_reg(regD dst, regD src) %{ 1.9722 + predicate (UseSSE<=1); 1.9723 + match(Set dst 
(SqrtD src)); 1.9724 + format %{ "DSQRT $dst,$src" %} 1.9725 + opcode(0xFA, 0xD9); 1.9726 + ins_encode( Push_Reg_D(src), 1.9727 + OpcS, OpcP, Pop_Reg_D(dst) ); 1.9728 + ins_pipe( pipe_slow ); 1.9729 +%} 1.9730 + 1.9731 +instruct powD_reg(regD X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{ 1.9732 + predicate (UseSSE<=1); 1.9733 + match(Set Y (PowD X Y)); // Raise X to the Yth power 1.9734 + effect(KILL rax, KILL rbx, KILL rcx); 1.9735 + format %{ "SUB ESP,8\t\t# Fast-path POW encoding\n\t" 1.9736 + "FLD_D $X\n\t" 1.9737 + "FYL2X \t\t\t# Q=Y*ln2(X)\n\t" 1.9738 + 1.9739 + "FDUP \t\t\t# Q Q\n\t" 1.9740 + "FRNDINT\t\t\t# int(Q) Q\n\t" 1.9741 + "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t" 1.9742 + "FISTP dword [ESP]\n\t" 1.9743 + "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t" 1.9744 + "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t" 1.9745 + "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead 1.9746 + "MOV EAX,[ESP]\t# Pick up int(Q)\n\t" 1.9747 + "MOV ECX,0xFFFFF800\t# Overflow mask\n\t" 1.9748 + "ADD EAX,1023\t\t# Double exponent bias\n\t" 1.9749 + "MOV EBX,EAX\t\t# Preshifted biased expo\n\t" 1.9750 + "SHL EAX,20\t\t# Shift exponent into place\n\t" 1.9751 + "TEST EBX,ECX\t\t# Check for overflow\n\t" 1.9752 + "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t" 1.9753 + "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t" 1.9754 + "MOV [ESP+0],0\n\t" 1.9755 + "FMUL ST(0),[ESP+0]\t# Scale\n\t" 1.9756 + 1.9757 + "ADD ESP,8" 1.9758 + %} 1.9759 + ins_encode( push_stack_temp_qword, 1.9760 + Push_Reg_D(X), 1.9761 + Opcode(0xD9), Opcode(0xF1), // fyl2x 1.9762 + pow_exp_core_encoding, 1.9763 + pop_stack_temp_qword); 1.9764 + ins_pipe( pipe_slow ); 1.9765 +%} 1.9766 + 1.9767 +instruct powXD_reg(regXD dst, regXD src0, regXD src1, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx ) %{ 1.9768 + predicate (UseSSE>=2); 1.9769 + match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power 1.9770 + effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx ); 1.9771 + 
format %{ "SUB ESP,8\t\t# Fast-path POW encoding\n\t" 1.9772 + "MOVSD [ESP],$src1\n\t" 1.9773 + "FLD FPR1,$src1\n\t" 1.9774 + "MOVSD [ESP],$src0\n\t" 1.9775 + "FLD FPR1,$src0\n\t" 1.9776 + "FYL2X \t\t\t# Q=Y*ln2(X)\n\t" 1.9777 + 1.9778 + "FDUP \t\t\t# Q Q\n\t" 1.9779 + "FRNDINT\t\t\t# int(Q) Q\n\t" 1.9780 + "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t" 1.9781 + "FISTP dword [ESP]\n\t" 1.9782 + "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t" 1.9783 + "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t" 1.9784 + "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead 1.9785 + "MOV EAX,[ESP]\t# Pick up int(Q)\n\t" 1.9786 + "MOV ECX,0xFFFFF800\t# Overflow mask\n\t" 1.9787 + "ADD EAX,1023\t\t# Double exponent bias\n\t" 1.9788 + "MOV EBX,EAX\t\t# Preshifted biased expo\n\t" 1.9789 + "SHL EAX,20\t\t# Shift exponent into place\n\t" 1.9790 + "TEST EBX,ECX\t\t# Check for overflow\n\t" 1.9791 + "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t" 1.9792 + "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t" 1.9793 + "MOV [ESP+0],0\n\t" 1.9794 + "FMUL ST(0),[ESP+0]\t# Scale\n\t" 1.9795 + 1.9796 + "FST_D [ESP]\n\t" 1.9797 + "MOVSD $dst,[ESP]\n\t" 1.9798 + "ADD ESP,8" 1.9799 + %} 1.9800 + ins_encode( push_stack_temp_qword, 1.9801 + push_xmm_to_fpr1(src1), 1.9802 + push_xmm_to_fpr1(src0), 1.9803 + Opcode(0xD9), Opcode(0xF1), // fyl2x 1.9804 + pow_exp_core_encoding, 1.9805 + Push_ResultXD(dst) ); 1.9806 + ins_pipe( pipe_slow ); 1.9807 +%} 1.9808 + 1.9809 + 1.9810 +instruct expD_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{ 1.9811 + predicate (UseSSE<=1); 1.9812 + match(Set dpr1 (ExpD dpr1)); 1.9813 + effect(KILL rax, KILL rbx, KILL rcx); 1.9814 + format %{ "SUB ESP,8\t\t# Fast-path EXP encoding\n\t" 1.9815 + "FLDL2E \t\t\t# Ld log2(e) X\n\t" 1.9816 + "FMULP \t\t\t# Q=X*log2(e)\n\t" 1.9817 + 1.9818 + "FDUP \t\t\t# Q Q\n\t" 1.9819 + "FRNDINT\t\t\t# int(Q) Q\n\t" 1.9820 + "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t" 1.9821 + "FISTP dword [ESP]\n\t" 1.9822 + "F2XM1 \t\t\t# 
2^frac(Q)-1 int(Q)\n\t" 1.9823 + "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t" 1.9824 + "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead 1.9825 + "MOV EAX,[ESP]\t# Pick up int(Q)\n\t" 1.9826 + "MOV ECX,0xFFFFF800\t# Overflow mask\n\t" 1.9827 + "ADD EAX,1023\t\t# Double exponent bias\n\t" 1.9828 + "MOV EBX,EAX\t\t# Preshifted biased expo\n\t" 1.9829 + "SHL EAX,20\t\t# Shift exponent into place\n\t" 1.9830 + "TEST EBX,ECX\t\t# Check for overflow\n\t" 1.9831 + "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t" 1.9832 + "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t" 1.9833 + "MOV [ESP+0],0\n\t" 1.9834 + "FMUL ST(0),[ESP+0]\t# Scale\n\t" 1.9835 + 1.9836 + "ADD ESP,8" 1.9837 + %} 1.9838 + ins_encode( push_stack_temp_qword, 1.9839 + Opcode(0xD9), Opcode(0xEA), // fldl2e 1.9840 + Opcode(0xDE), Opcode(0xC9), // fmulp 1.9841 + pow_exp_core_encoding, 1.9842 + pop_stack_temp_qword); 1.9843 + ins_pipe( pipe_slow ); 1.9844 +%} 1.9845 + 1.9846 +instruct expXD_reg(regXD dst, regXD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{ 1.9847 + predicate (UseSSE>=2); 1.9848 + match(Set dst (ExpD src)); 1.9849 + effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx); 1.9850 + format %{ "SUB ESP,8\t\t# Fast-path EXP encoding\n\t" 1.9851 + "MOVSD [ESP],$src\n\t" 1.9852 + "FLDL2E \t\t\t# Ld log2(e) X\n\t" 1.9853 + "FMULP \t\t\t# Q=X*log2(e) X\n\t" 1.9854 + 1.9855 + "FDUP \t\t\t# Q Q\n\t" 1.9856 + "FRNDINT\t\t\t# int(Q) Q\n\t" 1.9857 + "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t" 1.9858 + "FISTP dword [ESP]\n\t" 1.9859 + "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t" 1.9860 + "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t" 1.9861 + "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead 1.9862 + "MOV EAX,[ESP]\t# Pick up int(Q)\n\t" 1.9863 + "MOV ECX,0xFFFFF800\t# Overflow mask\n\t" 1.9864 + "ADD EAX,1023\t\t# Double exponent bias\n\t" 1.9865 + "MOV EBX,EAX\t\t# Preshifted biased expo\n\t" 1.9866 + "SHL EAX,20\t\t# Shift exponent into place\n\t" 1.9867 + "TEST 
EBX,ECX\t\t# Check for overflow\n\t" 1.9868 + "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t" 1.9869 + "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t" 1.9870 + "MOV [ESP+0],0\n\t" 1.9871 + "FMUL ST(0),[ESP+0]\t# Scale\n\t" 1.9872 + 1.9873 + "FST_D [ESP]\n\t" 1.9874 + "MOVSD $dst,[ESP]\n\t" 1.9875 + "ADD ESP,8" 1.9876 + %} 1.9877 + ins_encode( Push_SrcXD(src), 1.9878 + Opcode(0xD9), Opcode(0xEA), // fldl2e 1.9879 + Opcode(0xDE), Opcode(0xC9), // fmulp 1.9880 + pow_exp_core_encoding, 1.9881 + Push_ResultXD(dst) ); 1.9882 + ins_pipe( pipe_slow ); 1.9883 +%} 1.9884 + 1.9885 + 1.9886 + 1.9887 +instruct log10D_reg(regDPR1 dst, regDPR1 src) %{ 1.9888 + predicate (UseSSE<=1); 1.9889 + // The source Double operand on FPU stack 1.9890 + match(Set dst (Log10D src)); 1.9891 + // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number 1.9892 + // fxch ; swap ST(0) with ST(1) 1.9893 + // fyl2x ; compute log_10(2) * log_2(x) 1.9894 + format %{ "FLDLG2 \t\t\t#Log10\n\t" 1.9895 + "FXCH \n\t" 1.9896 + "FYL2X \t\t\t# Q=Log10*Log_2(x)" 1.9897 + %} 1.9898 + ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 1.9899 + Opcode(0xD9), Opcode(0xC9), // fxch 1.9900 + Opcode(0xD9), Opcode(0xF1)); // fyl2x 1.9901 + 1.9902 + ins_pipe( pipe_slow ); 1.9903 +%} 1.9904 + 1.9905 +instruct log10XD_reg(regXD dst, regXD src, eFlagsReg cr) %{ 1.9906 + predicate (UseSSE>=2); 1.9907 + effect(KILL cr); 1.9908 + match(Set dst (Log10D src)); 1.9909 + // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number 1.9910 + // fyl2x ; compute log_10(2) * log_2(x) 1.9911 + format %{ "FLDLG2 \t\t\t#Log10\n\t" 1.9912 + "FYL2X \t\t\t# Q=Log10*Log_2(x)" 1.9913 + %} 1.9914 + ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 1.9915 + Push_SrcXD(src), 1.9916 + Opcode(0xD9), Opcode(0xF1), // fyl2x 1.9917 + Push_ResultXD(dst)); 1.9918 + 1.9919 + ins_pipe( pipe_slow ); 1.9920 +%} 1.9921 + 1.9922 +instruct logD_reg(regDPR1 dst, regDPR1 src) %{ 1.9923 + predicate (UseSSE<=1); 1.9924 + // The source 
Double operand on FPU stack 1.9925 + match(Set dst (LogD src)); 1.9926 + // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number 1.9927 + // fxch ; swap ST(0) with ST(1) 1.9928 + // fyl2x ; compute log_e(2) * log_2(x) 1.9929 + format %{ "FLDLN2 \t\t\t#Log_e\n\t" 1.9930 + "FXCH \n\t" 1.9931 + "FYL2X \t\t\t# Q=Log_e*Log_2(x)" 1.9932 + %} 1.9933 + ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2 1.9934 + Opcode(0xD9), Opcode(0xC9), // fxch 1.9935 + Opcode(0xD9), Opcode(0xF1)); // fyl2x 1.9936 + 1.9937 + ins_pipe( pipe_slow ); 1.9938 +%} 1.9939 + 1.9940 +instruct logXD_reg(regXD dst, regXD src, eFlagsReg cr) %{ 1.9941 + predicate (UseSSE>=2); 1.9942 + effect(KILL cr); 1.9943 + // The source and result Double operands in XMM registers 1.9944 + match(Set dst (LogD src)); 1.9945 + // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number 1.9946 + // fyl2x ; compute log_e(2) * log_2(x) 1.9947 + format %{ "FLDLN2 \t\t\t#Log_e\n\t" 1.9948 + "FYL2X \t\t\t# Q=Log_e*Log_2(x)" 1.9949 + %} 1.9950 + ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2 1.9951 + Push_SrcXD(src), 1.9952 + Opcode(0xD9), Opcode(0xF1), // fyl2x 1.9953 + Push_ResultXD(dst)); 1.9954 + ins_pipe( pipe_slow ); 1.9955 +%} 1.9956 + 1.9957 +//-------------Float Instructions------------------------------- 1.9958 +// Float Math 1.9959 + 1.9960 +// Code for float compare: 1.9961 +// fcompp(); 1.9962 +// fwait(); fnstsw_ax(); 1.9963 +// sahf(); 1.9964 +// movl(dst, unordered_result); 1.9965 +// jcc(Assembler::parity, exit); 1.9966 +// movl(dst, less_result); 1.9967 +// jcc(Assembler::below, exit); 1.9968 +// movl(dst, equal_result); 1.9969 +// jcc(Assembler::equal, exit); 1.9970 +// movl(dst, greater_result); 1.9971 +// exit: 1.9972 + 1.9973 +// P6 version of float compare, sets condition codes in EFLAGS 1.9974 +instruct cmpF_cc_P6(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{ 1.9975 + predicate(VM_Version::supports_cmov() && UseSSE == 0); 1.9976 + match(Set cr (CmpF src1 src2)); 1.9977 + 
effect(KILL rax); 1.9978 + ins_cost(150); 1.9979 + format %{ "FLD $src1\n\t" 1.9980 + "FUCOMIP ST,$src2 // P6 instruction\n\t" 1.9981 + "JNP exit\n\t" 1.9982 + "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" 1.9983 + "SAHF\n" 1.9984 + "exit:\tNOP // avoid branch to branch" %} 1.9985 + opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 1.9986 + ins_encode( Push_Reg_D(src1), 1.9987 + OpcP, RegOpc(src2), 1.9988 + cmpF_P6_fixup ); 1.9989 + ins_pipe( pipe_slow ); 1.9990 +%} 1.9991 + 1.9992 + 1.9993 +// Compare & branch 1.9994 +instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{ 1.9995 + predicate(UseSSE == 0); 1.9996 + match(Set cr (CmpF src1 src2)); 1.9997 + effect(KILL rax); 1.9998 + ins_cost(200); 1.9999 + format %{ "FLD $src1\n\t" 1.10000 + "FCOMp $src2\n\t" 1.10001 + "FNSTSW AX\n\t" 1.10002 + "TEST AX,0x400\n\t" 1.10003 + "JZ,s flags\n\t" 1.10004 + "MOV AH,1\t# unordered treat as LT\n" 1.10005 + "flags:\tSAHF" %} 1.10006 + opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 1.10007 + ins_encode( Push_Reg_D(src1), 1.10008 + OpcP, RegOpc(src2), 1.10009 + fpu_flags); 1.10010 + ins_pipe( pipe_slow ); 1.10011 +%} 1.10012 + 1.10013 +// Compare vs zero into -1,0,1 1.10014 +instruct cmpF_0(eRegI dst, regF src1, immF0 zero, eAXRegI rax, eFlagsReg cr) %{ 1.10015 + predicate(UseSSE == 0); 1.10016 + match(Set dst (CmpF3 src1 zero)); 1.10017 + effect(KILL cr, KILL rax); 1.10018 + ins_cost(280); 1.10019 + format %{ "FTSTF $dst,$src1" %} 1.10020 + opcode(0xE4, 0xD9); 1.10021 + ins_encode( Push_Reg_D(src1), 1.10022 + OpcS, OpcP, PopFPU, 1.10023 + CmpF_Result(dst)); 1.10024 + ins_pipe( pipe_slow ); 1.10025 +%} 1.10026 + 1.10027 +// Compare into -1,0,1 1.10028 +instruct cmpF_reg(eRegI dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{ 1.10029 + predicate(UseSSE == 0); 1.10030 + match(Set dst (CmpF3 src1 src2)); 1.10031 + effect(KILL cr, KILL rax); 1.10032 + ins_cost(300); 1.10033 + format %{ "FCMPF $dst,$src1,$src2" %} 1.10034 + opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 
1.10035 + ins_encode( Push_Reg_D(src1), 1.10036 + OpcP, RegOpc(src2), 1.10037 + CmpF_Result(dst)); 1.10038 + ins_pipe( pipe_slow ); 1.10039 +%} 1.10040 + 1.10041 +// float compare and set condition codes in EFLAGS by XMM regs 1.10042 +instruct cmpX_cc(eFlagsRegU cr, regX dst, regX src, eAXRegI rax) %{ 1.10043 + predicate(UseSSE>=1); 1.10044 + match(Set cr (CmpF dst src)); 1.10045 + effect(KILL rax); 1.10046 + ins_cost(145); 1.10047 + format %{ "COMISS $dst,$src\n" 1.10048 + "\tJNP exit\n" 1.10049 + "\tMOV ah,1 // saw a NaN, set CF\n" 1.10050 + "\tSAHF\n" 1.10051 + "exit:\tNOP // avoid branch to branch" %} 1.10052 + opcode(0x0F, 0x2F); 1.10053 + ins_encode(OpcP, OpcS, RegReg(dst, src), cmpF_P6_fixup); 1.10054 + ins_pipe( pipe_slow ); 1.10055 +%} 1.10056 + 1.10057 +// float compare and set condition codes in EFLAGS by XMM regs 1.10058 +instruct cmpX_ccmem(eFlagsRegU cr, regX dst, memory src, eAXRegI rax) %{ 1.10059 + predicate(UseSSE>=1); 1.10060 + match(Set cr (CmpF dst (LoadF src))); 1.10061 + effect(KILL rax); 1.10062 + ins_cost(165); 1.10063 + format %{ "COMISS $dst,$src\n" 1.10064 + "\tJNP exit\n" 1.10065 + "\tMOV ah,1 // saw a NaN, set CF\n" 1.10066 + "\tSAHF\n" 1.10067 + "exit:\tNOP // avoid branch to branch" %} 1.10068 + opcode(0x0F, 0x2F); 1.10069 + ins_encode(OpcP, OpcS, RegMem(dst, src), cmpF_P6_fixup); 1.10070 + ins_pipe( pipe_slow ); 1.10071 +%} 1.10072 + 1.10073 +// Compare into -1,0,1 in XMM 1.10074 +instruct cmpX_reg(eRegI dst, regX src1, regX src2, eFlagsReg cr) %{ 1.10075 + predicate(UseSSE>=1); 1.10076 + match(Set dst (CmpF3 src1 src2)); 1.10077 + effect(KILL cr); 1.10078 + ins_cost(255); 1.10079 + format %{ "XOR $dst,$dst\n" 1.10080 + "\tCOMISS $src1,$src2\n" 1.10081 + "\tJP,s nan\n" 1.10082 + "\tJEQ,s exit\n" 1.10083 + "\tJA,s inc\n" 1.10084 + "nan:\tDEC $dst\n" 1.10085 + "\tJMP,s exit\n" 1.10086 + "inc:\tINC $dst\n" 1.10087 + "exit:" 1.10088 + %} 1.10089 + opcode(0x0F, 0x2F); 1.10090 + ins_encode(Xor_Reg(dst), OpcP, OpcS, RegReg(src1, src2), 
CmpX_Result(dst)); 1.10091 + ins_pipe( pipe_slow ); 1.10092 +%} 1.10093 + 1.10094 +// Compare into -1,0,1 in XMM and memory 1.10095 +instruct cmpX_regmem(eRegI dst, regX src1, memory mem, eFlagsReg cr) %{ 1.10096 + predicate(UseSSE>=1); 1.10097 + match(Set dst (CmpF3 src1 (LoadF mem))); 1.10098 + effect(KILL cr); 1.10099 + ins_cost(275); 1.10100 + format %{ "COMISS $src1,$mem\n" 1.10101 + "\tMOV $dst,0\t\t# do not blow flags\n" 1.10102 + "\tJP,s nan\n" 1.10103 + "\tJEQ,s exit\n" 1.10104 + "\tJA,s inc\n" 1.10105 + "nan:\tDEC $dst\n" 1.10106 + "\tJMP,s exit\n" 1.10107 + "inc:\tINC $dst\n" 1.10108 + "exit:" 1.10109 + %} 1.10110 + opcode(0x0F, 0x2F); 1.10111 + ins_encode(OpcP, OpcS, RegMem(src1, mem), LdImmI(dst,0x0), CmpX_Result(dst)); 1.10112 + ins_pipe( pipe_slow ); 1.10113 +%} 1.10114 + 1.10115 +// Spill to obtain 24-bit precision 1.10116 +instruct subF24_reg(stackSlotF dst, regF src1, regF src2) %{ 1.10117 + predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 1.10118 + match(Set dst (SubF src1 src2)); 1.10119 + 1.10120 + format %{ "FSUB $dst,$src1 - $src2" %} 1.10121 + opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */ 1.10122 + ins_encode( Push_Reg_F(src1), 1.10123 + OpcReg_F(src2), 1.10124 + Pop_Mem_F(dst) ); 1.10125 + ins_pipe( fpu_mem_reg_reg ); 1.10126 +%} 1.10127 +// 1.10128 +// This instruction does not round to 24-bits 1.10129 +instruct subF_reg(regF dst, regF src) %{ 1.10130 + predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 1.10131 + match(Set dst (SubF dst src)); 1.10132 + 1.10133 + format %{ "FSUB $dst,$src" %} 1.10134 + opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 1.10135 + ins_encode( Push_Reg_F(src), 1.10136 + OpcP, RegOpc(dst) ); 1.10137 + ins_pipe( fpu_reg_reg ); 1.10138 +%} 1.10139 + 1.10140 +// Spill to obtain 24-bit precision 1.10141 +instruct addF24_reg(stackSlotF dst, regF src1, regF src2) %{ 1.10142 + predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 1.10143 + match(Set dst 
(AddF src1 src2)); 1.10144 + 1.10145 + format %{ "FADD $dst,$src1,$src2" %} 1.10146 + opcode(0xD8, 0x0); /* D8 C0+i */ 1.10147 + ins_encode( Push_Reg_F(src2), 1.10148 + OpcReg_F(src1), 1.10149 + Pop_Mem_F(dst) ); 1.10150 + ins_pipe( fpu_mem_reg_reg ); 1.10151 +%} 1.10152 +// 1.10153 +// This instruction does not round to 24-bits 1.10154 +instruct addF_reg(regF dst, regF src) %{ 1.10155 + predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 1.10156 + match(Set dst (AddF dst src)); 1.10157 + 1.10158 + format %{ "FLD $src\n\t" 1.10159 + "FADDp $dst,ST" %} 1.10160 + opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 1.10161 + ins_encode( Push_Reg_F(src), 1.10162 + OpcP, RegOpc(dst) ); 1.10163 + ins_pipe( fpu_reg_reg ); 1.10164 +%} 1.10165 + 1.10166 +// Add two single precision floating point values in xmm 1.10167 +instruct addX_reg(regX dst, regX src) %{ 1.10168 + predicate(UseSSE>=1); 1.10169 + match(Set dst (AddF dst src)); 1.10170 + format %{ "ADDSS $dst,$src" %} 1.10171 + ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegReg(dst, src)); 1.10172 + ins_pipe( pipe_slow ); 1.10173 +%} 1.10174 + 1.10175 +instruct addX_imm(regX dst, immXF con) %{ 1.10176 + predicate(UseSSE>=1); 1.10177 + match(Set dst (AddF dst con)); 1.10178 + format %{ "ADDSS $dst,[$con]" %} 1.10179 + ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), LdImmX(dst, con) ); 1.10180 + ins_pipe( pipe_slow ); 1.10181 +%} 1.10182 + 1.10183 +instruct addX_mem(regX dst, memory mem) %{ 1.10184 + predicate(UseSSE>=1); 1.10185 + match(Set dst (AddF dst (LoadF mem))); 1.10186 + format %{ "ADDSS $dst,$mem" %} 1.10187 + ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegMem(dst, mem)); 1.10188 + ins_pipe( pipe_slow ); 1.10189 +%} 1.10190 + 1.10191 +// Subtract two single precision floating point values in xmm 1.10192 +instruct subX_reg(regX dst, regX src) %{ 1.10193 + predicate(UseSSE>=1); 1.10194 + match(Set dst (SubF dst src)); 1.10195 + format %{ "SUBSS $dst,$src" %} 1.10196 + ins_encode( 
Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src)); 1.10197 + ins_pipe( pipe_slow ); 1.10198 +%} 1.10199 + 1.10200 +instruct subX_imm(regX dst, immXF con) %{ 1.10201 + predicate(UseSSE>=1); 1.10202 + match(Set dst (SubF dst con)); 1.10203 + format %{ "SUBSS $dst,[$con]" %} 1.10204 + ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), LdImmX(dst, con) ); 1.10205 + ins_pipe( pipe_slow ); 1.10206 +%} 1.10207 + 1.10208 +instruct subX_mem(regX dst, memory mem) %{ 1.10209 + predicate(UseSSE>=1); 1.10210 + match(Set dst (SubF dst (LoadF mem))); 1.10211 + format %{ "SUBSS $dst,$mem" %} 1.10212 + ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem)); 1.10213 + ins_pipe( pipe_slow ); 1.10214 +%} 1.10215 + 1.10216 +// Multiply two single precision floating point values in xmm 1.10217 +instruct mulX_reg(regX dst, regX src) %{ 1.10218 + predicate(UseSSE>=1); 1.10219 + match(Set dst (MulF dst src)); 1.10220 + format %{ "MULSS $dst,$src" %} 1.10221 + ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegReg(dst, src)); 1.10222 + ins_pipe( pipe_slow ); 1.10223 +%} 1.10224 + 1.10225 +instruct mulX_imm(regX dst, immXF con) %{ 1.10226 + predicate(UseSSE>=1); 1.10227 + match(Set dst (MulF dst con)); 1.10228 + format %{ "MULSS $dst,[$con]" %} 1.10229 + ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), LdImmX(dst, con) ); 1.10230 + ins_pipe( pipe_slow ); 1.10231 +%} 1.10232 + 1.10233 +instruct mulX_mem(regX dst, memory mem) %{ 1.10234 + predicate(UseSSE>=1); 1.10235 + match(Set dst (MulF dst (LoadF mem))); 1.10236 + format %{ "MULSS $dst,$mem" %} 1.10237 + ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem)); 1.10238 + ins_pipe( pipe_slow ); 1.10239 +%} 1.10240 + 1.10241 +// Divide two single precision floating point values in xmm 1.10242 +instruct divX_reg(regX dst, regX src) %{ 1.10243 + predicate(UseSSE>=1); 1.10244 + match(Set dst (DivF dst src)); 1.10245 + format %{ "DIVSS $dst,$src" %} 1.10246 + ins_encode( Opcode(0xF3), 
Opcode(0x0F), Opcode(0x5E), RegReg(dst, src)); 1.10247 + ins_pipe( pipe_slow ); 1.10248 +%} 1.10249 + 1.10250 +instruct divX_imm(regX dst, immXF con) %{ 1.10251 + predicate(UseSSE>=1); 1.10252 + match(Set dst (DivF dst con)); 1.10253 + format %{ "DIVSS $dst,[$con]" %} 1.10254 + ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), LdImmX(dst, con) ); 1.10255 + ins_pipe( pipe_slow ); 1.10256 +%} 1.10257 + 1.10258 +instruct divX_mem(regX dst, memory mem) %{ 1.10259 + predicate(UseSSE>=1); 1.10260 + match(Set dst (DivF dst (LoadF mem))); 1.10261 + format %{ "DIVSS $dst,$mem" %} 1.10262 + ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem)); 1.10263 + ins_pipe( pipe_slow ); 1.10264 +%} 1.10265 + 1.10266 +// Get the square root of a single precision floating point values in xmm 1.10267 +instruct sqrtX_reg(regX dst, regX src) %{ 1.10268 + predicate(UseSSE>=1); 1.10269 + match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); 1.10270 + format %{ "SQRTSS $dst,$src" %} 1.10271 + ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegReg(dst, src)); 1.10272 + ins_pipe( pipe_slow ); 1.10273 +%} 1.10274 + 1.10275 +instruct sqrtX_mem(regX dst, memory mem) %{ 1.10276 + predicate(UseSSE>=1); 1.10277 + match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF mem))))); 1.10278 + format %{ "SQRTSS $dst,$mem" %} 1.10279 + ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem)); 1.10280 + ins_pipe( pipe_slow ); 1.10281 +%} 1.10282 + 1.10283 +// Get the square root of a double precision floating point values in xmm 1.10284 +instruct sqrtXD_reg(regXD dst, regXD src) %{ 1.10285 + predicate(UseSSE>=2); 1.10286 + match(Set dst (SqrtD src)); 1.10287 + format %{ "SQRTSD $dst,$src" %} 1.10288 + ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegReg(dst, src)); 1.10289 + ins_pipe( pipe_slow ); 1.10290 +%} 1.10291 + 1.10292 +instruct sqrtXD_mem(regXD dst, memory mem) %{ 1.10293 + predicate(UseSSE>=2); 1.10294 + match(Set dst (SqrtD (LoadD mem))); 1.10295 + format %{ 
"SQRTSD $dst,$mem" %} 1.10296 + ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem)); 1.10297 + ins_pipe( pipe_slow ); 1.10298 +%} 1.10299 + 1.10300 +instruct absF_reg(regFPR1 dst, regFPR1 src) %{ 1.10301 + predicate(UseSSE==0); 1.10302 + match(Set dst (AbsF src)); 1.10303 + ins_cost(100); 1.10304 + format %{ "FABS" %} 1.10305 + opcode(0xE1, 0xD9); 1.10306 + ins_encode( OpcS, OpcP ); 1.10307 + ins_pipe( fpu_reg_reg ); 1.10308 +%} 1.10309 + 1.10310 +instruct absX_reg(regX dst ) %{ 1.10311 + predicate(UseSSE>=1); 1.10312 + match(Set dst (AbsF dst)); 1.10313 + format %{ "ANDPS $dst,[0x7FFFFFFF]\t# ABS F by sign masking" %} 1.10314 + ins_encode( AbsXF_encoding(dst)); 1.10315 + ins_pipe( pipe_slow ); 1.10316 +%} 1.10317 + 1.10318 +instruct negF_reg(regFPR1 dst, regFPR1 src) %{ 1.10319 + predicate(UseSSE==0); 1.10320 + match(Set dst (NegF src)); 1.10321 + ins_cost(100); 1.10322 + format %{ "FCHS" %} 1.10323 + opcode(0xE0, 0xD9); 1.10324 + ins_encode( OpcS, OpcP ); 1.10325 + ins_pipe( fpu_reg_reg ); 1.10326 +%} 1.10327 + 1.10328 +instruct negX_reg( regX dst ) %{ 1.10329 + predicate(UseSSE>=1); 1.10330 + match(Set dst (NegF dst)); 1.10331 + format %{ "XORPS $dst,[0x80000000]\t# CHS F by sign flipping" %} 1.10332 + ins_encode( NegXF_encoding(dst)); 1.10333 + ins_pipe( pipe_slow ); 1.10334 +%} 1.10335 + 1.10336 +// Cisc-alternate to addF_reg 1.10337 +// Spill to obtain 24-bit precision 1.10338 +instruct addF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{ 1.10339 + predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 1.10340 + match(Set dst (AddF src1 (LoadF src2))); 1.10341 + 1.10342 + format %{ "FLD $src2\n\t" 1.10343 + "FADD ST,$src1\n\t" 1.10344 + "FSTP_S $dst" %} 1.10345 + opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 1.10346 + ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 1.10347 + OpcReg_F(src1), 1.10348 + Pop_Mem_F(dst) ); 1.10349 + ins_pipe( fpu_mem_reg_mem ); 1.10350 +%} 1.10351 +// 1.10352 +// Cisc-alternate 
to addF_reg 1.10353 +// This instruction does not round to 24-bits 1.10354 +instruct addF_reg_mem(regF dst, memory src) %{ 1.10355 + predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 1.10356 + match(Set dst (AddF dst (LoadF src))); 1.10357 + 1.10358 + format %{ "FADD $dst,$src" %} 1.10359 + opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */ 1.10360 + ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 1.10361 + OpcP, RegOpc(dst) ); 1.10362 + ins_pipe( fpu_reg_mem ); 1.10363 +%} 1.10364 + 1.10365 +// Following two instructions for _222_mpegaudio 1.10366 +// Spill to obtain 24-bit precision 1.10367 +instruct addF24_mem_reg(stackSlotF dst, regF src2, memory src1 ) %{ 1.10368 + predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 1.10369 + match(Set dst (AddF src1 src2)); 1.10370 + 1.10371 + format %{ "FADD $dst,$src1,$src2" %} 1.10372 + opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 1.10373 + ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1), 1.10374 + OpcReg_F(src2), 1.10375 + Pop_Mem_F(dst) ); 1.10376 + ins_pipe( fpu_mem_reg_mem ); 1.10377 +%} 1.10378 + 1.10379 +// Cisc-spill variant 1.10380 +// Spill to obtain 24-bit precision 1.10381 +instruct addF24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{ 1.10382 + predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 1.10383 + match(Set dst (AddF src1 (LoadF src2))); 1.10384 + 1.10385 + format %{ "FADD $dst,$src1,$src2 cisc" %} 1.10386 + opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */ 1.10387 + ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 1.10388 + set_instruction_start, 1.10389 + OpcP, RMopc_Mem(secondary,src1), 1.10390 + Pop_Mem_F(dst) ); 1.10391 + ins_pipe( fpu_mem_mem_mem ); 1.10392 +%} 1.10393 + 1.10394 +// Spill to obtain 24-bit precision (both addends in memory: load src2, add src1 from memory, store to the dst stack slot) 1.10395 +instruct addF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 1.10396 + predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 1.10397 + match(Set dst (AddF 
src1 src2)); 1.10398 + 1.10399 + format %{ "FADD $dst,$src1,$src2" %} 1.10400 + opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */ 1.10401 + ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 1.10402 + set_instruction_start, 1.10403 + OpcP, RMopc_Mem(secondary,src1), 1.10404 + Pop_Mem_F(dst) ); 1.10405 + ins_pipe( fpu_mem_mem_mem ); 1.10406 +%} 1.10407 + 1.10408 +/* Float add of a register and a float constant (immF); this form stores the result to a float stack slot. */ 1.10409 +// Spill to obtain 24-bit precision 1.10410 +instruct addF24_reg_imm(stackSlotF dst, regF src1, immF src2) %{ 1.10411 + predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 1.10412 + match(Set dst (AddF src1 src2)); 1.10413 + format %{ "FLD $src1\n\t" 1.10414 + "FADD $src2\n\t" 1.10415 + "FSTP_S $dst" %} 1.10416 + opcode(0xD8, 0x00); /* D8 /0 */ 1.10417 + ins_encode( Push_Reg_F(src1), 1.10418 + Opc_MemImm_F(src2), 1.10419 + Pop_Mem_F(dst)); 1.10420 + ins_pipe( fpu_mem_reg_con ); 1.10421 +%} 1.10422 +// 1.10423 +// This instruction does not round to 24-bits (same add-constant sequence, result popped into an FPU register) 1.10424 +instruct addF_reg_imm(regF dst, regF src1, immF src2) %{ 1.10425 + predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 1.10426 + match(Set dst (AddF src1 src2)); 1.10427 + format %{ "FLD $src1\n\t" 1.10428 + "FADD $src2\n\t" 1.10429 + "FSTP_S $dst" %} 1.10430 + opcode(0xD8, 0x00); /* D8 /0 */ 1.10431 + ins_encode( Push_Reg_F(src1), 1.10432 + Opc_MemImm_F(src2), 1.10433 + Pop_Reg_F(dst)); 1.10434 + ins_pipe( fpu_reg_reg_con ); 1.10435 +%} 1.10436 + 1.10437 +// Spill to obtain 24-bit precision (float multiply, register by register, result stored to a stack slot) 1.10438 +instruct mulF24_reg(stackSlotF dst, regF src1, regF src2) %{ 1.10439 + predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 1.10440 + match(Set dst (MulF src1 src2)); 1.10441 + 1.10442 + format %{ "FLD $src1\n\t" 1.10443 + "FMUL $src2\n\t" 1.10444 + "FSTP_S $dst" %} 1.10445 + opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */ 1.10446 + ins_encode( Push_Reg_F(src1), 1.10447 + OpcReg_F(src2), 1.10448 + Pop_Mem_F(dst) ); 1.10449 + ins_pipe( fpu_mem_reg_reg ); 1.10450 +%} 1.10451 +// 
1.10452 +// This instruction does not round to 24-bits. NOTE: the encoding pushes $src2 and multiplies by $src1, the reverse of the order shown in the format. 1.10453 +instruct mulF_reg(regF dst, regF src1, regF src2) %{ 1.10454 + predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 1.10455 + match(Set dst (MulF src1 src2)); 1.10456 + 1.10457 + format %{ "FLD $src1\n\t" 1.10458 + "FMUL $src2\n\t" 1.10459 + "FSTP_S $dst" %} 1.10460 + opcode(0xD8, 0x1); /* D8 C8+i */ 1.10461 + ins_encode( Push_Reg_F(src2), 1.10462 + OpcReg_F(src1), 1.10463 + Pop_Reg_F(dst) ); 1.10464 + ins_pipe( fpu_reg_reg_reg ); 1.10465 +%} 1.10466 + 1.10467 + 1.10468 +// Spill to obtain 24-bit precision 1.10469 +// Cisc-alternate to reg-reg multiply 1.10470 +instruct mulF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{ 1.10471 + predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 1.10472 + match(Set dst (MulF src1 (LoadF src2))); 1.10473 + 1.10474 + format %{ "FLD_S $src2\n\t" 1.10475 + "FMUL $src1\n\t" 1.10476 + "FSTP_S $dst" %} 1.10477 + opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */ 1.10478 + ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 1.10479 + OpcReg_F(src1), 1.10480 + Pop_Mem_F(dst) ); 1.10481 + ins_pipe( fpu_mem_reg_mem ); 1.10482 +%} 1.10483 +// 1.10484 +// This instruction does not round to 24-bits 1.10485 +// Cisc-alternate to reg-reg multiply 1.10486 +instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 1.10487 + predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 1.10488 + match(Set dst (MulF src1 (LoadF src2))); 1.10489 + 1.10490 + format %{ "FMUL $dst,$src1,$src2" %} 1.10491 + opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */ 1.10492 + ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 1.10493 + OpcReg_F(src1), 1.10494 + Pop_Reg_F(dst) ); 1.10495 + ins_pipe( fpu_reg_reg_mem ); 1.10496 +%} 1.10497 + 1.10498 +// Spill to obtain 24-bit precision 1.10499 +instruct mulF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 1.10500 + predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 
1.10501 + match(Set dst (MulF src1 src2)); 1.10502 + 1.10503 + format %{ "FMUL $dst,$src1,$src2" %} 1.10504 + opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */ 1.10505 + ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 1.10506 + set_instruction_start, 1.10507 + OpcP, RMopc_Mem(secondary,src1), 1.10508 + Pop_Mem_F(dst) ); 1.10509 + ins_pipe( fpu_mem_mem_mem ); 1.10510 +%} 1.10511 + 1.10512 +// Spill to obtain 24-bit precision (multiply a register by a float constant, result stored to a stack slot) 1.10513 +instruct mulF24_reg_imm(stackSlotF dst, regF src1, immF src2) %{ 1.10514 + predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 1.10515 + match(Set dst (MulF src1 src2)); 1.10516 + 1.10517 + format %{ "FMULc $dst,$src1,$src2" %} 1.10518 + opcode(0xD8, 0x1); /* D8 /1*/ 1.10519 + ins_encode( Push_Reg_F(src1), 1.10520 + Opc_MemImm_F(src2), 1.10521 + Pop_Mem_F(dst)); 1.10522 + ins_pipe( fpu_mem_reg_con ); 1.10523 +%} 1.10524 +// 1.10525 +// This instruction does not round to 24-bits (multiply a register by a float constant, result popped into an FPU register) 1.10526 +instruct mulF_reg_imm(regF dst, regF src1, immF src2) %{ 1.10527 + predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 1.10528 + match(Set dst (MulF src1 src2)); 1.10529 + 1.10530 + format %{ "FMULc $dst, 
$src1, $src2" %} 1.10531 + opcode(0xD8, 0x1); /* D8 /1*/ 1.10532 + ins_encode( Push_Reg_F(src1), 1.10533 + Opc_MemImm_F(src2), 1.10534 + Pop_Reg_F(dst)); 1.10535 + ins_pipe( fpu_reg_reg_con ); 1.10536 +%} 1.10537 + 1.10538 +/* The MACROn instructs below each match a multi-node ideal subtree and emit it as one FPU instruction sequence. */ 1.10539 +// 1.10540 +// MACRO1 -- subsume unshared load into mulF 1.10541 +// This instruction does not round to 24-bits 1.10542 +instruct mulF_reg_load1(regF dst, regF src, memory mem1 ) %{ 1.10543 + predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 1.10544 + match(Set dst (MulF (LoadF mem1) src)); 1.10545 + 1.10546 + format %{ "FLD $mem1 ===MACRO1===\n\t" 1.10547 + "FMUL ST,$src\n\t" 1.10548 + "FSTP $dst" %} 1.10549 + opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */ 1.10550 + ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1), 1.10551 + OpcReg_F(src), 1.10552 + Pop_Reg_F(dst) ); 1.10553 + ins_pipe( fpu_reg_reg_mem ); 1.10554 +%} 1.10555 +// 1.10556 +// MACRO2 -- addF a mulF which subsumed an unshared load 1.10557 +// This instruction does not round to 24-bits 1.10558 +instruct addF_mulF_reg_load1(regF dst, memory mem1, regF src1, regF src2) %{ 1.10559 + predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 1.10560 + match(Set dst (AddF (MulF (LoadF mem1) src1) src2)); 1.10561 + ins_cost(95); 1.10562 + 1.10563 + format %{ "FLD $mem1 ===MACRO2===\n\t" 1.10564 + "FMUL ST,$src1 subsume mulF left load\n\t" 1.10565 + "FADD ST,$src2\n\t" 1.10566 + "FSTP $dst" %} 1.10567 + opcode(0xD9); /* LoadF D9 /0 */ 1.10568 + ins_encode( OpcP, RMopc_Mem(0x00,mem1), 1.10569 + FMul_ST_reg(src1), 1.10570 + FAdd_ST_reg(src2), 1.10571 + Pop_Reg_F(dst) ); 1.10572 + ins_pipe( fpu_reg_mem_reg_reg ); 1.10573 +%} 1.10574 + 1.10575 +// MACRO3 -- addF a mulF 1.10576 +// This instruction does not round to 24-bits. It is a '2-address' 1.10577 +// instruction in that the result goes back to src2. 
This eliminates 1.10578 +// a move from the macro; possibly the register allocator will have 1.10579 +// to add it back (and maybe not). 1.10580 +instruct addF_mulF_reg(regF src2, regF src1, regF src0) %{ 1.10581 + predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 1.10582 + match(Set src2 (AddF (MulF src0 src1) src2)); 1.10583 + 1.10584 + format %{ "FLD $src0 ===MACRO3===\n\t" 1.10585 + "FMUL ST,$src1\n\t" 1.10586 + "FADDP $src2,ST" %} 1.10587 + opcode(0xD9); /* LoadF D9 /0 */ 1.10588 + ins_encode( Push_Reg_F(src0), 1.10589 + FMul_ST_reg(src1), 1.10590 + FAddP_reg_ST(src2) ); 1.10591 + ins_pipe( fpu_reg_reg_reg ); 1.10592 +%} 1.10593 + 1.10594 +// MACRO4 -- divF of a subF: computes (src2 - src1) / src3 1.10595 +// This instruction does not round to 24-bits 1.10596 +instruct subF_divF_reg(regF dst, regF src1, regF src2, regF src3) %{ 1.10597 + predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 1.10598 + match(Set dst (DivF (SubF src2 src1) src3)); 1.10599 + 1.10600 + format %{ "FLD $src2 ===MACRO4===\n\t" 1.10601 + "FSUB ST,$src1\n\t" 1.10602 + "FDIV ST,$src3\n\t" 1.10603 + "FSTP $dst" %} 1.10604 + opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 1.10605 + ins_encode( Push_Reg_F(src2), 1.10606 + subF_divF_encode(src1,src3), 1.10607 + Pop_Reg_F(dst) ); 1.10608 + ins_pipe( fpu_reg_reg_reg_reg ); 1.10609 +%} 1.10610 + 1.10611 +// Spill to obtain 24-bit precision (float divide src1 / src2, result stored to a stack slot) 1.10612 +instruct divF24_reg(stackSlotF dst, regF src1, regF src2) %{ 1.10613 + predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 1.10614 + match(Set dst (DivF src1 src2)); 1.10615 + 1.10616 + format %{ "FDIV $dst,$src1,$src2" %} 1.10617 + opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */ 1.10618 + ins_encode( Push_Reg_F(src1), 1.10619 + OpcReg_F(src2), 1.10620 + Pop_Mem_F(dst) ); 1.10621 + ins_pipe( fpu_mem_reg_reg ); 1.10622 +%} 1.10623 +// 1.10624 +// This instruction does not round to 24-bits 1.10625 +instruct divF_reg(regF dst, regF src) %{ 1.10626 + predicate(UseSSE==0 && 
!Compile::current()->select_24_bit_instr()); 1.10627 + match(Set dst (DivF dst src)); 1.10628 + 1.10629 + format %{ "FDIV $dst,$src" %} 1.10630 + opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 1.10631 + ins_encode( Push_Reg_F(src), 1.10632 + OpcP, RegOpc(dst) ); 1.10633 + ins_pipe( fpu_reg_reg ); 1.10634 +%} 1.10635 + 1.10636 + 1.10637 +// Spill to obtain 24-bit precision 1.10638 +instruct modF24_reg(stackSlotF dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{ 1.10639 + predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 1.10640 + match(Set dst (ModF src1 src2)); 1.10641 + effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS 1.10642 + 1.10643 + format %{ "FMOD $dst,$src1,$src2" %} 1.10644 + ins_encode( Push_Reg_Mod_D(src1, src2), 1.10645 + emitModD(), 1.10646 + Push_Result_Mod_D(src2), 1.10647 + Pop_Mem_F(dst)); 1.10648 + ins_pipe( pipe_slow ); 1.10649 +%} 1.10650 +// 1.10651 +// This instruction does not round to 24-bits 1.10652 +instruct modF_reg(regF dst, regF src, eAXRegI rax, eFlagsReg cr) %{ 1.10653 + predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 1.10654 + match(Set dst (ModF dst src)); 1.10655 + effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS 1.10656 + 1.10657 + format %{ "FMOD $dst,$src" %} 1.10658 + ins_encode(Push_Reg_Mod_D(dst, src), 1.10659 + emitModD(), 1.10660 + Push_Result_Mod_D(src), 1.10661 + Pop_Reg_F(dst)); 1.10662 + ins_pipe( pipe_slow ); 1.10663 +%} 1.10664 +/* SSE float remainder. Per the format: both XMM operands are bounced through the stack onto the FPU stack, FPREM is repeated while JP is taken, and the result is moved back into $dst. */ 1.10665 +instruct modX_reg(regX dst, regX src0, regX src1, eAXRegI rax, eFlagsReg cr) %{ 1.10666 + predicate(UseSSE>=1); 1.10667 + match(Set dst (ModF src0 src1)); 1.10668 + effect(KILL rax, KILL cr); 1.10669 + format %{ "SUB ESP,4\t # FMOD\n" 1.10670 + "\tMOVSS [ESP+0],$src1\n" 1.10671 + "\tFLD_S [ESP+0]\n" 1.10672 + "\tMOVSS [ESP+0],$src0\n" 1.10673 + "\tFLD_S [ESP+0]\n" 1.10674 + "loop:\tFPREM\n" 1.10675 + "\tFWAIT\n" 1.10676 + "\tFNSTSW AX\n" 1.10677 + "\tSAHF\n" 1.10678 + "\tJP loop\n" 1.10679 + "\tFSTP_S [ESP+0]\n" 1.10680 
+ "\tMOVSS $dst,[ESP+0]\n" 1.10681 + "\tADD ESP,4\n" 1.10682 + "\tFSTP ST0\t # Restore FPU Stack" 1.10683 + %} 1.10684 + ins_cost(250); 1.10685 + ins_encode( Push_ModX_encoding(src0, src1), emitModD(), Push_ResultX(dst,0x4), PopFPU); 1.10686 + ins_pipe( pipe_slow ); 1.10687 +%} 1.10688 + 1.10689 + 1.10690 +//----------Arithmetic Conversion Instructions--------------------------------- 1.10691 +// The conversion operations are all alphabetically sorted. Please keep it that way! 1.10692 +/* Round a float held in an FPU register by storing it to a float stack slot (FST_S); FPU path only. */ 1.10693 +instruct roundFloat_mem_reg(stackSlotF dst, regF src) %{ 1.10694 + predicate(UseSSE==0); 1.10695 + match(Set dst (RoundFloat src)); 1.10696 + ins_cost(125); 1.10697 + format %{ "FST_S $dst,$src\t# F-round" %} 1.10698 + ins_encode( Pop_Mem_Reg_F(dst, src) ); 1.10699 + ins_pipe( fpu_mem_reg ); 1.10700 +%} 1.10701 +/* Round a double held in an FPU register by storing it to a double stack slot (FST_D); used while UseSSE<=1. */ 1.10702 +instruct roundDouble_mem_reg(stackSlotD dst, regD src) %{ 1.10703 + predicate(UseSSE<=1); 1.10704 + match(Set dst (RoundDouble src)); 1.10705 + ins_cost(125); 1.10706 + format %{ "FST_D $dst,$src\t# D-round" %} 1.10707 + ins_encode( Pop_Mem_Reg_D(dst, src) ); 1.10708 + ins_pipe( fpu_mem_reg ); 1.10709 +%} 1.10710 + 1.10711 +// Force rounding to 24-bit precision and 8-bit exponent (expands to roundFloat_mem_reg) 1.10712 +instruct convD2F_reg(stackSlotF dst, regD src) %{ 1.10713 + predicate(UseSSE==0); 1.10714 + match(Set dst (ConvD2F src)); 1.10715 + format %{ "FST_S $dst,$src\t# F-round" %} 1.10716 + expand %{ 1.10717 + roundFloat_mem_reg(dst,src); 1.10718 + %} 1.10719 +%} 1.10720 + 1.10721 +// Force rounding to 24-bit precision and 8-bit exponent (UseSSE==1: FPU double stored as a float, then loaded into xmm) 1.10722 +instruct convD2X_reg(regX dst, regD src, eFlagsReg cr) %{ 1.10723 + predicate(UseSSE==1); 1.10724 + match(Set dst (ConvD2F src)); 1.10725 + effect( KILL cr ); 1.10726 + format %{ "SUB ESP,4\n\t" 1.10727 + "FST_S [ESP],$src\t# F-round\n\t" 1.10728 + "MOVSS $dst,[ESP]\n\t" 1.10729 + "ADD ESP,4" %} 1.10730 + ins_encode( D2X_encoding(dst, src) ); 1.10731 + ins_pipe( pipe_slow ); 1.10732 +%} 1.10733 + 1.10734 +// Force rounding double precision to 
single precision 1.10735 +instruct convXD2X_reg(regX dst, regXD src) %{ 1.10736 + predicate(UseSSE>=2); 1.10737 + match(Set dst (ConvD2F src)); 1.10738 + format %{ "CVTSD2SS $dst,$src\t# F-round" %} 1.10739 + opcode(0xF2, 0x0F, 0x5A); 1.10740 + ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src)); 1.10741 + ins_pipe( pipe_slow ); 1.10742 +%} 1.10743 +/* FPU path float -> double, register to register. NOTE(review): the format text says FST_S although the encoding is Pop_Reg_Reg_D; confirm the intended mnemonic. */ 1.10744 +instruct convF2D_reg_reg(regD dst, regF src) %{ 1.10745 + predicate(UseSSE==0); 1.10746 + match(Set dst (ConvF2D src)); 1.10747 + format %{ "FST_S $dst,$src\t# D-round" %} 1.10748 + ins_encode( Pop_Reg_Reg_D(dst, src)); 1.10749 + ins_pipe( fpu_reg_reg ); 1.10750 +%} 1.10751 +/* UseSSE==1: float in an FPU register widened by storing it to a double stack slot; expands to roundDouble_mem_reg. */ 1.10752 +instruct convF2D_reg(stackSlotD dst, regF src) %{ 1.10753 + predicate(UseSSE==1); 1.10754 + match(Set dst (ConvF2D src)); 1.10755 + format %{ "FST_D $dst,$src\t# D-round" %} 1.10756 + expand %{ 1.10757 + roundDouble_mem_reg(dst,src); 1.10758 + %} 1.10759 +%} 1.10760 +/* UseSSE==1: float in xmm -> double in an FPU register. Per the format the value is bounced through a 4-byte stack temp; the ESP adjustment is why flags are killed. */ 1.10761 +instruct convX2D_reg(regD dst, regX src, eFlagsReg cr) %{ 1.10762 + predicate(UseSSE==1); 1.10763 + match(Set dst (ConvF2D src)); 1.10764 + effect( KILL cr ); 1.10765 + format %{ "SUB ESP,4\n\t" 1.10766 + "MOVSS [ESP],$src\n\t" 1.10767 + "FLD_S [ESP]\n\t" 1.10768 + "ADD ESP,4\n\t" 1.10769 + "FSTP $dst\t# D-round" %} 1.10770 + ins_encode( X2D_encoding(dst, src), Pop_Reg_D(dst)); 1.10771 + ins_pipe( pipe_slow ); 1.10772 +%} 1.10773 +/* SSE2 float -> double, xmm to xmm: CVTSS2SD (F3 0F 5A). */ 1.10774 +instruct convX2XD_reg(regXD dst, regX src) %{ 1.10775 + predicate(UseSSE>=2); 1.10776 + match(Set dst (ConvF2D src)); 1.10777 + format %{ "CVTSS2SD $dst,$src\t# D-round" %} 1.10778 + opcode(0xF3, 0x0F, 0x5A); 1.10779 + ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src)); 1.10780 + ins_pipe( pipe_slow ); 1.10781 +%} 1.10782 + 1.10783 +// Convert a double to an int. If the double is a NAN, stuff a zero in instead. 
1.10784 +instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{ 1.10785 + predicate(UseSSE<=1); 1.10786 + match(Set dst (ConvD2I src)); 1.10787 + effect( KILL tmp, KILL cr ); 1.10788 + format %{ "FLD $src\t# Convert double to int \n\t" 1.10789 + "FLDCW trunc mode\n\t" 1.10790 + "SUB ESP,4\n\t" 1.10791 + "FISTp [ESP + #0]\n\t" 1.10792 + "FLDCW std/24-bit mode\n\t" 1.10793 + "POP EAX\n\t" 1.10794 + "CMP EAX,0x80000000\n\t" 1.10795 + "JNE,s fast\n\t" 1.10796 + "FLD_D $src\n\t" 1.10797 + "CALL d2i_wrapper\n" 1.10798 + "fast:" %} 1.10799 + ins_encode( Push_Reg_D(src), D2I_encoding(src) ); 1.10800 + ins_pipe( pipe_slow ); 1.10801 +%} 1.10802 + 1.10803 +// Convert a double to an int. If the double is a NAN, stuff a zero in instead. (SSE2 form: CVTTSD2SI; a 0x80000000 result takes the d2i_wrapper slow path.) 1.10804 +instruct convXD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regXD src, eFlagsReg cr ) %{ 1.10805 + predicate(UseSSE>=2); 1.10806 + match(Set dst (ConvD2I src)); 1.10807 + effect( KILL tmp, KILL cr ); 1.10808 + format %{ "CVTTSD2SI $dst, $src\n\t" 1.10809 + "CMP $dst,0x80000000\n\t" 1.10810 + "JNE,s fast\n\t" 1.10811 + "SUB ESP, 8\n\t" 1.10812 + "MOVSD [ESP], $src\n\t" 1.10813 + "FLD_D [ESP]\n\t" 1.10814 + "ADD ESP, 8\n\t" 1.10815 + "CALL d2i_wrapper\n" 1.10816 + "fast:" %} 1.10817 + opcode(0x1); // double-precision conversion 1.10818 + ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst)); 1.10819 + ins_pipe( pipe_slow ); 1.10820 +%} 1.10821 +/* FPU double -> long, popped into EAX (low) and EDX (high). Per the format, the result pair EDX=0x80000000, EAX=0 takes the d2l_wrapper slow path. */ 1.10822 +instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{ 1.10823 + predicate(UseSSE<=1); 1.10824 + match(Set dst (ConvD2L src)); 1.10825 + effect( KILL cr ); 1.10826 + format %{ "FLD $src\t# Convert double to long\n\t" 1.10827 + "FLDCW trunc mode\n\t" 1.10828 + "SUB ESP,8\n\t" 1.10829 + "FISTp [ESP + #0]\n\t" 1.10830 + "FLDCW std/24-bit mode\n\t" 1.10831 + "POP EAX\n\t" 1.10832 + "POP EDX\n\t" 1.10833 + "CMP EDX,0x80000000\n\t" 1.10834 + "JNE,s fast\n\t" 1.10835 + "TEST EAX,EAX\n\t" 1.10836 + "JNE,s fast\n\t" 1.10837 + "FLD $src\n\t" 
1.10838 + "CALL d2l_wrapper\n" 1.10839 + "fast:" %} 1.10840 + ins_encode( Push_Reg_D(src), D2L_encoding(src) ); 1.10841 + ins_pipe( pipe_slow ); 1.10842 +%} 1.10843 + 1.10844 +// XMM lacks a float/double->long conversion, so use the old FPU stack. 1.10845 +instruct convXD2L_reg_reg( eADXRegL dst, regXD src, eFlagsReg cr ) %{ 1.10846 + predicate (UseSSE>=2); 1.10847 + match(Set dst (ConvD2L src)); 1.10848 + effect( KILL cr ); 1.10849 + format %{ "SUB ESP,8\t# Convert double to long\n\t" 1.10850 + "MOVSD [ESP],$src\n\t" 1.10851 + "FLD_D [ESP]\n\t" 1.10852 + "FLDCW trunc mode\n\t" 1.10853 + "FISTp [ESP + #0]\n\t" 1.10854 + "FLDCW std/24-bit mode\n\t" 1.10855 + "POP EAX\n\t" 1.10856 + "POP EDX\n\t" 1.10857 + "CMP EDX,0x80000000\n\t" 1.10858 + "JNE,s fast\n\t" 1.10859 + "TEST EAX,EAX\n\t" 1.10860 + "JNE,s fast\n\t" 1.10861 + "SUB ESP,8\n\t" 1.10862 + "MOVSD [ESP],$src\n\t" 1.10863 + "FLD_D [ESP]\n\t" 1.10864 + "CALL d2l_wrapper\n" 1.10865 + "fast:" %} 1.10866 + ins_encode( XD2L_encoding(src) ); 1.10867 + ins_pipe( pipe_slow ); 1.10868 +%} 1.10869 + 1.10870 +// Convert a float to an int. Java semantics require special handling 1.10871 +// of the corner cases. So we set the rounding mode to 1.10872 +// 'zero', store the value down as an int, and reset the 1.10873 +// rounding mode to 'nearest'. The hardware stores a flag value down 1.10874 +// if we would overflow or converted a NAN; we check for this and 1.10875 +// go the slow path if needed. 
1.10876 +instruct convF2I_reg_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{ 1.10877 + predicate(UseSSE==0); 1.10878 + match(Set dst (ConvF2I src)); 1.10879 + effect( KILL tmp, KILL cr ); 1.10880 + format %{ "FLD $src\t# Convert float to int \n\t" 1.10881 + "FLDCW trunc mode\n\t" 1.10882 + "SUB ESP,4\n\t" 1.10883 + "FISTp [ESP + #0]\n\t" 1.10884 + "FLDCW std/24-bit mode\n\t" 1.10885 + "POP EAX\n\t" 1.10886 + "CMP EAX,0x80000000\n\t" 1.10887 + "JNE,s fast\n\t" 1.10888 + "FLD $src\n\t" 1.10889 + "CALL d2i_wrapper\n" 1.10890 + "fast:" %} 1.10891 + // D2I_encoding works for F2I 1.10892 + ins_encode( Push_Reg_F(src), D2I_encoding(src) ); 1.10893 + ins_pipe( pipe_slow ); 1.10894 +%} 1.10895 + 1.10896 +// Convert a float in xmm to an int reg. A 0x80000000 result from CVTTSS2SI takes the d2i_wrapper slow path. 1.10897 +instruct convX2I_reg(eAXRegI dst, eDXRegI tmp, regX src, eFlagsReg cr ) %{ 1.10898 + predicate(UseSSE>=1); 1.10899 + match(Set dst (ConvF2I src)); 1.10900 + effect( KILL tmp, KILL cr ); 1.10901 + format %{ "CVTTSS2SI $dst, $src\n\t" 1.10902 + "CMP $dst,0x80000000\n\t" 1.10903 + "JNE,s fast\n\t" 1.10904 + "SUB ESP, 4\n\t" 1.10905 + "MOVSS [ESP], $src\n\t" 1.10906 + "FLD [ESP]\n\t" 1.10907 + "ADD ESP, 4\n\t" 1.10908 + "CALL d2i_wrapper\n" 1.10909 + "fast:" %} 1.10910 + opcode(0x0); // single-precision conversion 1.10911 + ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst)); 1.10912 + ins_pipe( pipe_slow ); 1.10913 +%} 1.10914 +/* FPU float -> long, popped into EAX (low) and EDX (high); the result pair EDX=0x80000000, EAX=0 takes the d2l_wrapper slow path. */ 1.10915 +instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{ 1.10916 + predicate(UseSSE==0); 1.10917 + match(Set dst (ConvF2L src)); 1.10918 + effect( KILL cr ); 1.10919 + format %{ "FLD $src\t# Convert float to long\n\t" 1.10920 + "FLDCW trunc mode\n\t" 1.10921 + "SUB ESP,8\n\t" 1.10922 + "FISTp [ESP + #0]\n\t" 1.10923 + "FLDCW std/24-bit mode\n\t" 1.10924 + "POP EAX\n\t" 1.10925 + "POP EDX\n\t" 1.10926 + "CMP EDX,0x80000000\n\t" 1.10927 + "JNE,s fast\n\t" 1.10928 + "TEST EAX,EAX\n\t" 1.10929 + "JNE,s fast\n\t" 1.10930 + "FLD $src\n\t" 1.10931 + 
"CALL d2l_wrapper\n" 1.10932 + "fast:" %} 1.10933 + // D2L_encoding works for F2L 1.10934 + ins_encode( Push_Reg_F(src), D2L_encoding(src) ); 1.10935 + ins_pipe( pipe_slow ); 1.10936 +%} 1.10937 + 1.10938 +// XMM lacks a float/double->long conversion, so use the old FPU stack. 1.10939 +instruct convX2L_reg_reg( eADXRegL dst, regX src, eFlagsReg cr ) %{ 1.10940 + predicate (UseSSE>=1); 1.10941 + match(Set dst (ConvF2L src)); 1.10942 + effect( KILL cr ); 1.10943 + format %{ "SUB ESP,8\t# Convert float to long\n\t" 1.10944 + "MOVSS [ESP],$src\n\t" 1.10945 + "FLD_S [ESP]\n\t" 1.10946 + "FLDCW trunc mode\n\t" 1.10947 + "FISTp [ESP + #0]\n\t" 1.10948 + "FLDCW std/24-bit mode\n\t" 1.10949 + "POP EAX\n\t" 1.10950 + "POP EDX\n\t" 1.10951 + "CMP EDX,0x80000000\n\t" 1.10952 + "JNE,s fast\n\t" 1.10953 + "TEST EAX,EAX\n\t" 1.10954 + "JNE,s fast\n\t" 1.10955 + "SUB ESP,4\t# Convert float to long\n\t" 1.10956 + "MOVSS [ESP],$src\n\t" 1.10957 + "FLD_S [ESP]\n\t" 1.10958 + "ADD ESP,4\n\t" 1.10959 + "CALL d2l_wrapper\n" 1.10960 + "fast:" %} 1.10961 + ins_encode( X2L_encoding(src) ); 1.10962 + ins_pipe( pipe_slow ); 1.10963 +%} 1.10964 +/* int in a stack slot -> double in an FPU register: FILD then FSTP (UseSSE<=1). */ 1.10965 +instruct convI2D_reg(regD dst, stackSlotI src) %{ 1.10966 + predicate( UseSSE<=1 ); 1.10967 + match(Set dst (ConvI2D src)); 1.10968 + format %{ "FILD $src\n\t" 1.10969 + "FSTP $dst" %} 1.10970 + opcode(0xDB, 0x0); /* DB /0 */ 1.10971 + ins_encode(Push_Mem_I(src), Pop_Reg_D(dst)); 1.10972 + ins_pipe( fpu_reg_mem ); 1.10973 +%} 1.10974 +/* SSE2 int register -> double in xmm: CVTSI2SD (F2 0F 2A). */ 1.10975 +instruct convI2XD_reg(regXD dst, eRegI src) %{ 1.10976 + predicate( UseSSE>=2 ); 1.10977 + match(Set dst (ConvI2D src)); 1.10978 + format %{ "CVTSI2SD $dst,$src" %} 1.10979 + opcode(0xF2, 0x0F, 0x2A); 1.10980 + ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src)); 1.10981 + ins_pipe( pipe_slow ); 1.10982 +%} 1.10983 +/* SSE2 int loaded from memory -> double in xmm: the LoadI is folded into CVTSI2SD. */ 1.10984 +instruct convI2XD_mem(regXD dst, memory mem) %{ 1.10985 + predicate( UseSSE>=2 ); 1.10986 + match(Set dst (ConvI2D (LoadI mem))); 1.10987 + format %{ "CVTSI2SD $dst,$mem" 
%} 1.10988 + opcode(0xF2, 0x0F, 0x2A); 1.10989 + ins_encode( OpcP, OpcS, Opcode(tertiary), RegMem(dst, mem)); 1.10990 + ins_pipe( pipe_slow ); 1.10991 +%} 1.10992 +/* int loaded from memory -> double in an FPU register: FILD mem then FSTP. Only selected when 24-bit mode is off. */ 1.10993 +instruct convI2D_mem(regD dst, memory mem) %{ 1.10994 + predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr()); 1.10995 + match(Set dst (ConvI2D (LoadI mem))); 1.10996 + format %{ "FILD $mem\n\t" 1.10997 + "FSTP $dst" %} 1.10998 + opcode(0xDB); /* DB /0 */ 1.10999 + ins_encode( OpcP, RMopc_Mem(0x00,mem), 1.11000 + Pop_Reg_D(dst)); 1.11001 + ins_pipe( fpu_reg_mem ); 1.11002 +%} 1.11003 + 1.11004 +// Convert a byte to a float; no rounding step needed. The predicate recognises the byte as an AndI of the input with the constant 255. 1.11005 +instruct conv24I2F_reg(regF dst, stackSlotI src) %{ 1.11006 + predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 ); 1.11007 + match(Set dst (ConvI2F src)); 1.11008 + format %{ "FILD $src\n\t" 1.11009 + "FSTP $dst" %} 1.11010 + 1.11011 + opcode(0xDB, 0x0); /* DB /0 */ 1.11012 + ins_encode(Push_Mem_I(src), Pop_Reg_F(dst)); 1.11013 + ins_pipe( fpu_reg_mem ); 1.11014 +%} 1.11015 + 1.11016 +// In 24-bit mode, force exponent rounding by storing back out (int source in a stack slot) 1.11017 +instruct convI2F_SSF(stackSlotF dst, stackSlotI src) %{ 1.11018 + predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 1.11019 + match(Set dst (ConvI2F src)); 1.11020 + ins_cost(200); 1.11021 + format %{ "FILD $src\n\t" 1.11022 + "FSTP_S $dst" %} 1.11023 + opcode(0xDB, 0x0); /* DB /0 */ 1.11024 + ins_encode( Push_Mem_I(src), 1.11025 + Pop_Mem_F(dst)); 1.11026 + ins_pipe( fpu_mem_mem ); 1.11027 +%} 1.11028 + 1.11029 +// In 24-bit mode, force exponent rounding by storing back out (int source loaded from memory) 1.11030 +instruct convI2F_SSF_mem(stackSlotF dst, memory mem) %{ 1.11031 + predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 1.11032 + match(Set dst (ConvI2F (LoadI mem))); 1.11033 + ins_cost(200); 1.11034 + format %{ "FILD $mem\n\t" 1.11035 + "FSTP_S $dst" %} 1.11036 + opcode(0xDB); /* DB /0 */ 
1.11037 + ins_encode( OpcP, RMopc_Mem(0x00,mem), 1.11038 + Pop_Mem_F(dst)); 1.11039 + ins_pipe( fpu_mem_mem ); 1.11040 +%} 1.11041 + 1.11042 +// This instruction does not round to 24-bits (int source in a stack slot, result left in an FPU register) 1.11043 +instruct convI2F_reg(regF dst, stackSlotI src) %{ 1.11044 + predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 1.11045 + match(Set dst (ConvI2F src)); 1.11046 + format %{ "FILD $src\n\t" 1.11047 + "FSTP $dst" %} 1.11048 + opcode(0xDB, 0x0); /* DB /0 */ 1.11049 + ins_encode( Push_Mem_I(src), 1.11050 + Pop_Reg_F(dst)); 1.11051 + ins_pipe( fpu_reg_mem ); 1.11052 +%} 1.11053 + 1.11054 +// This instruction does not round to 24-bits (int source loaded from memory, result left in an FPU register) 1.11055 +instruct convI2F_mem(regF dst, memory mem) %{ 1.11056 + predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 1.11057 + match(Set dst (ConvI2F (LoadI mem))); 1.11058 + format %{ "FILD $mem\n\t" 1.11059 + "FSTP $dst" %} 1.11060 + opcode(0xDB); /* DB /0 */ 1.11061 + ins_encode( OpcP, RMopc_Mem(0x00,mem), 1.11062 + Pop_Reg_F(dst)); 1.11063 + ins_pipe( fpu_reg_mem ); 1.11064 +%} 1.11065 + 1.11066 +// Convert an int to a float in xmm; no rounding step needed. 
1.11067 +instruct convI2X_reg(regX dst, eRegI src) %{ 1.11068 + predicate(UseSSE>=1); 1.11069 + match(Set dst (ConvI2F src)); 1.11070 + format %{ "CVTSI2SS $dst, $src" %} 1.11071 + 1.11072 + opcode(0xF3, 0x0F, 0x2A); /* F3 0F 2A /r */ 1.11073 + ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src)); 1.11074 + ins_pipe( pipe_slow ); 1.11075 +%} 1.11076 +/* Sign-extending int -> long. Per the format: src is copied into both halves and the high half is then shifted right arithmetically by 31; the SAR is why flags are killed. */ 1.11077 +instruct convI2L_reg( eRegL dst, eRegI src, eFlagsReg cr) %{ 1.11078 + match(Set dst (ConvI2L src)); 1.11079 + effect(KILL cr); 1.11080 + format %{ "MOV $dst.lo,$src\n\t" 1.11081 + "MOV $dst.hi,$src\n\t" 1.11082 + "SAR $dst.hi,31" %} 1.11083 + ins_encode(convert_int_long(dst,src)); 1.11084 + ins_pipe( ialu_reg_reg_long ); 1.11085 +%} 1.11086 + 1.11087 +// Zero-extend convert int to long: matches AndL(ConvI2L src, mask) and emits a low-half copy plus XOR of the high half with itself 1.11088 +instruct convI2L_reg_zex(eRegL dst, eRegI src, immL_32bits mask, eFlagsReg flags ) %{ 1.11089 + match(Set dst (AndL (ConvI2L src) mask) ); 1.11090 + effect( KILL flags ); 1.11091 + format %{ "MOV $dst.lo,$src\n\t" 1.11092 + "XOR $dst.hi,$dst.hi" %} 1.11093 + opcode(0x33); // XOR 1.11094 + ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) ); 1.11095 + ins_pipe( ialu_reg_reg_long ); 1.11096 +%} 1.11097 + 1.11098 +// Zero-extend long: AndL with an immL_32bits mask (presumably 0xFFFFFFFF; confirm against the operand definition) keeps the low half and clears the high half 1.11099 +instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{ 1.11100 + match(Set dst (AndL src mask) ); 1.11101 + effect( KILL flags ); 1.11102 + format %{ "MOV $dst.lo,$src.lo\n\t" 1.11103 + "XOR $dst.hi,$dst.hi\n\t" %} 1.11104 + opcode(0x33); // XOR 1.11105 + ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) ); 1.11106 + ins_pipe( ialu_reg_reg_long ); 1.11107 +%} 1.11108 +/* long -> double via the FPU (UseSSE<=1). Per the format: both halves are pushed, loaded with FILD, the stack is popped, and the value is stored to a double stack slot. */ 1.11109 +instruct convL2D_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{ 1.11110 + predicate (UseSSE<=1); 1.11111 + match(Set dst (ConvL2D src)); 1.11112 + effect( KILL cr ); 1.11113 + format %{ "PUSH $src.hi\t# Convert long to double\n\t" 1.11114 + "PUSH $src.lo\n\t" 1.11115 + "FILD ST,[ESP + #0]\n\t" 1.11116 + "ADD ESP,8\n\t" 1.11117 + "FSTP_D $dst\t# D-round" %} 
1.11118 + opcode(0xDF, 0x5); /* DF /5 */ 1.11119 + ins_encode(convert_long_double(src), Pop_Mem_D(dst)); 1.11120 + ins_pipe( pipe_slow ); 1.11121 +%} 1.11122 +/* SSE2 long -> double in xmm. Per the format: the pushed 64-bit value is loaded with FILD, written back to the same stack slot as a double, then loaded with MOVSD. */ 1.11123 +instruct convL2XD_reg( regXD dst, eRegL src, eFlagsReg cr) %{ 1.11124 + predicate (UseSSE>=2); 1.11125 + match(Set dst (ConvL2D src)); 1.11126 + effect( KILL cr ); 1.11127 + format %{ "PUSH $src.hi\t# Convert long to double\n\t" 1.11128 + "PUSH $src.lo\n\t" 1.11129 + "FILD_D [ESP]\n\t" 1.11130 + "FSTP_D [ESP]\n\t" 1.11131 + "MOVSD $dst,[ESP]\n\t" 1.11132 + "ADD ESP,8" %} 1.11133 + opcode(0xDF, 0x5); /* DF /5 */ 1.11134 + ins_encode(convert_long_double2(src), Push_ResultXD(dst)); 1.11135 + ins_pipe( pipe_slow ); 1.11136 +%} 1.11137 +/* SSE long -> float in xmm: same stack bounce as convL2XD_reg, but stored with FSTP_S and loaded with MOVSS. */ 1.11138 +instruct convL2X_reg( regX dst, eRegL src, eFlagsReg cr) %{ 1.11139 + predicate (UseSSE>=1); 1.11140 + match(Set dst (ConvL2F src)); 1.11141 + effect( KILL cr ); 1.11142 + format %{ "PUSH $src.hi\t# Convert long to single float\n\t" 1.11143 + "PUSH $src.lo\n\t" 1.11144 + "FILD_D [ESP]\n\t" 1.11145 + "FSTP_S [ESP]\n\t" 1.11146 + "MOVSS $dst,[ESP]\n\t" 1.11147 + "ADD ESP,8" %} 1.11148 + opcode(0xDF, 0x5); /* DF /5 */ 1.11149 + ins_encode(convert_long_double2(src), Push_ResultX(dst,0x8)); 1.11150 + ins_pipe( pipe_slow ); 1.11151 +%} 1.11152 +/* long -> float stored to a float stack slot. NOTE(review): unlike its siblings this instruct has no UseSSE predicate; confirm that is intended. */ 1.11153 +instruct convL2F_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{ 1.11154 + match(Set dst (ConvL2F src)); 1.11155 + effect( KILL cr ); 1.11156 + format %{ "PUSH $src.hi\t# Convert long to single float\n\t" 1.11157 + "PUSH $src.lo\n\t" 1.11158 + "FILD ST,[ESP + #0]\n\t" 1.11159 + "ADD ESP,8\n\t" 1.11160 + "FSTP_S $dst\t# F-round" %} 1.11161 + opcode(0xDF, 0x5); /* DF /5 */ 1.11162 + ins_encode(convert_long_double(src), Pop_Mem_F(dst)); 1.11163 + ins_pipe( pipe_slow ); 1.11164 +%} 1.11165 +/* long -> int: copies the low half of src into dst. */ 1.11166 +instruct convL2I_reg( eRegI dst, eRegL src ) %{ 1.11167 + match(Set dst (ConvL2I src)); 1.11168 + effect( DEF dst, USE src ); 1.11169 + format %{ "MOV $dst,$src.lo" %} 1.11170 + ins_encode(enc_CopyL_Lo(dst,src)); 1.11171 + 
ins_pipe( ialu_reg_reg ); 1.11172 +%} 1.11173 + 1.11174 +/* MoveF2I variants: plain moves (MOV, FST_S, MOVSS, MOVD) between a float location and an int location. ins_cost is lowest (85) for the SSE2 register-to-register form. */ 1.11175 +instruct MoveF2I_stack_reg(eRegI dst, stackSlotF src) %{ 1.11176 + match(Set dst (MoveF2I src)); 1.11177 + effect( DEF dst, USE src ); 1.11178 + ins_cost(100); 1.11179 + format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %} 1.11180 + opcode(0x8B); 1.11181 + ins_encode( OpcP, RegMem(dst,src)); 1.11182 + ins_pipe( ialu_reg_mem ); 1.11183 +%} 1.11184 +/* FPU path: store the float register to an int stack slot with FST_S. */ 1.11185 +instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{ 1.11186 + predicate(UseSSE==0); 1.11187 + match(Set dst (MoveF2I src)); 1.11188 + effect( DEF dst, USE src ); 1.11189 + 1.11190 + ins_cost(125); 1.11191 + format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %} 1.11192 + ins_encode( Pop_Mem_Reg_F(dst, src) ); 1.11193 + ins_pipe( fpu_mem_reg ); 1.11194 +%} 1.11195 +/* SSE: store the xmm float to an int stack slot with MOVSS (F3 0F 11); note the RegMem operands are (src, dst). */ 1.11196 +instruct MoveF2I_reg_stack_sse(stackSlotI dst, regX src) %{ 1.11197 + predicate(UseSSE>=1); 1.11198 + match(Set dst (MoveF2I src)); 1.11199 + effect( DEF dst, USE src ); 1.11200 + 1.11201 + ins_cost(95); 1.11202 + format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %} 1.11203 + ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, dst)); 1.11204 + ins_pipe( pipe_slow ); 1.11205 +%} 1.11206 +/* SSE2: xmm -> int register via MOVD. */ 1.11207 +instruct MoveF2I_reg_reg_sse(eRegI dst, regX src) %{ 1.11208 + predicate(UseSSE>=2); 1.11209 + match(Set dst (MoveF2I src)); 1.11210 + effect( DEF dst, USE src ); 1.11211 + ins_cost(85); 1.11212 + format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %} 1.11213 + ins_encode( MovX2I_reg(dst, src)); 1.11214 + ins_pipe( pipe_slow ); 1.11215 +%} 1.11216 +/* MoveI2F: int register stored to a float stack slot with a plain MOV (opcode 0x89). */ 1.11217 +instruct MoveI2F_reg_stack(stackSlotF dst, eRegI src) %{ 1.11218 + match(Set dst (MoveI2F src)); 1.11219 + effect( DEF dst, USE src ); 1.11220 + 1.11221 + ins_cost(100); 1.11222 + format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %} 1.11223 + opcode(0x89); 1.11224 + ins_encode( OpcPRegSS( dst, src ) ); 1.11225 + ins_pipe( ialu_mem_reg ); 1.11226 +%} 1.11227 + 1.11228 + 1.11229 +instruct MoveI2F_stack_reg(regF dst, 
stackSlotI src) %{ 1.11230 + predicate(UseSSE==0); 1.11231 + match(Set dst (MoveI2F src)); 1.11232 + effect(DEF dst, USE src); 1.11233 + 1.11234 + ins_cost(125); 1.11235 + format %{ "FLD_S $src\n\t" 1.11236 + "FSTP $dst\t# MoveI2F_stack_reg" %} 1.11237 + opcode(0xD9); /* D9 /0, FLD m32real */ 1.11238 + ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 1.11239 + Pop_Reg_F(dst) ); 1.11240 + ins_pipe( fpu_reg_mem ); 1.11241 +%} 1.11242 + 1.11243 +instruct MoveI2F_stack_reg_sse(regX dst, stackSlotI src) %{ 1.11244 + predicate(UseSSE>=1); 1.11245 + match(Set dst (MoveI2F src)); 1.11246 + effect( DEF dst, USE src ); 1.11247 + 1.11248 + ins_cost(95); 1.11249 + format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %} 1.11250 + ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,src)); 1.11251 + ins_pipe( pipe_slow ); 1.11252 +%} 1.11253 + 1.11254 +instruct MoveI2F_reg_reg_sse(regX dst, eRegI src) %{ 1.11255 + predicate(UseSSE>=2); 1.11256 + match(Set dst (MoveI2F src)); 1.11257 + effect( DEF dst, USE src ); 1.11258 + 1.11259 + ins_cost(85); 1.11260 + format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %} 1.11261 + ins_encode( MovI2X_reg(dst, src) ); 1.11262 + ins_pipe( pipe_slow ); 1.11263 +%} 1.11264 + 1.11265 +instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{ 1.11266 + match(Set dst (MoveD2L src)); 1.11267 + effect(DEF dst, USE src); 1.11268 + 1.11269 + ins_cost(250); 1.11270 + format %{ "MOV $dst.lo,$src\n\t" 1.11271 + "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %} 1.11272 + opcode(0x8B, 0x8B); 1.11273 + ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src)); 1.11274 + ins_pipe( ialu_mem_long_reg ); 1.11275 +%} 1.11276 + 1.11277 +instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{ 1.11278 + predicate(UseSSE<=1); 1.11279 + match(Set dst (MoveD2L src)); 1.11280 + effect(DEF dst, USE src); 1.11281 + 1.11282 + ins_cost(125); 1.11283 + format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %} 1.11284 + ins_encode( Pop_Mem_Reg_D(dst, src) ); 1.11285 + ins_pipe( 
fpu_mem_reg ); 1.11286 +%} 1.11287 + 1.11288 +instruct MoveD2L_reg_stack_sse(stackSlotL dst, regXD src) %{ 1.11289 + predicate(UseSSE>=2); 1.11290 + match(Set dst (MoveD2L src)); 1.11291 + effect(DEF dst, USE src); 1.11292 + ins_cost(95); 1.11293 + 1.11294 + format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %} 1.11295 + ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src,dst)); 1.11296 + ins_pipe( pipe_slow ); 1.11297 +%} 1.11298 + 1.11299 +instruct MoveD2L_reg_reg_sse(eRegL dst, regXD src, regXD tmp) %{ 1.11300 + predicate(UseSSE>=2); 1.11301 + match(Set dst (MoveD2L src)); 1.11302 + effect(DEF dst, USE src, TEMP tmp); 1.11303 + ins_cost(85); 1.11304 + format %{ "MOVD $dst.lo,$src\n\t" 1.11305 + "PSHUFLW $tmp,$src,0x4E\n\t" 1.11306 + "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %} 1.11307 + ins_encode( MovXD2L_reg(dst, src, tmp) ); 1.11308 + ins_pipe( pipe_slow ); 1.11309 +%} 1.11310 + 1.11311 +instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{ 1.11312 + match(Set dst (MoveL2D src)); 1.11313 + effect(DEF dst, USE src); 1.11314 + 1.11315 + ins_cost(200); 1.11316 + format %{ "MOV $dst,$src.lo\n\t" 1.11317 + "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %} 1.11318 + opcode(0x89, 0x89); 1.11319 + ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) ); 1.11320 + ins_pipe( ialu_mem_long_reg ); 1.11321 +%} 1.11322 + 1.11323 + 1.11324 +instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{ 1.11325 + predicate(UseSSE<=1); 1.11326 + match(Set dst (MoveL2D src)); 1.11327 + effect(DEF dst, USE src); 1.11328 + ins_cost(125); 1.11329 + 1.11330 + format %{ "FLD_D $src\n\t" 1.11331 + "FSTP $dst\t# MoveL2D_stack_reg" %} 1.11332 + opcode(0xDD); /* DD /0, FLD m64real */ 1.11333 + ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 1.11334 + Pop_Reg_D(dst) ); 1.11335 + ins_pipe( fpu_reg_mem ); 1.11336 +%} 1.11337 + 1.11338 + 1.11339 +instruct MoveL2D_stack_reg_sse(regXD dst, stackSlotL src) %{ 1.11340 + predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); 1.11341 
+ match(Set dst (MoveL2D src)); 1.11342 + effect(DEF dst, USE src); 1.11343 + 1.11344 + ins_cost(95); 1.11345 + format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %} 1.11346 + ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,src)); 1.11347 + ins_pipe( pipe_slow ); 1.11348 +%} 1.11349 + 1.11350 +instruct MoveL2D_stack_reg_sse_partial(regXD dst, stackSlotL src) %{ 1.11351 + predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 1.11352 + match(Set dst (MoveL2D src)); 1.11353 + effect(DEF dst, USE src); 1.11354 + 1.11355 + ins_cost(95); 1.11356 + format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %} 1.11357 + ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,src)); 1.11358 + ins_pipe( pipe_slow ); 1.11359 +%} 1.11360 + 1.11361 +instruct MoveL2D_reg_reg_sse(regXD dst, eRegL src, regXD tmp) %{ 1.11362 + predicate(UseSSE>=2); 1.11363 + match(Set dst (MoveL2D src)); 1.11364 + effect(TEMP dst, USE src, TEMP tmp); 1.11365 + ins_cost(85); 1.11366 + format %{ "MOVD $dst,$src.lo\n\t" 1.11367 + "MOVD $tmp,$src.hi\n\t" 1.11368 + "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %} 1.11369 + ins_encode( MovL2XD_reg(dst, src, tmp) ); 1.11370 + ins_pipe( pipe_slow ); 1.11371 +%} 1.11372 + 1.11373 +// Replicate scalar to packed byte (1 byte) values in xmm 1.11374 +instruct Repl8B_reg(regXD dst, regXD src) %{ 1.11375 + predicate(UseSSE>=2); 1.11376 + match(Set dst (Replicate8B src)); 1.11377 + format %{ "MOVDQA $dst,$src\n\t" 1.11378 + "PUNPCKLBW $dst,$dst\n\t" 1.11379 + "PSHUFLW $dst,$dst,0x00\t! replicate8B" %} 1.11380 + ins_encode( pshufd_8x8(dst, src)); 1.11381 + ins_pipe( pipe_slow ); 1.11382 +%} 1.11383 + 1.11384 +// Replicate scalar to packed byte (1 byte) values in xmm 1.11385 +instruct Repl8B_eRegI(regXD dst, eRegI src) %{ 1.11386 + predicate(UseSSE>=2); 1.11387 + match(Set dst (Replicate8B src)); 1.11388 + format %{ "MOVD $dst,$src\n\t" 1.11389 + "PUNPCKLBW $dst,$dst\n\t" 1.11390 + "PSHUFLW $dst,$dst,0x00\t! 
replicate8B" %} 1.11391 + ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst)); 1.11392 + ins_pipe( pipe_slow ); 1.11393 +%} 1.11394 + 1.11395 +// Replicate scalar zero to packed byte (1 byte) values in xmm 1.11396 +instruct Repl8B_immI0(regXD dst, immI0 zero) %{ 1.11397 + predicate(UseSSE>=2); 1.11398 + match(Set dst (Replicate8B zero)); 1.11399 + format %{ "PXOR $dst,$dst\t! replicate8B" %} 1.11400 + ins_encode( pxor(dst, dst)); 1.11401 + ins_pipe( fpu_reg_reg ); 1.11402 +%} 1.11403 + 1.11404 +// Replicate scalar to packed shore (2 byte) values in xmm 1.11405 +instruct Repl4S_reg(regXD dst, regXD src) %{ 1.11406 + predicate(UseSSE>=2); 1.11407 + match(Set dst (Replicate4S src)); 1.11408 + format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %} 1.11409 + ins_encode( pshufd_4x16(dst, src)); 1.11410 + ins_pipe( fpu_reg_reg ); 1.11411 +%} 1.11412 + 1.11413 +// Replicate scalar to packed shore (2 byte) values in xmm 1.11414 +instruct Repl4S_eRegI(regXD dst, eRegI src) %{ 1.11415 + predicate(UseSSE>=2); 1.11416 + match(Set dst (Replicate4S src)); 1.11417 + format %{ "MOVD $dst,$src\n\t" 1.11418 + "PSHUFLW $dst,$dst,0x00\t! replicate4S" %} 1.11419 + ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst)); 1.11420 + ins_pipe( fpu_reg_reg ); 1.11421 +%} 1.11422 + 1.11423 +// Replicate scalar zero to packed short (2 byte) values in xmm 1.11424 +instruct Repl4S_immI0(regXD dst, immI0 zero) %{ 1.11425 + predicate(UseSSE>=2); 1.11426 + match(Set dst (Replicate4S zero)); 1.11427 + format %{ "PXOR $dst,$dst\t! replicate4S" %} 1.11428 + ins_encode( pxor(dst, dst)); 1.11429 + ins_pipe( fpu_reg_reg ); 1.11430 +%} 1.11431 + 1.11432 +// Replicate scalar to packed char (2 byte) values in xmm 1.11433 +instruct Repl4C_reg(regXD dst, regXD src) %{ 1.11434 + predicate(UseSSE>=2); 1.11435 + match(Set dst (Replicate4C src)); 1.11436 + format %{ "PSHUFLW $dst,$src,0x00\t! 
replicate4C" %} 1.11437 + ins_encode( pshufd_4x16(dst, src)); 1.11438 + ins_pipe( fpu_reg_reg ); 1.11439 +%} 1.11440 + 1.11441 +// Replicate scalar to packed char (2 byte) values in xmm 1.11442 +instruct Repl4C_eRegI(regXD dst, eRegI src) %{ 1.11443 + predicate(UseSSE>=2); 1.11444 + match(Set dst (Replicate4C src)); 1.11445 + format %{ "MOVD $dst,$src\n\t" 1.11446 + "PSHUFLW $dst,$dst,0x00\t! replicate4C" %} 1.11447 + ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst)); 1.11448 + ins_pipe( fpu_reg_reg ); 1.11449 +%} 1.11450 + 1.11451 +// Replicate scalar zero to packed char (2 byte) values in xmm 1.11452 +instruct Repl4C_immI0(regXD dst, immI0 zero) %{ 1.11453 + predicate(UseSSE>=2); 1.11454 + match(Set dst (Replicate4C zero)); 1.11455 + format %{ "PXOR $dst,$dst\t! replicate4C" %} 1.11456 + ins_encode( pxor(dst, dst)); 1.11457 + ins_pipe( fpu_reg_reg ); 1.11458 +%} 1.11459 + 1.11460 +// Replicate scalar to packed integer (4 byte) values in xmm 1.11461 +instruct Repl2I_reg(regXD dst, regXD src) %{ 1.11462 + predicate(UseSSE>=2); 1.11463 + match(Set dst (Replicate2I src)); 1.11464 + format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %} 1.11465 + ins_encode( pshufd(dst, src, 0x00)); 1.11466 + ins_pipe( fpu_reg_reg ); 1.11467 +%} 1.11468 + 1.11469 +// Replicate scalar to packed integer (4 byte) values in xmm 1.11470 +instruct Repl2I_eRegI(regXD dst, eRegI src) %{ 1.11471 + predicate(UseSSE>=2); 1.11472 + match(Set dst (Replicate2I src)); 1.11473 + format %{ "MOVD $dst,$src\n\t" 1.11474 + "PSHUFD $dst,$dst,0x00\t! replicate2I" %} 1.11475 + ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00)); 1.11476 + ins_pipe( fpu_reg_reg ); 1.11477 +%} 1.11478 + 1.11479 +// Replicate scalar zero to packed integer (2 byte) values in xmm 1.11480 +instruct Repl2I_immI0(regXD dst, immI0 zero) %{ 1.11481 + predicate(UseSSE>=2); 1.11482 + match(Set dst (Replicate2I zero)); 1.11483 + format %{ "PXOR $dst,$dst\t! 
replicate2I" %} 1.11484 + ins_encode( pxor(dst, dst)); 1.11485 + ins_pipe( fpu_reg_reg ); 1.11486 +%} 1.11487 + 1.11488 +// Replicate scalar to packed single precision floating point values in xmm 1.11489 +instruct Repl2F_reg(regXD dst, regXD src) %{ 1.11490 + predicate(UseSSE>=2); 1.11491 + match(Set dst (Replicate2F src)); 1.11492 + format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %} 1.11493 + ins_encode( pshufd(dst, src, 0xe0)); 1.11494 + ins_pipe( fpu_reg_reg ); 1.11495 +%} 1.11496 + 1.11497 +// Replicate scalar to packed single precision floating point values in xmm 1.11498 +instruct Repl2F_regX(regXD dst, regX src) %{ 1.11499 + predicate(UseSSE>=2); 1.11500 + match(Set dst (Replicate2F src)); 1.11501 + format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %} 1.11502 + ins_encode( pshufd(dst, src, 0xe0)); 1.11503 + ins_pipe( fpu_reg_reg ); 1.11504 +%} 1.11505 + 1.11506 +// Replicate scalar to packed single precision floating point values in xmm 1.11507 +instruct Repl2F_immXF0(regXD dst, immXF0 zero) %{ 1.11508 + predicate(UseSSE>=2); 1.11509 + match(Set dst (Replicate2F zero)); 1.11510 + format %{ "PXOR $dst,$dst\t! 
replicate2F" %} 1.11511 + ins_encode( pxor(dst, dst)); 1.11512 + ins_pipe( fpu_reg_reg ); 1.11513 +%} 1.11514 + 1.11515 + 1.11516 + 1.11517 +// ======================================================================= 1.11518 +// fast clearing of an array 1.11519 + 1.11520 +instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 1.11521 + match(Set dummy (ClearArray cnt base)); 1.11522 + effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); 1.11523 + format %{ "SHL ECX,1\t# Convert doublewords to words\n\t" 1.11524 + "XOR EAX,EAX\n\t" 1.11525 + "REP STOS\t# store EAX into [EDI++] while ECX--" %} 1.11526 + opcode(0,0x4); 1.11527 + ins_encode( Opcode(0xD1), RegOpc(ECX), 1.11528 + OpcRegReg(0x33,EAX,EAX), 1.11529 + Opcode(0xF3), Opcode(0xAB) ); 1.11530 + ins_pipe( pipe_slow ); 1.11531 +%} 1.11532 + 1.11533 +instruct string_compare(eDIRegP str1, eSIRegP str2, eAXRegI tmp1, eBXRegI tmp2, eCXRegI result, eFlagsReg cr) %{ 1.11534 + match(Set result (StrComp str1 str2)); 1.11535 + effect(USE_KILL str1, USE_KILL str2, KILL tmp1, KILL tmp2, KILL cr); 1.11536 + //ins_cost(300); 1.11537 + 1.11538 + format %{ "String Compare $str1,$str2 -> $result // KILL EAX, EBX" %} 1.11539 + ins_encode( enc_String_Compare() ); 1.11540 + ins_pipe( pipe_slow ); 1.11541 +%} 1.11542 + 1.11543 +//----------Control Flow Instructions------------------------------------------ 1.11544 +// Signed compare Instructions 1.11545 +instruct compI_eReg(eFlagsReg cr, eRegI op1, eRegI op2) %{ 1.11546 + match(Set cr (CmpI op1 op2)); 1.11547 + effect( DEF cr, USE op1, USE op2 ); 1.11548 + format %{ "CMP $op1,$op2" %} 1.11549 + opcode(0x3B); /* Opcode 3B /r */ 1.11550 + ins_encode( OpcP, RegReg( op1, op2) ); 1.11551 + ins_pipe( ialu_cr_reg_reg ); 1.11552 +%} 1.11553 + 1.11554 +instruct compI_eReg_imm(eFlagsReg cr, eRegI op1, immI op2) %{ 1.11555 + match(Set cr (CmpI op1 op2)); 1.11556 + effect( DEF cr, USE op1 ); 1.11557 + format %{ "CMP $op1,$op2" %} 1.11558 + 
opcode(0x81,0x07); /* Opcode 81 /7 */ 1.11559 + // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */ 1.11560 + ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 1.11561 + ins_pipe( ialu_cr_reg_imm ); 1.11562 +%} 1.11563 + 1.11564 +// Cisc-spilled version of cmpI_eReg 1.11565 +instruct compI_eReg_mem(eFlagsReg cr, eRegI op1, memory op2) %{ 1.11566 + match(Set cr (CmpI op1 (LoadI op2))); 1.11567 + 1.11568 + format %{ "CMP $op1,$op2" %} 1.11569 + ins_cost(500); 1.11570 + opcode(0x3B); /* Opcode 3B /r */ 1.11571 + ins_encode( OpcP, RegMem( op1, op2) ); 1.11572 + ins_pipe( ialu_cr_reg_mem ); 1.11573 +%} 1.11574 + 1.11575 +instruct testI_reg( eFlagsReg cr, eRegI src, immI0 zero ) %{ 1.11576 + match(Set cr (CmpI src zero)); 1.11577 + effect( DEF cr, USE src ); 1.11578 + 1.11579 + format %{ "TEST $src,$src" %} 1.11580 + opcode(0x85); 1.11581 + ins_encode( OpcP, RegReg( src, src ) ); 1.11582 + ins_pipe( ialu_cr_reg_imm ); 1.11583 +%} 1.11584 + 1.11585 +instruct testI_reg_imm( eFlagsReg cr, eRegI src, immI con, immI0 zero ) %{ 1.11586 + match(Set cr (CmpI (AndI src con) zero)); 1.11587 + 1.11588 + format %{ "TEST $src,$con" %} 1.11589 + opcode(0xF7,0x00); 1.11590 + ins_encode( OpcP, RegOpc(src), Con32(con) ); 1.11591 + ins_pipe( ialu_cr_reg_imm ); 1.11592 +%} 1.11593 + 1.11594 +instruct testI_reg_mem( eFlagsReg cr, eRegI src, memory mem, immI0 zero ) %{ 1.11595 + match(Set cr (CmpI (AndI src mem) zero)); 1.11596 + 1.11597 + format %{ "TEST $src,$mem" %} 1.11598 + opcode(0x85); 1.11599 + ins_encode( OpcP, RegMem( src, mem ) ); 1.11600 + ins_pipe( ialu_cr_reg_mem ); 1.11601 +%} 1.11602 + 1.11603 +// Unsigned compare Instructions; really, same as signed except they 1.11604 +// produce an eFlagsRegU instead of eFlagsReg. 
instruct compU_eReg(eFlagsRegU cr, eRegI op1, eRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Unsigned compare of a register with an immediate (8- or 32-bit encoding
// chosen by OpcSErm/Con8or32).
instruct compU_eReg_imm(eFlagsRegU cr, eRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07); /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of compU_eReg
instruct compU_eReg_mem(eFlagsRegU cr, eRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Cisc-spilled version of compU_eReg with the memory operand on the left
// (disabled)
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, eRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39); /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Unsigned compare against constant zero, emitted as TEST of the register
// with itself.
instruct testU_reg( eFlagsRegU cr, eRegI src, immI0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Pointer compare of a register with a pointer immediate.
instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07); /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of compP_eReg
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Cisc-spilled version of compP_eReg with the memory operand on the left
// (disabled)
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39); /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
// Same match rule as compP_eReg_mem, but without the ins_cost(500) and
// restricted by the predicate to loads whose type is not an oop pointer.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  predicate( !n->in(2)->in(2)->bottom_type()->isa_oop_ptr() );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7); /* Opcode F7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

//----------Max and Min--------------------------------------------------------
// Min Instructions
////
// *** Min and Max using the conditional move are slower than the
// *** branch version on a Pentium III.
// // Conditional move for min
//instruct cmovI_reg_lt( eRegI op2, eRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVlt $op2,$op1\t! min" %}
//  opcode(0x4C,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
//// Min Register with Register (P6 version)
//instruct minI_eReg_p6( eRegI op1, eRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MinI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_lt(op2,op1,cr);
//  %}
//%}

// Min Register with Register (generic version)
// Two-address form: $dst is both an input and the result; flags are killed.
// The code comes from the min_enc encoding class (not shown here).
instruct minI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN $dst,$src" %}
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// Max Register with Register
// *** Min and Max using the conditional move are slower than the
// *** branch version on a Pentium III.
// // Conditional move for max
//instruct cmovI_reg_gt( eRegI op2, eRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( eRegI op1, eRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
// Two-address form: $dst is both an input and the result; flags are killed.
// The code comes from the max_enc encoding class (not shown here).
instruct maxI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table
// Indirect jump through a table of addresses built from _index2label;
// $switch_val is used as the index with scale times_1.
instruct jumpXtnd(eRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);

  format %{ "JMP [table_base](,$switch_val,1)\n\t" %}

  ins_encode %{
    address table_base = __ address_table_constant(_index2label);

    // Jump to Address(table_base + switch_reg)
    InternalAddress table(table_base);
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress(table, index));
  %}
  ins_pc_relative(1);
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
// Long form: declared size is 5 bytes (opcode E9 plus the label offset).
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP $labl" %}
  size(5);
  opcode(0xE9);
  ins_encode( OpcP, Lbl( labl ) );
  ins_pipe( pipe_jmp );
  ins_pc_relative(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Long form: declared size is 6 bytes (0F 8x opcode pair plus the offset).
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);
  opcode(0x0F, 0x80);
  ins_encode( Jcc( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Same encoding as jmpCon, matched for CountedLoopEnd (signed flags).
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  opcode(0x0F, 0x80);
  ins_encode( Jcc( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// CountedLoopEnd on unsigned flags (cmpOpU / eFlagsRegU).
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  opcode(0x0F, 0x80);
  ins_encode( Jcc( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  opcode(0x0F, 0x80);
  ins_encode( Jcc( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
%}

// ============================================================================
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()). Return
// NZ for a miss or zero for a hit. The encoding ALSO sets flags.
// Operands are pinned: result in EDI, sub in ESI, super in EAX; ECX and the
// flags are killed.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100); // slightly larger than the next version
  format %{ "CMPL EAX,ESI\n\t"
            "JEQ,s hit\n\t"
            "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "hit:\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// Flags-only variant: matches the PartialSubtypeCheck result compared
// against zero, so only the condition codes are produced. The same encoding
// class is used with opcode(0x0), which skips the XOR of EDI; EDI (result)
// and ECX are killed instead.
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "CMPL EAX,ESI\n\t"
            "JEQ,s miss\t# Actually a hit; we are done.\n\t"
            "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0); // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset. These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching. Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.
// Jump Direct - Label defines a relative address from JMP+1
// Short form: declared size is 2 bytes (opcode EB plus the short offset).
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);
  opcode(0xEB);
  ins_encode( OpcP, LblShort( labl ) );
  ins_pipe( pipe_jmp );
  ins_pc_relative(1);
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short form: declared size is 2 bytes (7x opcode plus the short offset).
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  opcode(0x70);
  ins_encode( JccShort( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short form of the CountedLoopEnd branch (signed flags).
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  opcode(0x70);
  ins_encode( JccShort( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short form of the CountedLoopEnd branch on unsigned flags.
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  opcode(0x70);
  ins_encode( JccShort( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  opcode(0x70);
  ins_encode( JccShort( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers. Comparing such values efficiently
// is tricky. The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test. The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test. The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
// difference between 'Y' and '0L'. The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code. The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf. One fix is to not match both
// reg-reg and reg-zero forms of long-compare. This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register. Very painful.
// This is the test to avoid.
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags) %{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label plus_one, minus_one, finished;

    // Start from 0; the branches below adjust to -1 or +1.
    __ xorl($dst$$Register, $dst$$Register);

    // High halves decide the result with a signed compare unless equal.
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, minus_one);
    __ jccb(Assembler::greater, plus_one);

    // High halves equal: low halves are compared as unsigned.
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, minus_one);
    __ jccb(Assembler::equal, finished);

    __ bind(plus_one);
    __ increment($dst$$Register);
    __ jmpb(finished);

    __ bind(minus_one);
    __ decrement($dst$$Register);

    __ bind(finished);
  %}
  ins_pipe(pipe_slow);
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Compare against zero: only the high word is tested.
instruct cmpL_zero_flags_LTGE(flagsReg_long_LTGE flags, eRegL src, immL0 zero) %{
  match(Set flags (CmpL src zero));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode(OpcP, RegReg_Hi2(src, src));
  ins_pipe(ialu_cr_reg_reg);
%}

// Manifest a CmpL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpL_reg_flags_LTGE(flagsReg_long_LTGE flags, eRegL src1, eRegL src2, eRegI tmp) %{
  match(Set flags (CmpL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode(long_cmp_flags2(src1, src2, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test that
// restricts this rule to the LT and GE tests.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp, flags, labl);    // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// Register source: one CMOV for the low word, one for the high word.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegReg_Lo2(dst, src),
             enc_cmov(cmp), RegReg_Hi2(dst, src));
  ins_pipe(pipe_cmov_reg_long);
%}

// Memory source: the LoadL is folded into the two CMOVs.
instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegMem(dst, src),
             enc_cmov(cmp), RegMem_Hi(dst, src));
  ins_pipe(pipe_cmov_reg_long);
%}

// Compare 2 longs and CMOVE ints.
// Register source.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, eRegI src) %{
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}

// Memory source: the LoadI is folded into the CMOV.
instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, memory src) %{
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegMem(dst, src));
  ins_pipe(pipe_cmov_mem);
%}

// Compare 2 longs and CMOVE ptrs.
// Pointer CMOV driven by long-compare flags (LT/GE tests only).
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// NOTE on the four floating-point rules below: the LT/GE alternatives are
// parenthesized so that the UseSSE guard applies to BOTH tests.  Without the
// parentheses '&&' binds tighter than '||', so the rule would match every GE
// test regardless of the UseSSE level.

// Compare 2 longs and CMOVE doubles (UseSSE<=1, regD operands)
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (UseSSE>=2, regXD operands)
instruct cmovXDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regXD dst, regXD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovXD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (UseSSE==0, regF operands)
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (UseSSE>=1, regX operands)
instruct cmovXX_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regX dst, regX src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovX_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
// Compare against zero: OR the two halves into a temp and test the result.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, eRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
// Compares the low halves first and only looks at the high halves when the
// low halves are equal.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp, flags, labl);    // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// Register source: one CMOV for the low word, one for the high word.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegReg_Lo2(dst, src),
             enc_cmov(cmp), RegReg_Hi2(dst, src));
  ins_pipe(pipe_cmov_reg_long);
%}

// Memory source: the LoadL is folded into the two CMOVs.
instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegMem(dst, src),
             enc_cmov(cmp), RegMem_Hi(dst, src));
  ins_pipe(pipe_cmov_reg_long);
%}

// Compare 2 longs and CMOVE ints.
// Register source.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, eRegI src) %{
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}

// Memory source: the LoadI is folded into the CMOV.
instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, memory src) %{
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegMem(dst, src));
  ins_pipe(pipe_cmov_mem);
%}

// Compare 2 longs and CMOVE ptrs.
// Pointer CMOV driven by long-compare flags (EQ/NE tests only).
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// NOTE on the four floating-point rules below: the EQ/NE alternatives are
// parenthesized so that the UseSSE guard applies to BOTH tests.  Without the
// parentheses '&&' binds tighter than '||', so the rule would match every NE
// test regardless of the UseSSE level.

// Compare 2 longs and CMOVE doubles (UseSSE<=1, regD operands)
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (UseSSE>=2, regXD operands)
instruct cmovXDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regXD dst, regXD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovXD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (UseSSE==0, regF operands)
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (UseSSE>=1, regX operands)
instruct cmovXX_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regX dst, regX src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovX_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, eRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, eRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg <= zero/reg OR reg > zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.  The flags
// come from a compare with swapped operands, so the condition operand is
// the commuted form (cmpOp_commute).
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl);    // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Memory-source variant: the LoadL is folded into the two CMOVs.
instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, eRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Memory-source variant: the LoadI is folded into the CMOV.
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
// Pointer CMOV driven by long-compare flags (LE/GT tests only; the condition
// operand is the commuted form).
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// NOTE on the four floating-point rules below: the LE/GT alternatives are
// parenthesized so that the UseSSE guard applies to BOTH tests.  Without the
// parentheses '&&' binds tighter than '||', so the rule would match every GT
// test regardless of the UseSSE level.

// Compare 2 longs and CMOVE doubles (UseSSE<=1, regD operands)
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (UseSSE>=2, regXD operands)
instruct cmovXDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regXD dst, regXD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovXD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (UseSSE==0, regF operands)
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// Compare 2 longs and CMOVE floats (UseSSE>=1, regX operands)
instruct cmovXX_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regX dst, regX src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovX_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_FPU,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_pc_relative(1);
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode(pre_call_FPU,
             Java_Dynamic_Call(meth),
             call_epilog,
             post_call_FPU);
  ins_pipe(pipe_slow);
  ins_pc_relative(1);
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode(pre_call_FPU,
             FFree_Float_Stack_All,
             Java_To_Runtime(meth),
             post_call_FPU);
  ins_pipe(pipe_slow);
  ins_pc_relative(1);
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(pre_call_FPU,
             FFree_Float_Stack_All,
             Java_To_Runtime(meth),
             Verify_FPU_For_Leaf,
             post_call_FPU);
  ins_pipe(pipe_slow);
  ins_pc_relative(1);
%}

// Leaf runtime call for CallLeafNoFP: only the call itself is encoded,
// without the FPU pre/post-call steps used by the other runtime calls.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
  ins_pc_relative(1);
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);
  ins_encode(OpcP);
  ins_pipe(pipe_jmp);
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
  match(TailCall jump_target method_oop);
  ins_cost(300);
  format %{ "JMP $jump_target \t# EBX holds method oop" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode(OpcP, RegOpc(jump_target));
  ins_pipe(pipe_jmp);
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match(TailJump jump_target ex_oop);
  ins_cost(300);
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode(enc_pop_rdx,
             OpcP, RegOpc(jump_target));
  ins_pipe(pipe_jmp);
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler.  No code emitted.
instruct CreateException(eAXRegP ex_oop)
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe(empty);
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe(pipe_jmp);
%}

// inlined locking and unlocking


// Inlined monitor enter.  The result is delivered in the flags register;
// tmp and scr are scratch registers for the Fast_Lock encoding.
instruct cmpFastLock(eFlagsReg cr, eRegP object, eRegP box, eAXRegI tmp, eRegP scr) %{
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr);
  ins_cost(300);
  format %{ "FASTLOCK $object, $box KILLS $tmp,$scr" %}
  ins_encode(Fast_Lock(object, box, tmp, scr));
  ins_pipe(pipe_slow);
  ins_pc_relative(1);
%}

// Inlined monitor exit.  The result is delivered in the flags register;
// tmp is a scratch register for the Fast_Unlock encoding.
instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "FASTUNLOCK $object, $box, $tmp" %}
  ins_encode(Fast_Unlock(object, box, tmp));
  ins_pipe(pipe_slow);
  ins_pc_relative(1);
%}



// ============================================================================
// Safepoint Instruction
instruct safePoint_poll(eFlagsReg cr) %{
  match(SafePoint);
  effect(KILL cr);

  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
  // On SPARC that might be acceptable as we can generate the address with
  // just a sethi, saving an or.  By polling at offset 0 we can end up
  // putting additional pressure on the index-0 in the D$.  Because of
  // alignment (just like the situation at hand) the lower indices tend
  // to see more traffic.  It'd be better to change the polling address
  // to offset 0 of the last $line in the polling page.

  format %{ "TSTL #polladdr,EAX\t! Safepoint: poll for GC" %}
  ins_cost(125);
  size(6);
  ins_encode(Safepoint_Poll());
  ins_pipe(ialu_reg_mem);
%}

//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser.  An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in the same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(eRegI dst, eRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, eRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(eRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// Live rule: a loadI directly preceded by a storeI of the same register to
// the same memory operand is replaced by the storeI alone.
peephole %{
  // instruction 0 is the root (loadI); instruction 1 is the one preceding it (storeI)
  peepmatch ( loadI storeI );
  // the register that was stored is the one being reloaded, and both
  // instructions name the same memory operand
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  // rebuild only the store; one operand reference is supplied for each
  // operand in storeI's match rule (mem, mem, src), all taken from instruction 1
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.