src/cpu/x86/vm/x86_64.ad

changeset 0
f90c822e73f8
child 6876
710a3c8b516e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/cpu/x86/vm/x86_64.ad	Wed Apr 27 01:25:04 2016 +0800
     1.3 @@ -0,0 +1,11726 @@
     1.4 +//
     1.5 +// Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
     1.6 +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.7 +//
     1.8 +// This code is free software; you can redistribute it and/or modify it
     1.9 +// under the terms of the GNU General Public License version 2 only, as
    1.10 +// published by the Free Software Foundation.
    1.11 +//
    1.12 +// This code is distributed in the hope that it will be useful, but WITHOUT
    1.13 +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    1.14 +// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    1.15 +// version 2 for more details (a copy is included in the LICENSE file that
    1.16 +// accompanied this code).
    1.17 +//
    1.18 +// You should have received a copy of the GNU General Public License version
    1.19 +// 2 along with this work; if not, write to the Free Software Foundation,
    1.20 +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    1.21 +//
    1.22 +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    1.23 +// or visit www.oracle.com if you need additional information or have any
    1.24 +// questions.
    1.25 +//
    1.26 +//
    1.27 +
    1.28 +// AMD64 Architecture Description File
    1.29 +
    1.30 +//----------REGISTER DEFINITION BLOCK------------------------------------------
    1.31 +// This information is used by the matcher and the register allocator to
    1.32 +// describe individual registers and classes of registers within the target
    1.33 +// architecture.
    1.34 +
    1.35 +register %{
    1.36 +//----------Architecture Description Register Definitions----------------------
    1.37 +// General Registers
    1.38 +// "reg_def"  name ( register save type, C convention save type,
    1.39 +//                   ideal register type, encoding );
    1.40 +// Register Save Types:
    1.41 +//
    1.42 +// NS  = No-Save:       The register allocator assumes that these registers
    1.43 +//                      can be used without saving upon entry to the method, &
    1.44 +//                      that they do not need to be saved at call sites.
    1.45 +//
    1.46 +// SOC = Save-On-Call:  The register allocator assumes that these registers
    1.47 +//                      can be used without saving upon entry to the method,
    1.48 +//                      but that they must be saved at call sites.
    1.49 +//
    1.50 +// SOE = Save-On-Entry: The register allocator assumes that these registers
    1.51 +//                      must be saved before using them upon entry to the
    1.52 +//                      method, but they do not need to be saved at call
    1.53 +//                      sites.
    1.54 +//
    1.55 +// AS  = Always-Save:   The register allocator assumes that these registers
    1.56 +//                      must be saved before using them upon entry to the
    1.57 +//                      method, & that they must be saved at call sites.
    1.58 +//
    1.59 +// Ideal Register Type is used to determine how to save & restore a
    1.60 +// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
    1.61 +// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
    1.62 +//
    1.63 +// The encoding number is the actual bit-pattern placed into the opcodes.
    1.64 +
    1.65 +// General Registers
    1.66 +// R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
    1.67 +// used as byte registers)
    1.68 +
    1.69 +// Previously set RBX, RSI, and RDI as save-on-entry for java code
    1.70 +// Turn off SOE in java-code due to frequent use of uncommon-traps.
    1.71 +// Now that allocator is better, turn on RSI and RDI as SOE registers.
    1.72 +
    1.73 +reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
    1.74 +reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());
    1.75 +
    1.76 +reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
    1.77 +reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());
    1.78 +
    1.79 +reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
    1.80 +reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());
    1.81 +
    1.82 +reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
    1.83 +reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());
    1.84 +
    1.85 +reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
    1.86 +reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());
    1.87 +
    1.88 +// now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
    1.89 +reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
    1.90 +reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());
    1.91 +
    1.92 +#ifdef _WIN64
    1.93 +
    1.94 +reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
    1.95 +reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());
    1.96 +
    1.97 +reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
    1.98 +reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());
    1.99 +
   1.100 +#else
   1.101 +
   1.102 +reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
   1.103 +reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());
   1.104 +
   1.105 +reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
   1.106 +reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());
   1.107 +
   1.108 +#endif
   1.109 +
   1.110 +reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
   1.111 +reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());
   1.112 +
   1.113 +reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
   1.114 +reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());
   1.115 +
   1.116 +reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
   1.117 +reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
   1.118 +
   1.119 +reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
   1.120 +reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
   1.121 +
   1.122 +reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
   1.123 +reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
   1.124 +
   1.125 +reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
   1.126 +reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
   1.127 +
   1.128 +reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
   1.129 +reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
   1.130 +
   1.131 +reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
   1.132 +reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
   1.133 +
   1.134 +
   1.135 +// Floating Point Registers
   1.136 +
   1.137 +// Specify priority of register selection within phases of register
   1.138 +// allocation.  Highest priority is first.  A useful heuristic is to
   1.139 +// give registers a low priority when they are required by machine
   1.140 +// instructions, like EAX and EDX on I486, and choose no-save registers
   1.141 +// before save-on-call, & save-on-call before save-on-entry.  Registers
   1.142 +// which participate in fixed calling sequences should come last.
   1.143 +// Registers which are used as pairs must fall on an even boundary.
   1.144 +
   1.145 +alloc_class chunk0(R10,         R10_H,
   1.146 +                   R11,         R11_H,
   1.147 +                   R8,          R8_H,
   1.148 +                   R9,          R9_H,
   1.149 +                   R12,         R12_H,
   1.150 +                   RCX,         RCX_H,
   1.151 +                   RBX,         RBX_H,
   1.152 +                   RDI,         RDI_H,
   1.153 +                   RDX,         RDX_H,
   1.154 +                   RSI,         RSI_H,
   1.155 +                   RAX,         RAX_H,
   1.156 +                   RBP,         RBP_H,
   1.157 +                   R13,         R13_H,
   1.158 +                   R14,         R14_H,
   1.159 +                   R15,         R15_H,
   1.160 +                   RSP,         RSP_H);
   1.161 +
   1.162 +
   1.163 +//----------Architecture Description Register Classes--------------------------
   1.164 +// Several register classes are automatically defined based upon information in
   1.165 +// this architecture description.
   1.166 +// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
   1.167 +// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
   1.168 +// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
   1.169 +// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
   1.170 +//
   1.171 +
   1.172 +// Class for all pointer registers (including RSP)
   1.173 +reg_class any_reg(RAX, RAX_H,
   1.174 +                  RDX, RDX_H,
   1.175 +                  RBP, RBP_H,
   1.176 +                  RDI, RDI_H,
   1.177 +                  RSI, RSI_H,
   1.178 +                  RCX, RCX_H,
   1.179 +                  RBX, RBX_H,
   1.180 +                  RSP, RSP_H,
   1.181 +                  R8,  R8_H,
   1.182 +                  R9,  R9_H,
   1.183 +                  R10, R10_H,
   1.184 +                  R11, R11_H,
   1.185 +                  R12, R12_H,
   1.186 +                  R13, R13_H,
   1.187 +                  R14, R14_H,
   1.188 +                  R15, R15_H);
   1.189 +
   1.190 +// Class for all pointer registers except RSP (R12 and R15 are omitted as well)
   1.191 +reg_class ptr_reg(RAX, RAX_H,
   1.192 +                  RDX, RDX_H,
   1.193 +                  RBP, RBP_H,
   1.194 +                  RDI, RDI_H,
   1.195 +                  RSI, RSI_H,
   1.196 +                  RCX, RCX_H,
   1.197 +                  RBX, RBX_H,
   1.198 +                  R8,  R8_H,
   1.199 +                  R9,  R9_H,
   1.200 +                  R10, R10_H,
   1.201 +                  R11, R11_H,
   1.202 +                  R13, R13_H,
   1.203 +                  R14, R14_H);
   1.204 +
   1.205 +// Class for all pointer registers except RAX and RSP
   1.206 +reg_class ptr_no_rax_reg(RDX, RDX_H,
   1.207 +                         RBP, RBP_H,
   1.208 +                         RDI, RDI_H,
   1.209 +                         RSI, RSI_H,
   1.210 +                         RCX, RCX_H,
   1.211 +                         RBX, RBX_H,
   1.212 +                         R8,  R8_H,
   1.213 +                         R9,  R9_H,
   1.214 +                         R10, R10_H,
   1.215 +                         R11, R11_H,
   1.216 +                         R13, R13_H,
   1.217 +                         R14, R14_H);
   1.218 +
   1.219 +reg_class ptr_no_rbp_reg(RDX, RDX_H,  // Class for all pointer registers except RBP and RSP (R12 and R15 are omitted as well)
   1.220 +                         RAX, RAX_H,
   1.221 +                         RDI, RDI_H,
   1.222 +                         RSI, RSI_H,
   1.223 +                         RCX, RCX_H,
   1.224 +                         RBX, RBX_H,
   1.225 +                         R8,  R8_H,
   1.226 +                         R9,  R9_H,
   1.227 +                         R10, R10_H,
   1.228 +                         R11, R11_H,
   1.229 +                         R13, R13_H,
   1.230 +                         R14, R14_H);
   1.231 +
   1.232 +// Class for all pointer registers except RAX, RBX and RSP
   1.233 +reg_class ptr_no_rax_rbx_reg(RDX, RDX_H,
   1.234 +                             RBP, RBP_H,
   1.235 +                             RDI, RDI_H,
   1.236 +                             RSI, RSI_H,
   1.237 +                             RCX, RCX_H,
   1.238 +                             R8,  R8_H,
   1.239 +                             R9,  R9_H,
   1.240 +                             R10, R10_H,
   1.241 +                             R11, R11_H,
   1.242 +                             R13, R13_H,
   1.243 +                             R14, R14_H);
   1.244 +
   1.245 +// Singleton class for RAX pointer register
   1.246 +reg_class ptr_rax_reg(RAX, RAX_H);
   1.247 +
   1.248 +// Singleton class for RBX pointer register
   1.249 +reg_class ptr_rbx_reg(RBX, RBX_H);
   1.250 +
   1.251 +// Singleton class for RSI pointer register
   1.252 +reg_class ptr_rsi_reg(RSI, RSI_H);
   1.253 +
   1.254 +// Singleton class for RDI pointer register
   1.255 +reg_class ptr_rdi_reg(RDI, RDI_H);
   1.256 +
   1.257 +// Singleton class for RBP pointer register
   1.258 +reg_class ptr_rbp_reg(RBP, RBP_H);
   1.259 +
   1.260 +// Singleton class for stack pointer
   1.261 +reg_class ptr_rsp_reg(RSP, RSP_H);
   1.262 +
   1.263 +// Singleton class for TLS pointer
   1.264 +reg_class ptr_r15_reg(R15, R15_H);
   1.265 +
   1.266 +// Class for all long registers (except RSP; R12 and R15 are omitted as well)
   1.267 +reg_class long_reg(RAX, RAX_H,
   1.268 +                   RDX, RDX_H,
   1.269 +                   RBP, RBP_H,
   1.270 +                   RDI, RDI_H,
   1.271 +                   RSI, RSI_H,
   1.272 +                   RCX, RCX_H,
   1.273 +                   RBX, RBX_H,
   1.274 +                   R8,  R8_H,
   1.275 +                   R9,  R9_H,
   1.276 +                   R10, R10_H,
   1.277 +                   R11, R11_H,
   1.278 +                   R13, R13_H,
   1.279 +                   R14, R14_H);
   1.280 +
   1.281 +// Class for all long registers except RAX, RDX (and RSP)
   1.282 +reg_class long_no_rax_rdx_reg(RBP, RBP_H,
   1.283 +                              RDI, RDI_H,
   1.284 +                              RSI, RSI_H,
   1.285 +                              RCX, RCX_H,
   1.286 +                              RBX, RBX_H,
   1.287 +                              R8,  R8_H,
   1.288 +                              R9,  R9_H,
   1.289 +                              R10, R10_H,
   1.290 +                              R11, R11_H,
   1.291 +                              R13, R13_H,
   1.292 +                              R14, R14_H);
   1.293 +
   1.294 +// Class for all long registers except RCX (and RSP)
   1.295 +reg_class long_no_rcx_reg(RBP, RBP_H,
   1.296 +                          RDI, RDI_H,
   1.297 +                          RSI, RSI_H,
   1.298 +                          RAX, RAX_H,
   1.299 +                          RDX, RDX_H,
   1.300 +                          RBX, RBX_H,
   1.301 +                          R8,  R8_H,
   1.302 +                          R9,  R9_H,
   1.303 +                          R10, R10_H,
   1.304 +                          R11, R11_H,
   1.305 +                          R13, R13_H,
   1.306 +                          R14, R14_H);
   1.307 +
   1.308 +// Class for all long registers except RAX (and RSP)
   1.309 +reg_class long_no_rax_reg(RBP, RBP_H,
   1.310 +                          RDX, RDX_H,
   1.311 +                          RDI, RDI_H,
   1.312 +                          RSI, RSI_H,
   1.313 +                          RCX, RCX_H,
   1.314 +                          RBX, RBX_H,
   1.315 +                          R8,  R8_H,
   1.316 +                          R9,  R9_H,
   1.317 +                          R10, R10_H,
   1.318 +                          R11, R11_H,
   1.319 +                          R13, R13_H,
   1.320 +                          R14, R14_H);
   1.321 +
   1.322 +// Singleton class for RAX long register
   1.323 +reg_class long_rax_reg(RAX, RAX_H);
   1.324 +
   1.325 +// Singleton class for RCX long register
   1.326 +reg_class long_rcx_reg(RCX, RCX_H);
   1.327 +
   1.328 +// Singleton class for RDX long register
   1.329 +reg_class long_rdx_reg(RDX, RDX_H);
   1.330 +
   1.331 +// Class for all int registers (except RSP; R12 and R15 are omitted as well)
   1.332 +reg_class int_reg(RAX,
   1.333 +                  RDX,
   1.334 +                  RBP,
   1.335 +                  RDI,
   1.336 +                  RSI,
   1.337 +                  RCX,
   1.338 +                  RBX,
   1.339 +                  R8,
   1.340 +                  R9,
   1.341 +                  R10,
   1.342 +                  R11,
   1.343 +                  R13,
   1.344 +                  R14);
   1.345 +
   1.346 +// Class for all int registers except RCX (and RSP)
   1.347 +reg_class int_no_rcx_reg(RAX,
   1.348 +                         RDX,
   1.349 +                         RBP,
   1.350 +                         RDI,
   1.351 +                         RSI,
   1.352 +                         RBX,
   1.353 +                         R8,
   1.354 +                         R9,
   1.355 +                         R10,
   1.356 +                         R11,
   1.357 +                         R13,
   1.358 +                         R14);
   1.359 +
   1.360 +// Class for all int registers except RAX, RDX (and RSP)
   1.361 +reg_class int_no_rax_rdx_reg(RBP,
   1.362 +                             RDI,
   1.363 +                             RSI,
   1.364 +                             RCX,
   1.365 +                             RBX,
   1.366 +                             R8,
   1.367 +                             R9,
   1.368 +                             R10,
   1.369 +                             R11,
   1.370 +                             R13,
   1.371 +                             R14);
   1.372 +
   1.373 +// Singleton class for RAX int register
   1.374 +reg_class int_rax_reg(RAX);
   1.375 +
   1.376 +// Singleton class for RBX int register
   1.377 +reg_class int_rbx_reg(RBX);
   1.378 +
   1.379 +// Singleton class for RCX int register
   1.380 +reg_class int_rcx_reg(RCX);
   1.381 +
   1.382 +// Singleton class for RDX int register
   1.383 +reg_class int_rdx_reg(RDX);
   1.384 +
   1.385 +// Singleton class for RDI int register
   1.386 +reg_class int_rdi_reg(RDI);
   1.387 +
   1.388 +// Singleton class for instruction pointer
   1.389 +// reg_class ip_reg(RIP);
   1.390 +
   1.391 +%}
   1.392 +
   1.393 +//----------SOURCE BLOCK-------------------------------------------------------
   1.394 +// This is a block of C++ code which provides values, functions, and
   1.395 +// definitions necessary in the rest of the architecture description
   1.396 +source %{
   1.397 +#define   RELOC_IMM64    Assembler::imm_operand     // relocation 'format' value, presumably for 64-bit immediates -- confirm against Assembler
   1.398 +#define   RELOC_DISP32   Assembler::disp32_operand  // relocation 'format' value handed to emit_d32_reloc() for 32-bit displacements
   1.399 +
   1.400 +#define __ _masm.  // lets code write "__ insn(...)" for "_masm.insn(...)"; needs a MacroAssembler named _masm in scope
   1.401 +
   1.402 +static int preserve_SP_size() {  // Size in bytes of the register-to-register move that is budgeted ahead of method-handle calls.
   1.403 +  return 3;  // rex.w, op, rm(reg/reg) -- one byte each
   1.404 +}
   1.405 +static int clear_avx_size() {  // Bytes to budget for a vzeroupper ahead of a call: 3 when max_vector_size() > 16, otherwise 0.
   1.406 +  return (Compile::current()->max_vector_size() > 16) ? 3 : 0;  // vzeroupper is counted as 3 bytes
   1.407 +}
   1.408 +
   1.409 +// !!!!! Special hack to get all types of calls to specify the byte offset
   1.410 +//       from the start of the call node's code to the point where the return
   1.411 +//       address will point (i.e. just past the call instruction).
   1.412 +int MachCallStaticJavaNode::ret_addr_offset()
   1.413 +{
   1.414 +  int offset = 5; // 5 bytes from start of call to where return address points
   1.415 +  offset += clear_avx_size();       // plus the optional vzeroupper
   1.416 +  if (_method_handle_invoke)
   1.417 +    offset += preserve_SP_size();   // method-handle invokes also carry the SP-preserving move
   1.418 +  return offset;
   1.419 +}
   1.420 +
   1.421 +int MachCallDynamicJavaNode::ret_addr_offset()  // Byte offset from the start of the dynamic call sequence to its return address.
   1.422 +{
   1.423 +  int offset = 15; // 15 bytes from start of call to where return address points (the 11 bytes skipped in CallDynamicJavaDirectNode::compute_padding plus 4 more)
   1.424 +  offset += clear_avx_size();  // plus the optional vzeroupper
   1.425 +  return offset;
   1.426 +}
   1.427 +
   1.428 +int MachCallRuntimeNode::ret_addr_offset() {  // Byte offset from the start of the runtime call sequence to its return address.
   1.429 +  int offset = 13; // combined length of: movq r10,#addr; callq (r10)
   1.430 +  offset += clear_avx_size();  // plus the optional vzeroupper
   1.431 +  return offset;
   1.432 +}
   1.433 +
   1.434 +// Indicate whether the safepoint node needs the polling page address as an input.
   1.435 +// It does when the polling page is more than disp32 away, as reported by the assembler.
   1.436 +bool SafePointNode::needs_polling_address_input()
   1.437 +{
   1.438 +  return Assembler::is_polling_page_far();  // the decision is delegated entirely to the Assembler
   1.439 +}
   1.440 +
   1.441 +//
   1.442 +// Compute padding required for nodes which need alignment
   1.443 +//
   1.444 +
   1.445 +// Returns the padding needed so that the byte right after the call opcode (not the opcode itself) falls on an
   1.446 +// alignment_required() boundary; the original note gives the reason: the patched part must not span a cache line.
   1.447 +int CallStaticJavaDirectNode::compute_padding(int current_offset) const
   1.448 +{
   1.449 +  current_offset += clear_avx_size(); // skip vzeroupper
   1.450 +  current_offset += 1; // skip call opcode byte
   1.451 +  return round_to(current_offset, alignment_required()) - current_offset;  // distance up to the next aligned offset (0 if already aligned)
   1.452 +}
   1.453 +
   1.454 +// Returns the padding needed so that the byte right after the call opcode (not the opcode itself) falls on an
   1.455 +// alignment_required() boundary; the original note gives the reason: the patched part must not span a cache line.
   1.456 +int CallStaticJavaHandleNode::compute_padding(int current_offset) const
   1.457 +{
   1.458 +  current_offset += preserve_SP_size();   // skip mov rbp, rsp
   1.459 +  current_offset += clear_avx_size(); // skip vzeroupper
   1.460 +  current_offset += 1; // skip call opcode byte
   1.461 +  return round_to(current_offset, alignment_required()) - current_offset;  // distance up to the next aligned offset (0 if already aligned)
   1.462 +}
   1.463 +
   1.464 +// Returns the padding needed so that the byte right after the call opcode (not the opcode itself) falls on an
   1.465 +// alignment_required() boundary; the original note gives the reason: the patched part must not span a cache line.
   1.466 +int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
   1.467 +{
   1.468 +  current_offset += clear_avx_size(); // skip vzeroupper
   1.469 +  current_offset += 11; // skip movq instruction + call opcode byte
   1.470 +  return round_to(current_offset, alignment_required()) - current_offset;  // distance up to the next aligned offset (0 if already aligned)
   1.471 +}
   1.472 +
   1.473 +// EMIT_RM(): append one byte laid out as f1 (bits 7-6), f2 (bits 5-3), f3 (bits 2-0); used for both ModRM and SIB bytes
   1.474 +void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
   1.475 +  unsigned char c = (unsigned char) ((f1 << 6) | (f2 << 3) | f3);  // fields are OR-ed unmasked, so callers must pass in-range values
   1.476 +  cbuf.insts()->emit_int8(c);
   1.477 +}
   1.478 +
   1.479 +// EMIT_CC(): append one byte formed by OR-ing f1 and f2 (presumably an opcode base and a condition code)
   1.480 +void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
   1.481 +  unsigned char c = (unsigned char) (f1 | f2);  // truncated to the low 8 bits
   1.482 +  cbuf.insts()->emit_int8(c);
   1.483 +}
   1.484 +
   1.485 +// EMIT_OPCODE(): append the low 8 bits of 'code' to the instruction section
   1.486 +void emit_opcode(CodeBuffer &cbuf, int code) {
   1.487 +  cbuf.insts()->emit_int8((unsigned char) code);
   1.488 +}
   1.489 +
   1.490 +// EMIT_OPCODE() w/ relocation information: records a relocation at insts_mark() + offset, then emits the opcode byte
   1.491 +void emit_opcode(CodeBuffer &cbuf,
   1.492 +                 int code, relocInfo::relocType reloc, int offset, int format)
   1.493 +{
   1.494 +  cbuf.relocate(cbuf.insts_mark() + offset, reloc, format);  // the relocation is registered before the byte is written
   1.495 +  emit_opcode(cbuf, code);
   1.496 +}
   1.497 +
   1.498 +// EMIT_D8(): append the low 8 bits of d8 as a single data byte
   1.499 +void emit_d8(CodeBuffer &cbuf, int d8) {
   1.500 +  cbuf.insts()->emit_int8((unsigned char) d8);
   1.501 +}
   1.502 +
   1.503 +// EMIT_D16(): append d16 as a 16-bit value via emit_int16
   1.504 +void emit_d16(CodeBuffer &cbuf, int d16) {
   1.505 +  cbuf.insts()->emit_int16(d16);
   1.506 +}
   1.507 +
   1.508 +// EMIT_D32(): append d32 as a 32-bit value via emit_int32 (no relocation is recorded)
   1.509 +void emit_d32(CodeBuffer &cbuf, int d32) {
   1.510 +  cbuf.insts()->emit_int32(d32);
   1.511 +}
   1.512 +
   1.513 +// EMIT_D64(): append d64 as a 64-bit value via emit_int64 (no relocation is recorded)
   1.514 +void emit_d64(CodeBuffer &cbuf, int64_t d64) {
   1.515 +  cbuf.insts()->emit_int64(d64);
   1.516 +}
   1.517 +
   1.518 +// emit 32 bit value and construct relocation entry from relocInfo::relocType; the relocation is attached at insts_mark()
   1.519 +void emit_d32_reloc(CodeBuffer& cbuf,
   1.520 +                    int d32,
   1.521 +                    relocInfo::relocType reloc,
   1.522 +                    int format)
   1.523 +{
   1.524 +  assert(reloc != relocInfo::external_word_type, "use 2-arg emit_d32_reloc");  // external words go through the (cbuf, address) overload
   1.525 +  cbuf.relocate(cbuf.insts_mark(), reloc, format);
   1.526 +  cbuf.insts()->emit_int32(d32);
   1.527 +}
   1.528 +
   1.529 +// emit 32 bit value and construct relocation entry from RelocationHolder; the relocation is attached at insts_mark()
   1.530 +void emit_d32_reloc(CodeBuffer& cbuf, int d32, RelocationHolder const& rspec, int format) {
   1.531 +#ifdef ASSERT
   1.532 +  if (rspec.reloc()->type() == relocInfo::oop_type &&
   1.533 +      d32 != 0 && d32 != (intptr_t) Universe::non_oop_word()) {  // debug-only sanity check; zero and the non-oop sentinel are exempt
   1.534 +    assert(Universe::heap()->is_in_reserved((address)(intptr_t)d32), "should be real oop");
   1.535 +    assert(cast_to_oop((intptr_t)d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop((intptr_t)d32)->is_scavengable()), "cannot embed scavengable oops in code");
   1.536 +  }
   1.537 +#endif
   1.538 +  cbuf.relocate(cbuf.insts_mark(), rspec, format);
   1.539 +  cbuf.insts()->emit_int32(d32);
   1.540 +}
   1.541 +
   1.542 +void emit_d32_reloc(CodeBuffer& cbuf, address addr) {  // Emit a 32-bit displacement to addr with an external_word relocation.
   1.543 +  address next_ip = cbuf.insts_end() + 4;  // address just past the 4-byte field about to be emitted
   1.544 +  emit_d32_reloc(cbuf, (int) (addr - next_ip),  // displacement is relative to next_ip and truncated to int
   1.545 +                 external_word_Relocation::spec(addr),
   1.546 +                 RELOC_DISP32);
   1.547 +}
   1.548 +
   1.549 +
   1.550 +// emit 64 bit value and construct relocation entry from relocInfo::relocType; the relocation is attached at insts_mark()
   1.551 +void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, relocInfo::relocType reloc, int format) {
   1.552 +  cbuf.relocate(cbuf.insts_mark(), reloc, format);
   1.553 +  cbuf.insts()->emit_int64(d64);
   1.554 +}
   1.555 +
   1.556 +// emit 64 bit value and construct relocation entry from RelocationHolder; the relocation is attached at insts_mark()
   1.557 +void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, RelocationHolder const& rspec, int format) {
   1.558 +#ifdef ASSERT
   1.559 +  if (rspec.reloc()->type() == relocInfo::oop_type &&
   1.560 +      d64 != 0 && d64 != (int64_t) Universe::non_oop_word()) {  // debug-only sanity check; zero and the non-oop sentinel are exempt
   1.561 +    assert(Universe::heap()->is_in_reserved((address)d64), "should be real oop");
   1.562 +    assert(cast_to_oop(d64)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d64)->is_scavengable()),
   1.563 +           "cannot embed scavengable oops in code");
   1.564 +  }
   1.565 +#endif
   1.566 +  cbuf.relocate(cbuf.insts_mark(), rspec, format);
   1.567 +  cbuf.insts()->emit_int64(d64);
   1.568 +}
   1.569 +
   1.570 +// Access stack slot for load or store: emits opcode, ModRM (rm_field in the middle field), SIB and an 8- or 32-bit displacement off RSP
   1.571 +void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp)
   1.572 +{
   1.573 +  emit_opcode(cbuf, opcode);                  // (e.g., FILD   [RSP+src])
   1.574 +  if (-0x80 <= disp && disp < 0x80) {         // displacement fits in a signed byte
   1.575 +    emit_rm(cbuf, 0x01, rm_field, RSP_enc);   // R/M byte, mode 0x1 (8-bit displacement follows)
   1.576 +    emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
   1.577 +    emit_d8(cbuf, disp);     // Displacement (8-bit)
   1.578 +  } else {
   1.579 +    emit_rm(cbuf, 0x02, rm_field, RSP_enc);   // R/M byte, mode 0x2 (32-bit displacement follows)
   1.580 +    emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
   1.581 +    emit_d32(cbuf, disp);     // Displacement (32-bit)
   1.582 +  }
   1.583 +}
   1.584 +
   1.585 +   // emit_reg_mem: emit the ModRM byte, optional SIB byte and optional disp8/disp32 for a "reg, [base + index*scale + disp]" operand
   1.586 +void encode_RegMem(CodeBuffer &cbuf,
   1.587 +                   int reg,
   1.588 +                   int base, int index, int scale, int disp, relocInfo::relocType disp_reloc)
   1.589 +{
   1.590 +  assert(disp_reloc == relocInfo::none, "cannot have disp");  // relocated displacements are rejected here, so the disp_reloc != none arms below are not expected to run
   1.591 +  int regenc = reg & 7;      // only the low three bits of each encoding go into the ModRM/SIB fields
   1.592 +  int baseenc = base & 7;
   1.593 +  int indexenc = index & 7;
   1.594 +
   1.595 +  // There is no index (index == 0x4) & no scale, use form without SIB byte; RSP and R12 as base always take the SIB form
   1.596 +  if (index == 0x4 && scale == 0 && base != RSP_enc && base != R12_enc) {
   1.597 +    // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
   1.598 +    if (disp == 0 && base != RBP_enc && base != R13_enc) {
   1.599 +      emit_rm(cbuf, 0x0, regenc, baseenc); // *
   1.600 +    } else if (-0x80 <= disp && disp < 0x80 && disp_reloc == relocInfo::none) {
   1.601 +      // If 8-bit displacement, mode 0x1
   1.602 +      emit_rm(cbuf, 0x1, regenc, baseenc); // *
   1.603 +      emit_d8(cbuf, disp);
   1.604 +    } else {
   1.605 +      // If 32-bit displacement
   1.606 +      if (base == -1) { // Special flag for absolute address
   1.607 +        emit_rm(cbuf, 0x0, regenc, 0x5); // * mode 0x0 with r/m 0x5 and a 32-bit displacement, no base register
   1.608 +        if (disp_reloc != relocInfo::none) {
   1.609 +          emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
   1.610 +        } else {
   1.611 +          emit_d32(cbuf, disp);
   1.612 +        }
   1.613 +      } else {
   1.614 +        // Normal base + offset
   1.615 +        emit_rm(cbuf, 0x2, regenc, baseenc); // *
   1.616 +        if (disp_reloc != relocInfo::none) {
   1.617 +          emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
   1.618 +        } else {
   1.619 +          emit_d32(cbuf, disp);
   1.620 +        }
   1.621 +      }
   1.622 +    }
   1.623 +  } else {
   1.624 +    // Else, encode with the SIB byte (r/m field 0x4 in the ModRM byte, followed by scale/index/base)
   1.625 +    // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
   1.626 +    if (disp == 0 && base != RBP_enc && base != R13_enc) {
   1.627 +      // If no displacement
   1.628 +      emit_rm(cbuf, 0x0, regenc, 0x4); // *
   1.629 +      emit_rm(cbuf, scale, indexenc, baseenc);  // scale is written as-is into the top two bits of the SIB byte
   1.630 +    } else {
   1.631 +      if (-0x80 <= disp && disp < 0x80 && disp_reloc == relocInfo::none) {
   1.632 +        // If 8-bit displacement, mode 0x1
   1.633 +        emit_rm(cbuf, 0x1, regenc, 0x4); // *
   1.634 +        emit_rm(cbuf, scale, indexenc, baseenc);
   1.635 +        emit_d8(cbuf, disp);
   1.636 +      } else {
   1.637 +        // If 32-bit displacement
   1.638 +        if (base == 0x04 ) {
   1.639 +          emit_rm(cbuf, 0x2, regenc, 0x4);
   1.640 +          emit_rm(cbuf, scale, indexenc, 0x04); // XXX is this valid???  NOTE(review): baseenc is already 0x04 here, so both arms emit identical bytes
   1.641 +        } else {
   1.642 +          emit_rm(cbuf, 0x2, regenc, 0x4);
   1.643 +          emit_rm(cbuf, scale, indexenc, baseenc); // *
   1.644 +        }
   1.645 +        if (disp_reloc != relocInfo::none) {
   1.646 +          emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
   1.647 +        } else {
   1.648 +          emit_d32(cbuf, disp);
   1.649 +        }
   1.650 +      }
   1.651 +    }
   1.652 +  }
   1.653 +}
   1.654 +
   1.655 +// Flag fixup after a floating-point compare with a NaN operand. This could be in MacroAssembler but it's fairly C2 specific
   1.656 +void emit_cmpfp_fixup(MacroAssembler& _masm) {
   1.657 +  Label exit;
   1.658 +  __ jccb(Assembler::noParity, exit);  // parity clear: nothing to fix, skip the whole sequence
   1.659 +  __ pushf();                          // put the flags on the stack so they can be edited at [rsp]
   1.660 +  //
   1.661 +  // comiss/ucomiss instructions set ZF,PF,CF flags and
   1.662 +  // zero OF,AF,SF for NaN values.
   1.663 +  // Fixup flags by zeroing ZF,PF so that compare of NaN
   1.664 +  // values returns 'less than' result (CF is set).
   1.665 +  // Leave the rest of flags unchanged.
   1.666 +  //
   1.667 +  //    7 6 5 4 3 2 1 0
   1.668 +  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
   1.669 +  //    0 0 1 0 1 0 1 1   (0x2B)  -- S and A are masked off too; per the note above they are already zero
   1.670 +  //
   1.671 +  __ andq(Address(rsp, 0), 0xffffff2b);  // bits 8-31 of the mask are all ones, only the low byte is filtered
   1.672 +  __ popf();                             // reload the edited flags
   1.673 +  __ bind(exit);
   1.674 +}
   1.675 +
   1.676 +void emit_cmpfp3(MacroAssembler& _masm, Register dst) {  // Turn the flags left by a preceding compare into a -1 / 0 / 1 value in dst.
   1.677 +  Label done;
   1.678 +  __ movl(dst, -1);                     // default result: -1
   1.679 +  __ jcc(Assembler::parity, done);      // parity set (the NaN case per the flag notes in emit_cmpfp_fixup): keep -1
   1.680 +  __ jcc(Assembler::below, done);       // below: keep -1
   1.681 +  __ setb(Assembler::notEqual, dst);    // otherwise set the low byte from the notEqual condition (presumably 1 if not equal, 0 if equal)
   1.682 +  __ movzbl(dst, dst);                  // zero-extend that byte into the 32-bit result
   1.683 +  __ bind(done);
   1.684 +}
   1.685 +
   1.686 +
   1.687 +//=============================================================================
   1.688 +const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;  // the constant base node's output register mask is the empty mask
   1.689 +
   1.690 +int Compile::ConstantTable::calculate_table_base_offset() const {  // Offset applied to the constant table base; always 0 here.
   1.691 +  return 0;  // absolute addressing, no offset
   1.692 +}
   1.693 +
   1.694 +bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }  // no post-allocation expansion is requested for this node
   1.695 +void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {  // Must never be called: requires_postalloc_expand() returns false.
   1.696 +  ShouldNotReachHere();
   1.697 +}
   1.698 +
   1.699 +void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {  // Emits no bytes, matching size() == 0.
   1.700 +  // Empty encoding
   1.701 +}
   1.702 +
   1.703 +uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {  // Encoded size in bytes; zero because emit() writes nothing.
   1.704 +  return 0;
   1.705 +}
   1.706 +
   1.707 +#ifndef PRODUCT
   1.708 +void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {  // Non-PRODUCT builds only: prints a one-line placeholder to st.
   1.709 +  st->print("# MachConstantBaseNode (empty encoding)");
   1.710 +}
   1.711 +#endif
   1.712 +
   1.713 +
   1.714 +//=============================================================================
   1.715 +#ifndef PRODUCT
   1.716 +void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {  // Non-PRODUCT only: prints a textual sketch of the method prolog to st.
   1.717 +  Compile* C = ra_->C;
   1.718 +
   1.719 +  int framesize = C->frame_size_in_bytes();
   1.720 +  int bangsize = C->bang_size_in_bytes();
   1.721 +  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
   1.722 +  // Remove wordSize for return addr which is already pushed.
   1.723 +  framesize -= wordSize;
   1.724 +
   1.725 +  if (C->need_stack_bang(bangsize)) {  // stack-bang variant: rbp is pushed, then the remainder of the frame is allocated
   1.726 +    framesize -= wordSize;  // the printed pushq accounts for one more word
   1.727 +    st->print("# stack bang (%d bytes)", bangsize);
   1.728 +    st->print("\n\t");
   1.729 +    st->print("pushq   rbp\t# Save rbp");
   1.730 +    if (framesize) {  // nothing left to allocate when the frame is only return address + rbp
   1.731 +      st->print("\n\t");
   1.732 +      st->print("subq    rsp, #%d\t# Create frame",framesize);
   1.733 +    }
   1.734 +  } else {  // no stack bang: allocate the whole frame first, then store rbp into its top slot
   1.735 +    st->print("subq    rsp, #%d\t# Create frame",framesize);
   1.736 +    st->print("\n\t");
   1.737 +    framesize -= wordSize;  // framesize now doubles as the rsp-relative offset of the rbp save slot
   1.738 +    st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
   1.739 +  }
   1.740 +
   1.741 +  if (VerifyStackAtCalls) {
   1.742 +    st->print("\n\t");
   1.743 +    framesize -= wordSize;  // cookie slot is one word below the saved rbp in both variants
   1.744 +    st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
   1.745 +#ifdef ASSERT
   1.746 +    st->print("\n\t");
   1.747 +    st->print("# stack alignment check");
   1.748 +#endif
   1.749 +  }
   1.750 +  st->cr();
   1.751 +}
   1.752 +#endif
   1.753 +
   1.754 +void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {  // Emits the method prolog into cbuf and records the frame-complete offset.
   1.755 +  Compile* C = ra_->C;
   1.756 +  MacroAssembler _masm(&cbuf);
   1.757 +
   1.758 +  int framesize = C->frame_size_in_bytes();
   1.759 +  int bangsize = C->bang_size_in_bytes();
   1.760 +
   1.761 +  __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, false);  // bang size is passed as 0 when no stack bang is needed; NOTE(review): meaning of the trailing 'false' is not visible here
   1.762 +
   1.763 +  C->set_frame_complete(cbuf.insts_size());  // code offset reached once verified_entry has been emitted
   1.764 +
   1.765 +  if (C->has_mach_constant_base_node()) {
   1.766 +    // NOTE: We set the table base offset here because users might be
   1.767 +    // emitted before MachConstantBaseNode.
   1.768 +    Compile::ConstantTable& constant_table = C->constant_table();
   1.769 +    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
   1.770 +  }
   1.771 +}
   1.772 +
   1.773 +uint MachPrologNode::size(PhaseRegAlloc* ra_) const  // Encoded size of the prolog; delegated to the generic MachNode::size.
   1.774 +{
   1.775 +  return MachNode::size(ra_); // too many variables; just compute it
   1.776 +                              // the hard way
   1.777 +}
   1.778 +
   1.779 +int MachPrologNode::reloc() const  // NOTE(review): presumably a bound on relocation entries for this node - confirm against callers
   1.780 +{
   1.781 +  return 0; // a large enough number (NOTE(review): value is 0 while MachEpilogNode::reloc() returns 2 - confirm intended)
   1.782 +}
   1.783 +
   1.784 +//=============================================================================
   1.785 +#ifndef PRODUCT
   1.786 +void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const  // Non-PRODUCT only: prints a textual sketch of the method epilog to st.
   1.787 +{
   1.788 +  Compile* C = ra_->C;
   1.789 +  if (C->max_vector_size() > 16) {  // same condition under which emit() issues vzeroupper
   1.790 +    st->print("vzeroupper");
   1.791 +    st->cr(); st->print("\t");
   1.792 +  }
   1.793 +
   1.794 +  int framesize = C->frame_size_in_bytes();
   1.795 +  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
   1.796 +  // Remove word for return adr already pushed
   1.797 +  // and RBP
   1.798 +  framesize -= 2*wordSize;
   1.799 +
   1.800 +  if (framesize) {  // no addq is shown when only the return address and rbp make up the frame
   1.801 +    st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
   1.802 +    st->print("\t");
   1.803 +  }
   1.804 +
   1.805 +  st->print_cr("popq   rbp");
   1.806 +  if (do_polling() && C->is_method_compilation()) {  // same condition under which emit() adds the safepoint poll
   1.807 +    st->print("\t");
   1.808 +    if (Assembler::is_polling_page_far()) {  // far page: address is loaded into rscratch1 first
   1.809 +      st->print_cr("movq   rscratch1, #polling_page_address\n\t"
   1.810 +                   "testl  rax, [rscratch1]\t"
   1.811 +                   "# Safepoint: poll for GC");
   1.812 +    } else {
   1.813 +      st->print_cr("testl  rax, [rip + #offset_to_poll_page]\t"
   1.814 +                   "# Safepoint: poll for GC");
   1.815 +    }
   1.816 +  }
   1.817 +}
   1.818 +#endif
   1.819 +
   1.820 +void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const  // Emits the method epilog: optional vzeroupper, frame teardown, pop of rbp, optional safepoint poll.
   1.821 +{
   1.822 +  Compile* C = ra_->C;
   1.823 +  if (C->max_vector_size() > 16) {
   1.824 +    // Clear upper bits of YMM registers when current compiled code uses
   1.825 +    // wide vectors to avoid AVX <-> SSE transition penalty during call.
   1.826 +    MacroAssembler _masm(&cbuf);
   1.827 +    __ vzeroupper();
   1.828 +  }
   1.829 +
   1.830 +  int framesize = C->frame_size_in_bytes();
   1.831 +  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
   1.832 +  // Remove word for return adr already pushed
   1.833 +  // and RBP
   1.834 +  framesize -= 2*wordSize;
   1.835 +
   1.836 +  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
   1.837 +
   1.838 +  if (framesize) {  // skip the add entirely when nothing beyond return address + rbp was allocated
   1.839 +    emit_opcode(cbuf, Assembler::REX_W);  // REX.W prefix selects the 64-bit form of the add
   1.840 +    if (framesize < 0x80) {  // small enough for the one-byte immediate encoding
   1.841 +      emit_opcode(cbuf, 0x83); // addq rsp, #framesize
   1.842 +      emit_rm(cbuf, 0x3, 0x00, RSP_enc);  // ModRM byte: register-direct, opcode extension 0, r/m = rsp
   1.843 +      emit_d8(cbuf, framesize);
   1.844 +    } else {  // otherwise use the four-byte immediate encoding
   1.845 +      emit_opcode(cbuf, 0x81); // addq rsp, #framesize
   1.846 +      emit_rm(cbuf, 0x3, 0x00, RSP_enc);  // same ModRM byte as the short form
   1.847 +      emit_d32(cbuf, framesize);
   1.848 +    }
   1.849 +  }
   1.850 +
   1.851 +  // popq rbp
   1.852 +  emit_opcode(cbuf, 0x58 | RBP_enc);  // single-byte pop with the register number folded into the opcode
   1.853 +
   1.854 +  if (do_polling() && C->is_method_compilation()) {  // return-time safepoint poll
   1.855 +    MacroAssembler _masm(&cbuf);
   1.856 +    AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_return_type);
   1.857 +    if (Assembler::is_polling_page_far()) {  // far page: load its address into rscratch1 and poll through the register
   1.858 +      __ lea(rscratch1, polling_page);
   1.859 +      __ relocate(relocInfo::poll_return_type);  // relocation is attached just before the testl that performs the poll
   1.860 +      __ testl(rax, Address(rscratch1, 0));
   1.861 +    } else {  // near page: poll directly through the address literal
   1.862 +      __ testl(rax, polling_page);
   1.863 +    }
   1.864 +  }
   1.865 +}
   1.866 +
   1.867 +uint MachEpilogNode::size(PhaseRegAlloc* ra_) const  // Encoded size of the epilog; delegated to the generic MachNode::size.
   1.868 +{
   1.869 +  return MachNode::size(ra_); // too many variables; just compute it
   1.870 +                              // the hard way
   1.871 +}
   1.872 +
   1.873 +int MachEpilogNode::reloc() const  // NOTE(review): presumably a bound on relocation entries for this node - confirm against callers
   1.874 +{
   1.875 +  return 2; // a large enough number (emit() touches relocation info only in the safepoint-poll sequence)
   1.876 +}
   1.877 +
   1.878 +const Pipeline* MachEpilogNode::pipeline() const  // Pipeline description for the epilog: the generic MachNode pipeline class.
   1.879 +{
   1.880 +  return MachNode::pipeline_class();
   1.881 +}
   1.882 +
   1.883 +int MachEpilogNode::safepoint_offset() const  // Always reports 0; NOTE(review): how callers interpret this offset is not visible here.
   1.884 +{
   1.885 +  return 0;
   1.886 +}
   1.887 +
   1.888 +//=============================================================================
   1.889 +
   1.890 +enum RC {  // Storage class of an allocated register, as computed by rc_class() below.
   1.891 +  rc_bad,  // the OptoReg is not valid
   1.892 +  rc_int,  // general-purpose register (VMReg::is_Register())
   1.893 +  rc_float,  // XMM register (VMReg::is_XMMRegister())
   1.894 +  rc_stack  // stack slot (OptoReg::is_stack())
   1.895 +};
   1.896 +
   1.897 +static enum RC rc_class(OptoReg::Name reg)  // Classifies reg as invalid, stack slot, general-purpose register or XMM register.
   1.898 +{
   1.899 +  if( !OptoReg::is_valid(reg)  ) return rc_bad;
   1.900 +
   1.901 +  if (OptoReg::is_stack(reg)) return rc_stack;
   1.902 +
   1.903 +  VMReg r = OptoReg::as_VMReg(reg);  // only non-stack registers reach this conversion
   1.904 +
   1.905 +  if (r->is_Register()) return rc_int;
   1.906 +
   1.907 +  assert(r->is_XMMRegister(), "must be");  // anything left is expected to be an XMM register
   1.908 +  return rc_float;
   1.909 +}
   1.910 +
   1.911 +// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
   1.912 +static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,  // used below for XMM -> XMM vector copies
   1.913 +                          int src_hi, int dst_hi, uint ireg, outputStream* st);
   1.914 +
   1.915 +static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,  // used below with is_load=false for XMM -> stack and is_load=true for stack -> XMM
   1.916 +                            int stack_offset, int reg, uint ireg, outputStream* st);
   1.917 +
   1.918 +static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,  // Copies a vector-sized value between two rsp-relative stack slots: emits code when cbuf is non-NULL, otherwise (non-PRODUCT) prints it to st.
   1.919 +                                      int dst_offset, uint ireg, outputStream* st) {
   1.920 +  if (cbuf) {
   1.921 +    MacroAssembler _masm(cbuf);
   1.922 +    switch (ireg) {  // ireg selects the vector width; the comments in the printing half name 32/64/128/256 bits
   1.923 +    case Op_VecS:
   1.924 +      __ movq(Address(rsp, -8), rax);  // park rax in the word just below rsp so it can serve as a temp
   1.925 +      __ movl(rax, Address(rsp, src_offset));
   1.926 +      __ movl(Address(rsp, dst_offset), rax);
   1.927 +      __ movq(rax, Address(rsp, -8));  // restore rax
   1.928 +      break;
   1.929 +    case Op_VecD:
   1.930 +      __ pushq(Address(rsp, src_offset));  // push/pop pair moves 8 bytes without needing a temp register
   1.931 +      __ popq (Address(rsp, dst_offset));
   1.932 +      break;
   1.933 +    case Op_VecX:
   1.934 +      __ pushq(Address(rsp, src_offset));  // low 8 bytes
   1.935 +      __ popq (Address(rsp, dst_offset));
   1.936 +      __ pushq(Address(rsp, src_offset+8));  // high 8 bytes
   1.937 +      __ popq (Address(rsp, dst_offset+8));
   1.938 +      break;
   1.939 +    case Op_VecY:
   1.940 +      __ vmovdqu(Address(rsp, -32), xmm0);  // park xmm0 in the 32 bytes just below rsp so it can serve as a temp
   1.941 +      __ vmovdqu(xmm0, Address(rsp, src_offset));
   1.942 +      __ vmovdqu(Address(rsp, dst_offset), xmm0);
   1.943 +      __ vmovdqu(xmm0, Address(rsp, -32));  // restore xmm0
   1.944 +      break;
   1.945 +    default:
   1.946 +      ShouldNotReachHere();
   1.947 +    }
   1.948 +#ifndef PRODUCT
   1.949 +  } else {  // printing half: mirrors the instruction sequences emitted above
   1.950 +    switch (ireg) {
   1.951 +    case Op_VecS:
   1.952 +      st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
   1.953 +                "movl    rax, [rsp + #%d]\n\t"
   1.954 +                "movl    [rsp + #%d], rax\n\t"
   1.955 +                "movq    rax, [rsp - #8]",
   1.956 +                src_offset, dst_offset);
   1.957 +      break;
   1.958 +    case Op_VecD:
   1.959 +      st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
   1.960 +                "popq    [rsp + #%d]",
   1.961 +                src_offset, dst_offset);
   1.962 +      break;
   1.963 +     case Op_VecX:
   1.964 +      st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
   1.965 +                "popq    [rsp + #%d]\n\t"
   1.966 +                "pushq   [rsp + #%d]\n\t"
   1.967 +                "popq    [rsp + #%d]",
   1.968 +                src_offset, dst_offset, src_offset+8, dst_offset+8);
   1.969 +      break;
   1.970 +    case Op_VecY:
   1.971 +      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
   1.972 +                "vmovdqu xmm0, [rsp + #%d]\n\t"
   1.973 +                "vmovdqu [rsp + #%d], xmm0\n\t"
   1.974 +                "vmovdqu xmm0, [rsp - #32]",
   1.975 +                src_offset, dst_offset);
   1.976 +      break;
   1.977 +    default:
   1.978 +      ShouldNotReachHere();
   1.979 +    }
   1.980 +#endif
   1.981 +  }
   1.982 +}
   1.983 +
   1.984 +uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,  // Shared worker for emit() and format(): emits the copy when cbuf is non-NULL, otherwise (non-PRODUCT) prints it to st. Every return in this body yields 0.
   1.985 +                                       PhaseRegAlloc* ra_,
   1.986 +                                       bool do_size,  // not consulted anywhere in this body
   1.987 +                                       outputStream* st) const {
   1.988 +  assert(cbuf != NULL || st  != NULL, "sanity");
   1.989 +  // Get registers to move
   1.990 +  OptoReg::Name src_second = ra_->get_reg_second(in(1));
   1.991 +  OptoReg::Name src_first = ra_->get_reg_first(in(1));
   1.992 +  OptoReg::Name dst_second = ra_->get_reg_second(this);
   1.993 +  OptoReg::Name dst_first = ra_->get_reg_first(this);
   1.994 +
   1.995 +  enum RC src_second_rc = rc_class(src_second);
   1.996 +  enum RC src_first_rc = rc_class(src_first);
   1.997 +  enum RC dst_second_rc = rc_class(dst_second);
   1.998 +  enum RC dst_first_rc = rc_class(dst_first);
   1.999 +
  1.1000 +  assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
  1.1001 +         "must move at least 1 register" );
  1.1002 +
  1.1003 +  if (src_first == dst_first && src_second == dst_second) {
  1.1004 +    // Self copy, no move
  1.1005 +    return 0;
  1.1006 +  }
  1.1007 +  if (bottom_type()->isa_vect() != NULL) {  // vector values are delegated to the vec_* helpers, chosen by source/destination class
  1.1008 +    uint ireg = ideal_reg();
  1.1009 +    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
  1.1010 +    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY), "sanity");
  1.1011 +    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
  1.1012 +      // mem -> mem
  1.1013 +      int src_offset = ra_->reg2offset(src_first);
  1.1014 +      int dst_offset = ra_->reg2offset(dst_first);
  1.1015 +      vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
  1.1016 +    } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
  1.1017 +      vec_mov_helper(cbuf, false, src_first, dst_first, src_second, dst_second, ireg, st);
  1.1018 +    } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
  1.1019 +      int stack_offset = ra_->reg2offset(dst_first);
  1.1020 +      vec_spill_helper(cbuf, false, false, stack_offset, src_first, ireg, st);
  1.1021 +    } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
  1.1022 +      int stack_offset = ra_->reg2offset(src_first);
  1.1023 +      vec_spill_helper(cbuf, false, true,  stack_offset, dst_first, ireg, st);
  1.1024 +    } else {
  1.1025 +      ShouldNotReachHere();
  1.1026 +    }
  1.1027 +    return 0;
  1.1028 +  }
  1.1029 +  if (src_first_rc == rc_stack) {
  1.1030 +    // mem ->
  1.1031 +    if (dst_first_rc == rc_stack) {
  1.1032 +      // mem -> mem
  1.1033 +      assert(src_second != dst_first, "overlap");
  1.1034 +      if ((src_first & 1) == 0 && src_first + 1 == src_second &&  // 64-bit test used throughout: both ends are an even-numbered first half immediately followed by its second half
  1.1035 +          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
  1.1036 +        // 64-bit
  1.1037 +        int src_offset = ra_->reg2offset(src_first);
  1.1038 +        int dst_offset = ra_->reg2offset(dst_first);
  1.1039 +        if (cbuf) {
  1.1040 +          MacroAssembler _masm(cbuf);
  1.1041 +          __ pushq(Address(rsp, src_offset));
  1.1042 +          __ popq (Address(rsp, dst_offset));
  1.1043 +#ifndef PRODUCT
  1.1044 +        } else {
  1.1045 +          st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
  1.1046 +                    "popq    [rsp + #%d]",
  1.1047 +                     src_offset, dst_offset);
  1.1048 +#endif
  1.1049 +        }
  1.1050 +      } else {
  1.1051 +        // 32-bit
  1.1052 +        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
  1.1053 +        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
  1.1054 +        // No pushl/popl, so:
  1.1055 +        int src_offset = ra_->reg2offset(src_first);
  1.1056 +        int dst_offset = ra_->reg2offset(dst_first);
  1.1057 +        if (cbuf) {
  1.1058 +          MacroAssembler _masm(cbuf);
  1.1059 +          __ movq(Address(rsp, -8), rax);  // park rax in the word just below rsp so it can serve as a temp
  1.1060 +          __ movl(rax, Address(rsp, src_offset));
  1.1061 +          __ movl(Address(rsp, dst_offset), rax);
  1.1062 +          __ movq(rax, Address(rsp, -8));  // restore rax
  1.1063 +#ifndef PRODUCT
  1.1064 +        } else {
  1.1065 +          st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
  1.1066 +                    "movl    rax, [rsp + #%d]\n\t"
  1.1067 +                    "movl    [rsp + #%d], rax\n\t"
  1.1068 +                    "movq    rax, [rsp - #8]",
  1.1069 +                     src_offset, dst_offset);
  1.1070 +#endif
  1.1071 +        }
  1.1072 +      }
  1.1073 +      return 0;
  1.1074 +    } else if (dst_first_rc == rc_int) {
  1.1075 +      // mem -> gpr
  1.1076 +      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
  1.1077 +          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
  1.1078 +        // 64-bit
  1.1079 +        int offset = ra_->reg2offset(src_first);
  1.1080 +        if (cbuf) {
  1.1081 +          MacroAssembler _masm(cbuf);
  1.1082 +          __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
  1.1083 +#ifndef PRODUCT
  1.1084 +        } else {
  1.1085 +          st->print("movq    %s, [rsp + #%d]\t# spill",
  1.1086 +                     Matcher::regName[dst_first],
  1.1087 +                     offset);
  1.1088 +#endif
  1.1089 +        }
  1.1090 +      } else {
  1.1091 +        // 32-bit
  1.1092 +        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
  1.1093 +        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
  1.1094 +        int offset = ra_->reg2offset(src_first);
  1.1095 +        if (cbuf) {
  1.1096 +          MacroAssembler _masm(cbuf);
  1.1097 +          __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
  1.1098 +#ifndef PRODUCT
  1.1099 +        } else {
  1.1100 +          st->print("movl    %s, [rsp + #%d]\t# spill",
  1.1101 +                     Matcher::regName[dst_first],
  1.1102 +                     offset);
  1.1103 +#endif
  1.1104 +        }
  1.1105 +      }
  1.1106 +      return 0;
  1.1107 +    } else if (dst_first_rc == rc_float) {
  1.1108 +      // mem-> xmm
  1.1109 +      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
  1.1110 +          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
  1.1111 +        // 64-bit
  1.1112 +        int offset = ra_->reg2offset(src_first);
  1.1113 +        if (cbuf) {
  1.1114 +          MacroAssembler _masm(cbuf);
  1.1115 +          __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
  1.1116 +#ifndef PRODUCT
  1.1117 +        } else {
  1.1118 +          st->print("%s  %s, [rsp + #%d]\t# spill",
  1.1119 +                     UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
  1.1120 +                     Matcher::regName[dst_first],
  1.1121 +                     offset);
  1.1122 +#endif
  1.1123 +        }
  1.1124 +      } else {
  1.1125 +        // 32-bit
  1.1126 +        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
  1.1127 +        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
  1.1128 +        int offset = ra_->reg2offset(src_first);
  1.1129 +        if (cbuf) {
  1.1130 +          MacroAssembler _masm(cbuf);
  1.1131 +          __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
  1.1132 +#ifndef PRODUCT
  1.1133 +        } else {
  1.1134 +          st->print("movss   %s, [rsp + #%d]\t# spill",
  1.1135 +                     Matcher::regName[dst_first],
  1.1136 +                     offset);
  1.1137 +#endif
  1.1138 +        }
  1.1139 +      }
  1.1140 +      return 0;
  1.1141 +    }
  1.1142 +  } else if (src_first_rc == rc_int) {
  1.1143 +    // gpr ->
  1.1144 +    if (dst_first_rc == rc_stack) {
  1.1145 +      // gpr -> mem
  1.1146 +      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
  1.1147 +          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
  1.1148 +        // 64-bit
  1.1149 +        int offset = ra_->reg2offset(dst_first);
  1.1150 +        if (cbuf) {
  1.1151 +          MacroAssembler _masm(cbuf);
  1.1152 +          __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
  1.1153 +#ifndef PRODUCT
  1.1154 +        } else {
  1.1155 +          st->print("movq    [rsp + #%d], %s\t# spill",
  1.1156 +                     offset,
  1.1157 +                     Matcher::regName[src_first]);
  1.1158 +#endif
  1.1159 +        }
  1.1160 +      } else {
  1.1161 +        // 32-bit
  1.1162 +        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
  1.1163 +        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
  1.1164 +        int offset = ra_->reg2offset(dst_first);
  1.1165 +        if (cbuf) {
  1.1166 +          MacroAssembler _masm(cbuf);
  1.1167 +          __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
  1.1168 +#ifndef PRODUCT
  1.1169 +        } else {
  1.1170 +          st->print("movl    [rsp + #%d], %s\t# spill",
  1.1171 +                     offset,
  1.1172 +                     Matcher::regName[src_first]);
  1.1173 +#endif
  1.1174 +        }
  1.1175 +      }
  1.1176 +      return 0;
  1.1177 +    } else if (dst_first_rc == rc_int) {
  1.1178 +      // gpr -> gpr
  1.1179 +      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
  1.1180 +          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
  1.1181 +        // 64-bit
  1.1182 +        if (cbuf) {
  1.1183 +          MacroAssembler _masm(cbuf);
  1.1184 +          __ movq(as_Register(Matcher::_regEncode[dst_first]),
  1.1185 +                  as_Register(Matcher::_regEncode[src_first]));
  1.1186 +#ifndef PRODUCT
  1.1187 +        } else {
  1.1188 +          st->print("movq    %s, %s\t# spill",
  1.1189 +                     Matcher::regName[dst_first],
  1.1190 +                     Matcher::regName[src_first]);
  1.1191 +#endif
  1.1192 +        }
  1.1193 +        return 0;
  1.1194 +      } else {
  1.1195 +        // 32-bit
  1.1196 +        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
  1.1197 +        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
  1.1198 +        if (cbuf) {
  1.1199 +          MacroAssembler _masm(cbuf);
  1.1200 +          __ movl(as_Register(Matcher::_regEncode[dst_first]),
  1.1201 +                  as_Register(Matcher::_regEncode[src_first]));
  1.1202 +#ifndef PRODUCT
  1.1203 +        } else {
  1.1204 +          st->print("movl    %s, %s\t# spill",
  1.1205 +                     Matcher::regName[dst_first],
  1.1206 +                     Matcher::regName[src_first]);
  1.1207 +#endif
  1.1208 +        }
  1.1209 +        return 0;
  1.1210 +      }
  1.1211 +    } else if (dst_first_rc == rc_float) {
  1.1212 +      // gpr -> xmm
  1.1213 +      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
  1.1214 +          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
  1.1215 +        // 64-bit
  1.1216 +        if (cbuf) {
  1.1217 +          MacroAssembler _masm(cbuf);
  1.1218 +          __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
  1.1219 +#ifndef PRODUCT
  1.1220 +        } else {
  1.1221 +          st->print("movdq   %s, %s\t# spill",
  1.1222 +                     Matcher::regName[dst_first],
  1.1223 +                     Matcher::regName[src_first]);
  1.1224 +#endif
  1.1225 +        }
  1.1226 +      } else {
  1.1227 +        // 32-bit
  1.1228 +        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
  1.1229 +        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
  1.1230 +        if (cbuf) {
  1.1231 +          MacroAssembler _masm(cbuf);
  1.1232 +          __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
  1.1233 +#ifndef PRODUCT
  1.1234 +        } else {
  1.1235 +          st->print("movdl   %s, %s\t# spill",
  1.1236 +                     Matcher::regName[dst_first],
  1.1237 +                     Matcher::regName[src_first]);
  1.1238 +#endif
  1.1239 +        }
  1.1240 +      }
  1.1241 +      return 0;
  1.1242 +    }
  1.1243 +  } else if (src_first_rc == rc_float) {
  1.1244 +    // xmm ->
  1.1245 +    if (dst_first_rc == rc_stack) {
  1.1246 +      // xmm -> mem
  1.1247 +      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
  1.1248 +          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
  1.1249 +        // 64-bit
  1.1250 +        int offset = ra_->reg2offset(dst_first);
  1.1251 +        if (cbuf) {
  1.1252 +          MacroAssembler _masm(cbuf);
  1.1253 +          __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
  1.1254 +#ifndef PRODUCT
  1.1255 +        } else {
  1.1256 +          st->print("movsd   [rsp + #%d], %s\t# spill",
  1.1257 +                     offset,
  1.1258 +                     Matcher::regName[src_first]);
  1.1259 +#endif
  1.1260 +        }
  1.1261 +      } else {
  1.1262 +        // 32-bit
  1.1263 +        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
  1.1264 +        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
  1.1265 +        int offset = ra_->reg2offset(dst_first);
  1.1266 +        if (cbuf) {
  1.1267 +          MacroAssembler _masm(cbuf);
  1.1268 +          __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
  1.1269 +#ifndef PRODUCT
  1.1270 +        } else {
  1.1271 +          st->print("movss   [rsp + #%d], %s\t# spill",
  1.1272 +                     offset,
  1.1273 +                     Matcher::regName[src_first]);
  1.1274 +#endif
  1.1275 +        }
  1.1276 +      }
  1.1277 +      return 0;
  1.1278 +    } else if (dst_first_rc == rc_int) {
  1.1279 +      // xmm -> gpr
  1.1280 +      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
  1.1281 +          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
  1.1282 +        // 64-bit
  1.1283 +        if (cbuf) {
  1.1284 +          MacroAssembler _masm(cbuf);
  1.1285 +          __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
  1.1286 +#ifndef PRODUCT
  1.1287 +        } else {
  1.1288 +          st->print("movdq   %s, %s\t# spill",
  1.1289 +                     Matcher::regName[dst_first],
  1.1290 +                     Matcher::regName[src_first]);
  1.1291 +#endif
  1.1292 +        }
  1.1293 +      } else {
  1.1294 +        // 32-bit
  1.1295 +        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
  1.1296 +        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
  1.1297 +        if (cbuf) {
  1.1298 +          MacroAssembler _masm(cbuf);
  1.1299 +          __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
  1.1300 +#ifndef PRODUCT
  1.1301 +        } else {
  1.1302 +          st->print("movdl   %s, %s\t# spill",
  1.1303 +                     Matcher::regName[dst_first],
  1.1304 +                     Matcher::regName[src_first]);
  1.1305 +#endif
  1.1306 +        }
  1.1307 +      }
  1.1308 +      return 0;
  1.1309 +    } else if (dst_first_rc == rc_float) {
  1.1310 +      // xmm -> xmm
  1.1311 +      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
  1.1312 +          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
  1.1313 +        // 64-bit
  1.1314 +        if (cbuf) {
  1.1315 +          MacroAssembler _masm(cbuf);
  1.1316 +          __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
  1.1317 +#ifndef PRODUCT
  1.1318 +        } else {
  1.1319 +          st->print("%s  %s, %s\t# spill",
  1.1320 +                     UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
  1.1321 +                     Matcher::regName[dst_first],
  1.1322 +                     Matcher::regName[src_first]);
  1.1323 +#endif
  1.1324 +        }
  1.1325 +      } else {
  1.1326 +        // 32-bit
  1.1327 +        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
  1.1328 +        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
  1.1329 +        if (cbuf) {
  1.1330 +          MacroAssembler _masm(cbuf);
  1.1331 +          __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
  1.1332 +#ifndef PRODUCT
  1.1333 +        } else {
  1.1334 +          st->print("%s  %s, %s\t# spill",
  1.1335 +                     UseXmmRegToRegMoveAll ? "movaps" : "movss ",
  1.1336 +                     Matcher::regName[dst_first],
  1.1337 +                     Matcher::regName[src_first]);
  1.1338 +#endif
  1.1339 +        }
  1.1340 +      }
  1.1341 +      return 0;
  1.1342 +    }
  1.1343 +  }
  1.1344 +
  1.1345 +  assert(0," foo ");  // reached only when no source/destination class combination above matched
  1.1346 +  Unimplemented();
  1.1347 +  return 0;
  1.1348 +}
  1.1349 +
  1.1350 +#ifndef PRODUCT
  1.1351 +void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {  // Non-PRODUCT only: prints the spill copy by running implementation() without a code buffer.
  1.1352 +  implementation(NULL, ra_, false, st);
  1.1353 +}
  1.1354 +#endif
  1.1355 +
  1.1356 +void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {  // Emits the spill copy by running implementation() with the code buffer and no output stream.
  1.1357 +  implementation(&cbuf, ra_, false, NULL);
  1.1358 +}
  1.1359 +
  1.1360 +uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {  // Encoded size of the copy; delegated to the generic MachNode::size rather than implementation().
  1.1361 +  return MachNode::size(ra_);
  1.1362 +}
  1.1363 +
  1.1364 +//=============================================================================
  1.1365 +#ifndef PRODUCT
  1.1366 +void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const  // Non-PRODUCT only: prints the leaq that emit() encodes.
  1.1367 +{
  1.1368 +  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());  // rsp-relative offset derived from the first element of input 0's register mask
  1.1369 +  int reg = ra_->get_reg_first(this);  // register assigned to this node, used to index the name table
  1.1370 +  st->print("leaq    %s, [rsp + #%d]\t# box lock",
  1.1371 +            Matcher::regName[reg], offset);
  1.1372 +}
  1.1373 +#endif
  1.1374 +
  1.1375 +void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const  // Hand-encodes "lea reg, [rsp + offset]", choosing an 8-bit or 32-bit displacement.
  1.1376 +{
  1.1377 +  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  1.1378 +  int reg = ra_->get_encode(this);  // hardware encoding of the destination register
  1.1379 +  if (offset >= 0x80) {  // does not fit a signed byte: use the 32-bit displacement form (8 bytes total, see size())
  1.1380 +    emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);  // encodings 8 and above need the REX.R extension bit
  1.1381 +    emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
  1.1382 +    emit_rm(cbuf, 0x2, reg & 7, 0x04);  // ModRM: 32-bit displacement, low 3 bits of reg, r/m=100 meaning a SIB byte follows
  1.1383 +    emit_rm(cbuf, 0x0, 0x04, RSP_enc);  // SIB: no index register, base = rsp
  1.1384 +    emit_d32(cbuf, offset);
  1.1385 +  } else {  // 8-bit displacement form (5 bytes total, see size())
  1.1386 +    emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
  1.1387 +    emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
  1.1388 +    emit_rm(cbuf, 0x1, reg & 7, 0x04);  // ModRM: 8-bit displacement, otherwise as above
  1.1389 +    emit_rm(cbuf, 0x0, 0x04, RSP_enc);  // SIB: no index register, base = rsp
  1.1390 +    emit_d8(cbuf, offset);
  1.1391 +  }
  1.1392 +}
  1.1393 +
  1.1394 +uint BoxLockNode::size(PhaseRegAlloc *ra_) const  // Byte count of the encoding produced by emit(); must stay in step with it.
  1.1395 +{
  1.1396 +  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  1.1397 +  return (offset < 0x80) ? 5 : 8; // REX + opcode + ModRM + SIB, plus a 1-byte or 4-byte displacement
  1.1398 +}
  1.1399 +
  1.1400 +//=============================================================================
  1.1401 +#ifndef PRODUCT
  1.1402 +void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const  // Non-PRODUCT only: prints a sketch of the inline-cache check that emit() generates.
  1.1403 +{
  1.1404 +  if (UseCompressedClassPointers) {  // compressed klass: shown as load, decode, then compare against rax
  1.1405 +    st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
  1.1406 +    st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
  1.1407 +    st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
  1.1408 +  } else {  // uncompressed klass: shown as a direct compare against the klass field
  1.1409 +    st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
  1.1410 +                 "# Inline cache check");
  1.1411 +  }
  1.1412 +  st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
  1.1413 +  st->print_cr("\tnop\t# nops to align entry point");
  1.1414 +}
  1.1415 +#endif
  1.1416 +
  1.1417 +void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const  // Emits the inline-cache check: compare rax with the klass of the object in j_rarg0, jump to the IC miss stub on mismatch, then pad with nops.
  1.1418 +{
  1.1419 +  MacroAssembler masm(&cbuf);
  1.1420 +  uint insts_size = cbuf.insts_size();  // starting offset, used below to measure how many bytes this node emitted
  1.1421 +  if (UseCompressedClassPointers) {
  1.1422 +    masm.load_klass(rscratch1, j_rarg0);  // klass is fetched into rscratch1 before comparing
  1.1423 +    masm.cmpptr(rax, rscratch1);
  1.1424 +  } else {
  1.1425 +    masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));  // compare directly against the klass field in memory
  1.1426 +  }
  1.1427 +
  1.1428 +  masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  1.1429 +
  1.1430 +  /* WARNING these NOPs are critical so that verified entry point is properly
  1.1431 +     4 bytes aligned for patching by NativeJump::patch_verified_entry() */
  1.1432 +  int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);  // bytes needed to round the emitted length up to a multiple of 4 (4 if already aligned; masked to 0 below)
  1.1433 +  if (OptoBreakpoint) {
  1.1434 +    // Leave space for int3
  1.1435 +    nops_cnt -= 1;
  1.1436 +  }
  1.1437 +  nops_cnt &= 0x3; // Do not add nops if code is aligned.
  1.1438 +  if (nops_cnt > 0)
  1.1439 +    masm.nop(nops_cnt);
  1.1440 +}
  1.1441 +
  1.1442 +uint MachUEPNode::size(PhaseRegAlloc* ra_) const
  1.1443 +{ // Delegates to the generic MachNode::size() instead of predicting the length here.
  1.1444 +  return MachNode::size(ra_); // too many variables; just compute it
  1.1445 +                              // the hard way
  1.1446 +}
  1.1447 + 
  1.1448 +
  1.1449 +//=============================================================================
  1.1450 +
  1.1451 +int Matcher::regnum_to_fpu_offset(int regnum)
  1.1452 +{ // Converts a register number into an FPU offset by subtracting 32.
  1.1453 +  return regnum - 32; // The FP registers are in the second chunk
  1.1454 +}
  1.1455 +
  1.1456 +// This is UltraSparc specific; true just means we have a fast l2f conversion.
  1.1457 +const bool Matcher::convL2FSupported(void) {
  1.1458 +  return true; // unconditional on this platform
  1.1459 +}
  1.1460 +
  1.1461 +// Is this branch offset short enough that a short branch can be used?
  1.1462 +//
  1.1463 +// NOTE: If the platform does not provide any short branch variants, then
  1.1464 +//       this method should return false for offset 0.
  1.1465 +bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  1.1466 +  // The passed offset is relative to the address of the branch.
  1.1467 +  // On x86 a branch displacement is calculated relative to the address
  1.1468 +  // of the next instruction, so subtract the branch's own size.
  1.1469 +  offset -= br_size;
  1.1470 +
  1.1471 +  // The short version of jmpConUCF2 contains multiple branches,
  1.1472 +  // making the reach slightly less.
  1.1473 +  if (rule == jmpConUCF2_rule)
  1.1474 +    return (-126 <= offset && offset <= 125);
  1.1475 +  return (-128 <= offset && offset <= 127); // range of a signed 8-bit displacement
  1.1476 +}
  1.1477 +
  1.1478 +const bool Matcher::isSimpleConstant64(jlong value) {
  1.1479 +  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
  1.1480 +  //return value == (int) value;  // Cf. storeImmL and immL32.
  1.1481 +
  1.1482 +  // Probably always true, even if a temp register is required.
  1.1483 +  return true; // the value argument is not inspected
  1.1484 +}
  1.1485 +
  1.1486 +// The ecx parameter to rep stosq for the ClearArray node is in words.
  1.1487 +const bool Matcher::init_array_count_is_in_bytes = false; // false: count is in words, not bytes
  1.1488 +
  1.1489 +// Threshold size for cleararray.
  1.1490 +const int Matcher::init_array_short_size = 8 * BytesPerLong; // expressed in bytes: eight longs
  1.1491 +
  1.1492 +// No additional cost for CMOVL.
  1.1493 +const int Matcher::long_cmove_cost() { return 0; }
  1.1494 +
  1.1495 +// No CMOVF/CMOVD with SSE2, so charge the ConditionalMoveLimit flag value.
  1.1496 +const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; }
  1.1497 +
  1.1498 +// Does the CPU require late expand (see block.cpp for description of late expand)?
  1.1499 +const bool Matcher::require_postalloc_expand = false;
  1.1500 +
  1.1501 +// Should the Matcher clone shifts on addressing modes, expecting them
  1.1502 +// to be subsumed into complex addressing expressions or compute them
  1.1503 +// into registers?  True for Intel but false for most RISCs.
  1.1504 +const bool Matcher::clone_shift_expressions = true;
  1.1505 +
  1.1506 +// Do we need to mask the count passed to shift instructions or does
  1.1507 +// the cpu only look at the lower 5/6 bits anyway?
  1.1508 +const bool Matcher::need_masked_shift_count = false; // false: no explicit masking is requested
  1.1509 +
  1.1509 +
  1.1510 +bool Matcher::narrow_oop_use_complex_address() { // true when the log2 object alignment is at most 3
  1.1511 +  assert(UseCompressedOops, "only for compressed oops code");
  1.1512 +  return (LogMinObjAlignmentInBytes <= 3); // presumably because an x86 address scale is at most 8 (1 << 3) -- confirm
  1.1513 +}
  1.1514 +
  1.1515 +bool Matcher::narrow_klass_use_complex_address() { // true when the log2 klass alignment is at most 3
  1.1516 +  assert(UseCompressedClassPointers, "only for compressed klass code");
  1.1517 +  return (LogKlassAlignmentInBytes <= 3); // presumably because an x86 address scale is at most 8 (1 << 3) -- confirm
  1.1518 +}
  1.1519 +
  1.1520 +// Is it better to copy float constants, or load them directly from
  1.1521 +// memory?  Intel can load a float constant from a direct address,
  1.1522 +// requiring no extra registers.  Most RISCs will have to materialize
  1.1523 +// an address into a register first, so they would do better to copy
  1.1524 +// the constant from stack.
  1.1525 +const bool Matcher::rematerialize_float_constants = true; // XXX
  1.1526 +
  1.1527 +// If CPU can load and store mis-aligned doubles directly then no
  1.1528 +// fixup is needed.  Else we split the double into 2 integer pieces
  1.1529 +// and move it piece-by-piece.  Only happens when passing doubles into
  1.1530 +// C code as the Java calling convention forces doubles to be aligned.
  1.1531 +const bool Matcher::misaligned_doubles_ok = true;
  1.1532 +
  1.1533 +// No-op on amd64
  1.1534 +void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {}
  1.1535 +
  1.1536 +// Advertise here if the CPU requires explicit rounding operations to
  1.1537 +// implement the UseStrictFP mode.
  1.1538 +const bool Matcher::strict_fp_requires_explicit_rounding = true;
  1.1539 +
  1.1540 +// Are floats converted to double when stored to stack during deoptimization?
  1.1541 +// On x64 they are stored without conversion, so we can use normal access.
  1.1542 +bool Matcher::float_in_double() { return false; }
  1.1543 +
  1.1544 +// Do ints take an entire long register or just half?
  1.1545 +const bool Matcher::int_in_long = true;
  1.1546 +
  1.1547 +// Return whether or not this register is ever used as an argument.
  1.1548 +// This function is used on startup to build the trampoline stubs in
  1.1549 +// generateOptoStub.  Registers not mentioned will be killed by the VM
  1.1550 +// call in the trampoline, and arguments in those registers will not be
  1.1551 +// available to the callee.
  1.1552 +bool Matcher::can_be_java_arg(int reg)
  1.1553 +{ // Each register is tested as a pair of slots: the base name and its _H / b companion.
  1.1554 +  return
  1.1555 +    reg ==  RDI_num || reg == RDI_H_num ||
  1.1556 +    reg ==  RSI_num || reg == RSI_H_num ||
  1.1557 +    reg ==  RDX_num || reg == RDX_H_num ||
  1.1558 +    reg ==  RCX_num || reg == RCX_H_num ||
  1.1559 +    reg ==   R8_num || reg ==  R8_H_num ||
  1.1560 +    reg ==   R9_num || reg ==  R9_H_num ||
  1.1561 +    reg ==  R12_num || reg == R12_H_num ||
  1.1562 +    reg == XMM0_num || reg == XMM0b_num ||
  1.1563 +    reg == XMM1_num || reg == XMM1b_num ||
  1.1564 +    reg == XMM2_num || reg == XMM2b_num ||
  1.1565 +    reg == XMM3_num || reg == XMM3b_num ||
  1.1566 +    reg == XMM4_num || reg == XMM4b_num ||
  1.1567 +    reg == XMM5_num || reg == XMM5b_num ||
  1.1568 +    reg == XMM6_num || reg == XMM6b_num ||
  1.1569 +    reg == XMM7_num || reg == XMM7b_num;
  1.1570 +}
  1.1571 +
  1.1572 +bool Matcher::is_spillable_arg(int reg)
  1.1573 +{ // Same register set as can_be_java_arg(); this simply forwards to it.
  1.1574 +  return can_be_java_arg(reg);
  1.1575 +}
  1.1576 +
  1.1577 +bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  1.1578 +  // In 64-bit mode, code that uses a multiply when the
  1.1579 +  // divisor is constant is faster than the hardware
  1.1580 +  // DIV instruction (it uses MulHiL).
  1.1581 +  return false; // the divisor argument is not inspected
  1.1582 +}
  1.1583 +
  1.1584 +// Register for DIVI projection of divmodI
  1.1585 +RegMask Matcher::divI_proj_mask() {
  1.1586 +  return INT_RAX_REG_mask(); // cdql_enc in this file documents the quotient as being left in rax
  1.1587 +}
  1.1588 +
  1.1589 +// Register for MODI projection of divmodI
  1.1590 +RegMask Matcher::modI_proj_mask() {
  1.1591 +  return INT_RDX_REG_mask(); // cdql_enc in this file documents the remainder as being left in rdx
  1.1592 +}
  1.1593 +
  1.1594 +// Register for DIVL projection of divmodL
  1.1595 +RegMask Matcher::divL_proj_mask() {
  1.1596 +  return LONG_RAX_REG_mask(); // cdqq_enc in this file documents the quotient as being left in rax
  1.1597 +}
  1.1598 +
  1.1599 +// Register for MODL projection of divmodL
  1.1600 +RegMask Matcher::modL_proj_mask() {
  1.1601 +  return LONG_RDX_REG_mask(); // cdqq_enc in this file documents the remainder as being left in rdx
  1.1602 +}
  1.1603 +
  1.1604 +const RegMask Matcher::method_handle_invoke_SP_save_mask() { // register mask used to save SP across method handle invokes
  1.1605 +  return PTR_RBP_REG_mask(); // i.e. the pointer register class containing RBP
  1.1606 +}
  1.1607 +
  1.1608 +%}
  1.1609 +
  1.1610 +//----------ENCODING BLOCK-----------------------------------------------------
  1.1611 +// This block specifies the encoding classes used by the compiler to
  1.1612 +// output byte streams.  Encoding classes are parameterized macros
  1.1613 +// used by Machine Instruction Nodes in order to generate the bit
  1.1614 +// encoding of the instruction.  Operands specify their base encoding
  1.1615 +// interface with the interface keyword.  Four interfaces are
  1.1616 +// currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
  1.1617 +// COND_INTER.  REG_INTER causes an operand to generate a function
  1.1618 +// which returns its register number when queried.  CONST_INTER causes
  1.1619 +// an operand to generate a function which returns the value of the
  1.1620 +// constant when queried.  MEMORY_INTER causes an operand to generate
  1.1621 +// four functions which return the Base Register, the Index Register,
  1.1622 +// the Scale Value, and the Offset Value of the operand when queried.
  1.1623 +// COND_INTER causes an operand to generate six functions which return
  1.1624 +// the encoding code (ie - encoding bits for the instruction)
  1.1625 +// associated with each basic boolean condition for a conditional
  1.1626 +// instruction.
  1.1627 +//
  1.1628 +// Instructions specify two basic values for encoding.  Again, a
  1.1629 +// function is available to check if the constant displacement is an
  1.1630 +// oop. They use the ins_encode keyword to specify their encoding
  1.1631 +// classes (which must be a sequence of enc_class names, and their
  1.1632 +// parameters, specified in the encoding block), and they use the
  1.1633 +// opcode keyword to specify, in order, their primary, secondary, and
  1.1634 +// tertiary opcode.  Only the opcode sections which a particular
  1.1635 +// instruction needs for encoding need to be specified.
  1.1636 +encode %{
  1.1637 +  // Build emit functions for each basic byte or larger field in the
  1.1638 +  // intel encoding scheme (opcode, rm, sib, immediate), and call them
  1.1639 +  // from C++ code in the enc_class source block.  Emit functions will
  1.1640 +  // live in the main source block for now.  In future, we can
  1.1641 +  // generalize this by adding a syntax that specifies the sizes of
  1.1642 +  // fields in an order, so that the adlc can build the emit functions
  1.1643 +  // automagically
  1.1644 +
  1.1645 +  // Emit primary opcode
  1.1646 +  enc_class OpcP
  1.1647 +  %{ // $primary is the first value given by the instruction's opcode keyword
  1.1648 +    emit_opcode(cbuf, $primary);
  1.1649 +  %}
  1.1650 +
  1.1651 +  // Emit secondary opcode
  1.1652 +  enc_class OpcS
  1.1653 +  %{ // $secondary is the second value given by the instruction's opcode keyword
  1.1654 +    emit_opcode(cbuf, $secondary);
  1.1655 +  %}
  1.1656 +
  1.1657 +  // Emit tertiary opcode
  1.1658 +  enc_class OpcT
  1.1659 +  %{ // $tertiary is the third value given by the instruction's opcode keyword
  1.1660 +    emit_opcode(cbuf, $tertiary);
  1.1661 +  %}
  1.1662 +
  1.1663 +  // Emit opcode directly
  1.1664 +  enc_class Opcode(immI d8)
  1.1665 +  %{ // the constant operand d8 is emitted as the opcode, independent of the opcode keyword
  1.1666 +    emit_opcode(cbuf, $d8$$constant);
  1.1667 +  %}
  1.1668 +
  1.1669 +  // Emit size prefix
  1.1670 +  enc_class SizePrefix
  1.1671 +  %{ // 0x66 is the x86 operand-size override prefix
  1.1672 +    emit_opcode(cbuf, 0x66);
  1.1673 +  %}
  1.1674 +
  1.1675 +  enc_class reg(rRegI reg)
  1.1676 +  %{ // r/m byte with mode 0x3, middle field 0, and the low three bits of reg; no REX prefix is emitted here
  1.1677 +    emit_rm(cbuf, 0x3, 0, $reg$$reg & 7);
  1.1678 +  %}
  1.1679 +
  1.1680 +  enc_class reg_reg(rRegI dst, rRegI src)
  1.1681 +  %{ // r/m byte with mode 0x3: dst in the middle field, src in the last; only the low three bits of each are used
  1.1682 +    emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
  1.1683 +  %}
  1.1684 +
  1.1685 +  enc_class opc_reg_reg(immI opcode, rRegI dst, rRegI src)
  1.1686 +  %{ // explicit opcode constant followed by the same r/m byte that reg_reg emits
  1.1687 +    emit_opcode(cbuf, $opcode$$constant);
  1.1688 +    emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
  1.1689 +  %}
  1.1690 +
  1.1691 +  enc_class cdql_enc(no_rax_rdx_RegI div)
  1.1692 +  %{
  1.1693 +    // Full implementation of Java idiv and irem; checks for
  1.1694 +    // special case as described in JVM spec., p.243 & p.271.
  1.1695 +    //
  1.1696 +    //         normal case                           special case
  1.1697 +    //
  1.1698 +    // input : rax: dividend                         min_int
  1.1699 +    //         reg: divisor                          -1
  1.1700 +    //
  1.1701 +    // output: rax: quotient  (= rax idiv reg)       min_int
  1.1702 +    //         rdx: remainder (= rax irem reg)       0
  1.1703 +    //
  1.1704 +    //  Code sequence:
  1.1705 +    //
  1.1706 +    //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
  1.1707 +    //    5:   75 07/08                jne    e <normal>
  1.1708 +    //    7:   33 d2                   xor    %edx,%edx
  1.1709 +    //  [div >= 8 -> offset + 1]
  1.1710 +    //  [REX_B]
  1.1711 +    //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
  1.1712 +    //    c:   74 03/04                je     11 <done>
  1.1713 +    // 000000000000000e <normal>:
  1.1714 +    //    e:   99                      cltd
  1.1715 +    //  [div >= 8 -> offset + 1]
  1.1716 +    //  [REX_B]
  1.1717 +    //    f:   f7 f9                   idiv   $div
  1.1718 +    // 0000000000000011 <done>:
  1.1719 +
  1.1720 +    // cmp    $0x80000000,%eax
  1.1721 +    emit_opcode(cbuf, 0x3d);
  1.1722 +    emit_d8(cbuf, 0x00);
  1.1723 +    emit_d8(cbuf, 0x00);
  1.1724 +    emit_d8(cbuf, 0x00);
  1.1725 +    emit_d8(cbuf, 0x80);
  1.1726 +
  1.1727 +    // jne    e <normal>
  1.1728 +    emit_opcode(cbuf, 0x75);
  1.1729 +    emit_d8(cbuf, $div$$reg < 8 ? 0x07 : 0x08); // skips xor (2) + cmp (3) + je (2), plus 1 when the cmp carries REX_B
  1.1730 +
  1.1731 +    // xor    %edx,%edx
  1.1732 +    emit_opcode(cbuf, 0x33);
  1.1733 +    emit_d8(cbuf, 0xD2);
  1.1734 +
  1.1735 +    // cmp    $0xffffffffffffffff,$div
  1.1736 +    if ($div$$reg >= 8) {
  1.1737 +      emit_opcode(cbuf, Assembler::REX_B);
  1.1738 +    }
  1.1739 +    emit_opcode(cbuf, 0x83);
  1.1740 +    emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
  1.1741 +    emit_d8(cbuf, 0xFF);
  1.1742 +
  1.1743 +    // je     11 <done>
  1.1744 +    emit_opcode(cbuf, 0x74);
  1.1745 +    emit_d8(cbuf, $div$$reg < 8 ? 0x03 : 0x04); // skips cltd (1) + idiv (2), plus 1 when the idiv carries REX_B
  1.1746 +
  1.1747 +    // <normal>
  1.1748 +    // cltd
  1.1749 +    emit_opcode(cbuf, 0x99);
  1.1750 +
  1.1751 +    // idivl (note: must be emitted by the user of this rule)
  1.1752 +    // <done>
  1.1753 +  %}
  1.1754 +
  1.1755 +  enc_class cdqq_enc(no_rax_rdx_RegL div)
  1.1756 +  %{
  1.1757 +    // Full implementation of Java ldiv and lrem; checks for
  1.1758 +    // special case as described in JVM spec., p.243 & p.271.
  1.1759 +    //
  1.1760 +    //         normal case                           special case
  1.1761 +    //
  1.1762 +    // input : rax: dividend                         min_long
  1.1763 +    //         reg: divisor                          -1
  1.1764 +    //
  1.1765 +    // output: rax: quotient  (= rax idiv reg)       min_long
  1.1766 +    //         rdx: remainder (= rax irem reg)       0
  1.1767 +    //
  1.1768 +    //  Code sequence:
  1.1769 +    //
  1.1770 +    //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
  1.1771 +    //    7:   00 00 80
  1.1772 +    //    a:   48 39 d0                cmp    %rdx,%rax
  1.1773 +    //    d:   75 08                   jne    17 <normal>
  1.1774 +    //    f:   33 d2                   xor    %edx,%edx
  1.1775 +    //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
  1.1776 +    //   15:   74 05                   je     1c <done>
  1.1777 +    // 0000000000000017 <normal>:
  1.1778 +    //   17:   48 99                   cqto
  1.1779 +    //   19:   48 f7 f9                idiv   $div
  1.1780 +    // 000000000000001c <done>:
  1.1781 +
  1.1782 +    // mov    $0x8000000000000000,%rdx
  1.1783 +    emit_opcode(cbuf, Assembler::REX_W);
  1.1784 +    emit_opcode(cbuf, 0xBA);
  1.1785 +    emit_d8(cbuf, 0x00);
  1.1786 +    emit_d8(cbuf, 0x00);
  1.1787 +    emit_d8(cbuf, 0x00);
  1.1788 +    emit_d8(cbuf, 0x00);
  1.1789 +    emit_d8(cbuf, 0x00);
  1.1790 +    emit_d8(cbuf, 0x00);
  1.1791 +    emit_d8(cbuf, 0x00);
  1.1792 +    emit_d8(cbuf, 0x80);
  1.1793 +
  1.1794 +    // cmp    %rdx,%rax
  1.1795 +    emit_opcode(cbuf, Assembler::REX_W);
  1.1796 +    emit_opcode(cbuf, 0x39);
  1.1797 +    emit_d8(cbuf, 0xD0);
  1.1798 +
  1.1799 +    // jne    17 <normal>
  1.1800 +    emit_opcode(cbuf, 0x75);
  1.1801 +    emit_d8(cbuf, 0x08); // skips xor (2) + cmp (4) + je (2); constant because a REX byte is always emitted for the cmp
  1.1802 +
  1.1803 +    // xor    %edx,%edx
  1.1804 +    emit_opcode(cbuf, 0x33);
  1.1805 +    emit_d8(cbuf, 0xD2);
  1.1806 +
  1.1807 +    // cmp    $0xffffffffffffffff,$div
  1.1808 +    emit_opcode(cbuf, $div$$reg < 8 ? Assembler::REX_W : Assembler::REX_WB);
  1.1809 +    emit_opcode(cbuf, 0x83);
  1.1810 +    emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
  1.1811 +    emit_d8(cbuf, 0xFF);
  1.1812 +
  1.1813 +    // je     1c <done>
  1.1814 +    emit_opcode(cbuf, 0x74);
  1.1815 +    emit_d8(cbuf, 0x05); // skips cqto (2) + idiv (3)
  1.1816 +
  1.1817 +    // <normal>
  1.1818 +    // cqto
  1.1819 +    emit_opcode(cbuf, Assembler::REX_W);
  1.1820 +    emit_opcode(cbuf, 0x99);
  1.1821 +
  1.1822 +    // idivq (note: must be emitted by the user of this rule)
  1.1823 +    // <done>
  1.1824 +  %}
  1.1825 +
  1.1826 +  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  1.1827 +  enc_class OpcSE(immI imm)
  1.1828 +  %{
  1.1829 +    // Emit primary opcode and set sign-extend bit
  1.1830 +    // Check for 8-bit immediate, and set sign extend bit in opcode
  1.1831 +    if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) { // fits in a signed byte
  1.1832 +      emit_opcode(cbuf, $primary | 0x02);
  1.1833 +    } else {
  1.1834 +      // 32-bit immediate
  1.1835 +      emit_opcode(cbuf, $primary);
  1.1836 +    }
  1.1837 +  %}
  1.1838 +
  1.1839 +  enc_class OpcSErm(rRegI dst, immI imm)
  1.1840 +  %{
  1.1841 +    // OpcSEr/m: optional REX_B, then the opcode (with sign-extend bit), then the r/m byte
  1.1842 +    int dstenc = $dst$$reg;
  1.1843 +    if (dstenc >= 8) { // registers 8 and up need REX_B; keep only the low three bits for the r/m byte
  1.1844 +      emit_opcode(cbuf, Assembler::REX_B);
  1.1845 +      dstenc -= 8;
  1.1846 +    }
  1.1847 +    // Emit primary opcode and set sign-extend bit
  1.1848 +    // Check for 8-bit immediate, and set sign extend bit in opcode
  1.1849 +    if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
  1.1850 +      emit_opcode(cbuf, $primary | 0x02);
  1.1851 +    } else {
  1.1852 +      // 32-bit immediate
  1.1853 +      emit_opcode(cbuf, $primary);
  1.1854 +    }
  1.1855 +    // Emit r/m byte with secondary opcode, after primary opcode.
  1.1856 +    emit_rm(cbuf, 0x3, $secondary, dstenc);
  1.1857 +  %}
  1.1858 +
  1.1859 +  enc_class OpcSErm_wide(rRegL dst, immI imm)
  1.1860 +  %{
  1.1861 +    // OpcSEr/m, wide variant: a REX prefix with W set is always emitted
  1.1862 +    int dstenc = $dst$$reg;
  1.1863 +    if (dstenc < 8) {
  1.1864 +      emit_opcode(cbuf, Assembler::REX_W);
  1.1865 +    } else { // registers 8 and up additionally need the B bit
  1.1866 +      emit_opcode(cbuf, Assembler::REX_WB);
  1.1867 +      dstenc -= 8;
  1.1868 +    }
  1.1869 +    // Emit primary opcode and set sign-extend bit
  1.1870 +    // Check for 8-bit immediate, and set sign extend bit in opcode
  1.1871 +    if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
  1.1872 +      emit_opcode(cbuf, $primary | 0x02);
  1.1873 +    } else {
  1.1874 +      // 32-bit immediate
  1.1875 +      emit_opcode(cbuf, $primary);
  1.1876 +    }
  1.1877 +    // Emit r/m byte with secondary opcode, after primary opcode.
  1.1878 +    emit_rm(cbuf, 0x3, $secondary, dstenc);
  1.1879 +  %}
  1.1880 +
  1.1881 +  enc_class Con8or32(immI imm)
  1.1882 +  %{
  1.1883 +    // Emit the immediate as 8 bits when it fits in a signed byte (same range test as OpcSE)
  1.1884 +    if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
  1.1885 +      $$$emit8$imm$$constant;
  1.1886 +    } else {
  1.1887 +      // 32-bit immediate
  1.1888 +      $$$emit32$imm$$constant;
  1.1889 +    }
  1.1890 +  %}
  1.1891 +
  1.1892 +  enc_class opc2_reg(rRegI dst)
  1.1893 +  %{
  1.1894 +    // BSWAP: secondary opcode combined with the register via emit_cc. NOTE(review): $dst$$reg is not masked with & 7 here -- confirm callers handle r8-r15
  1.1895 +    emit_cc(cbuf, $secondary, $dst$$reg);
  1.1896 +  %}
  1.1897 +
  1.1898 +  enc_class opc3_reg(rRegI dst)
  1.1899 +  %{
  1.1900 +    // BSWAP: tertiary opcode combined with the register via emit_cc. NOTE(review): $dst$$reg is not masked with & 7 here -- confirm callers handle r8-r15
  1.1901 +    emit_cc(cbuf, $tertiary, $dst$$reg);
  1.1902 +  %}
  1.1903 +
  1.1904 +  enc_class reg_opc(rRegI div)
  1.1905 +  %{
  1.1906 +    // INC, DEC, IDIV, IMOD, JMP indirect, ...: r/m byte whose middle field is the secondary opcode; no REX prefix is emitted here
  1.1907 +    emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7);
  1.1908 +  %}
  1.1909 +
  1.1910 +  enc_class enc_cmov(cmpOp cop)
  1.1911 +  %{
  1.1912 +    // CMOV: primary opcode byte, then the secondary opcode combined with the condition code
  1.1913 +    $$$emit8$primary;
  1.1914 +    emit_cc(cbuf, $secondary, $cop$$cmpcode);
  1.1915 +  %}
  1.1916 +
  1.1917 +  enc_class enc_PartialSubtypeCheck()
  1.1918 +  %{ // Emits the slow-path subtype check with fixed registers; control reaches 'miss' either by branch or by falling through.
  1.1919 +    Register Rrdi = as_Register(RDI_enc); // result register
  1.1920 +    Register Rrax = as_Register(RAX_enc); // super class
  1.1921 +    Register Rrcx = as_Register(RCX_enc); // killed
  1.1922 +    Register Rrsi = as_Register(RSI_enc); // sub class
  1.1923 +    Label miss;
  1.1924 +    const bool set_cond_codes = true;
  1.1925 +
  1.1926 +    MacroAssembler _masm(&cbuf);
  1.1927 +    __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
  1.1928 +                                     NULL, &miss,
  1.1929 +                                     set_cond_codes);
  1.1930 +    if ($primary) { // only instructions with a non-zero primary opcode get the result register cleared
  1.1931 +      __ xorptr(Rrdi, Rrdi); // zero the result on the fall-through (non-miss) path
  1.1932 +    }
  1.1933 +    __ bind(miss);
  1.1934 +  %}
  1.1935 +
  1.1936 +  enc_class clear_avx %{
  1.1937 +    debug_only(int off0 = cbuf.insts_size()); // debug builds only: size before, for the check below
  1.1938 +    if (ra_->C->max_vector_size() > 16) {
  1.1939 +      // Clear upper bits of YMM registers when current compiled code uses
  1.1940 +      // wide vectors to avoid AVX <-> SSE transition penalty during call.
  1.1941 +      MacroAssembler _masm(&cbuf);
  1.1942 +      __ vzeroupper();
  1.1943 +    }
  1.1944 +    debug_only(int off1 = cbuf.insts_size());
  1.1945 +    assert(off1 - off0 == clear_avx_size(), "correct size prediction"); // emitted bytes must match clear_avx_size()
  1.1946 +  %}
  1.1947 +
  1.1948 +  enc_class Java_To_Runtime(method meth) %{
  1.1949 +    // No relocation needed: the target is loaded as a 64-bit immediate and called indirectly
  1.1950 +    MacroAssembler _masm(&cbuf);
  1.1951 +    __ mov64(r10, (int64_t) $meth$$method);
  1.1952 +    __ call(r10);
  1.1953 +  %}
  1.1954 +
  1.1955 +  enc_class Java_To_Interpreter(method meth)
  1.1956 +  %{
  1.1957 +    // CALL Java_To_Interpreter
  1.1958 +    // This is the instruction starting address for relocation info.
  1.1959 +    cbuf.set_insts_mark();
  1.1960 +    $$$emit8$primary;
  1.1961 +    // CALL directly to the runtime
  1.1962 +    emit_d32_reloc(cbuf,
  1.1963 +                   (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4), // displacement measured from the end of the 4-byte field
  1.1964 +                   runtime_call_Relocation::spec(),
  1.1965 +                   RELOC_DISP32);
  1.1966 +  %}
  1.1967 +
  1.1968 +  enc_class Java_Static_Call(method meth)
  1.1969 +  %{
  1.1970 +    // JAVA STATIC CALL
  1.1971 +    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
  1.1972 +    // determine who we intended to call.
  1.1973 +    cbuf.set_insts_mark();
  1.1974 +    $$$emit8$primary;
  1.1975 +
  1.1976 +    if (!_method) { // no _method: emit with a runtime-call relocation
  1.1977 +      emit_d32_reloc(cbuf,
  1.1978 +                     (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
  1.1979 +                     runtime_call_Relocation::spec(),
  1.1980 +                     RELOC_DISP32);
  1.1981 +    } else if (_optimized_virtual) { // optimized virtual call relocation
  1.1982 +      emit_d32_reloc(cbuf,
  1.1983 +                     (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
  1.1984 +                     opt_virtual_call_Relocation::spec(),
  1.1985 +                     RELOC_DISP32);
  1.1986 +    } else { // plain static call relocation
  1.1987 +      emit_d32_reloc(cbuf,
  1.1988 +                     (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
  1.1989 +                     static_call_Relocation::spec(),
  1.1990 +                     RELOC_DISP32);
  1.1991 +    }
  1.1992 +    if (_method) {
  1.1993 +      // Emit stub for static call.
  1.1994 +      CompiledStaticCall::emit_to_interp_stub(cbuf);
  1.1995 +    }
  1.1996 +  %}
  1.1997 +
  1.1998 +  enc_class Java_Dynamic_Call(method meth) %{ // delegates the whole call sequence to MacroAssembler::ic_call
  1.1999 +    MacroAssembler _masm(&cbuf);
  1.2000 +    __ ic_call((address)$meth$$method);
  1.2001 +  %}
  1.2002 +
  1.2003 +  enc_class Java_Compiled_Call(method meth)
  1.2004 +  %{
  1.2005 +    // JAVA COMPILED CALL
  1.2006 +    int disp = in_bytes(Method:: from_compiled_offset());
  1.2007 +
  1.2008 +    // XXX XXX offset is 128 is 1.5 NON-PRODUCT !!!
  1.2009 +    // assert(-0x80 <= disp && disp < 0x80, "compiled_code_offset isn't small");
  1.2010 +
  1.2011 +    // callq *disp(%rax)
  1.2012 +    cbuf.set_insts_mark();
  1.2013 +    $$$emit8$primary;
  1.2014 +    if (disp < 0x80) { // 8-bit displacement form. NOTE(review): only the upper bound is checked; assumes disp is non-negative
  1.2015 +      emit_rm(cbuf, 0x01, $secondary, RAX_enc); // R/M byte
  1.2016 +      emit_d8(cbuf, disp); // Displacement
  1.2017 +    } else { // 32-bit displacement form
  1.2018 +      emit_rm(cbuf, 0x02, $secondary, RAX_enc); // R/M byte
  1.2019 +      emit_d32(cbuf, disp); // Displacement
  1.2020 +    }
  1.2021 +  %}
  1.2022 +
  1.2023 +  enc_class reg_opc_imm(rRegI dst, immI8 shift)
  1.2024 +  %{
  1.2025 +    // SAL, SAR, SHR: optional REX_B, primary opcode, r/m byte with the secondary opcode, then an 8-bit shift count
  1.2026 +    int dstenc = $dst$$reg;
  1.2027 +    if (dstenc >= 8) {
  1.2028 +      emit_opcode(cbuf, Assembler::REX_B);
  1.2029 +      dstenc -= 8;
  1.2030 +    }
  1.2031 +    $$$emit8$primary;
  1.2032 +    emit_rm(cbuf, 0x3, $secondary, dstenc);
  1.2033 +    $$$emit8$shift$$constant;
  1.2034 +  %}
  1.2035 +
  1.2036 +  enc_class reg_opc_imm_wide(rRegL dst, immI8 shift)
  1.2037 +  %{
  1.2038 +    // SAL, SAR, SHR, wide variant: same as reg_opc_imm but a REX prefix with W set is always emitted
  1.2039 +    int dstenc = $dst$$reg;
  1.2040 +    if (dstenc < 8) {
  1.2041 +      emit_opcode(cbuf, Assembler::REX_W);
  1.2042 +    } else {
  1.2043 +      emit_opcode(cbuf, Assembler::REX_WB);
  1.2044 +      dstenc -= 8;
  1.2045 +    }
  1.2046 +    $$$emit8$primary;
  1.2047 +    emit_rm(cbuf, 0x3, $secondary, dstenc);
  1.2048 +    $$$emit8$shift$$constant;
  1.2049 +  %}
  1.2050 +
  1.2051 +  enc_class load_immI(rRegI dst, immI src)
  1.2052 +  %{ // move-immediate: opcode 0xB8 with the register folded into its low bits, followed by a 32-bit constant
  1.2053 +    int dstenc = $dst$$reg;
  1.2054 +    if (dstenc >= 8) {
  1.2055 +      emit_opcode(cbuf, Assembler::REX_B);
  1.2056 +      dstenc -= 8;
  1.2057 +    }
  1.2058 +    emit_opcode(cbuf, 0xB8 | dstenc);
  1.2059 +    $$$emit32$src$$constant;
  1.2060 +  %}
  1.2061 +
  1.2062 +  enc_class load_immL(rRegL dst, immL src)
  1.2063 +  %{ // 64-bit move-immediate: REX with W set, opcode 0xB8 | reg, then the full 64-bit constant
  1.2064 +    int dstenc = $dst$$reg;
  1.2065 +    if (dstenc < 8) {
  1.2066 +      emit_opcode(cbuf, Assembler::REX_W);
  1.2067 +    } else {
  1.2068 +      emit_opcode(cbuf, Assembler::REX_WB);
  1.2069 +      dstenc -= 8;
  1.2070 +    }
  1.2071 +    emit_opcode(cbuf, 0xB8 | dstenc);
  1.2072 +    emit_d64(cbuf, $src$$constant);
  1.2073 +  %}
  1.2074 +
  1.2075 +  enc_class load_immUL32(rRegL dst, immUL32 src)
  1.2076 +  %{
  1.2077 +    // same as load_immI, but this time we care about zeroes in the high word (no REX.W, so a 32-bit move is emitted)
  1.2078 +    int dstenc = $dst$$reg;
  1.2079 +    if (dstenc >= 8) {
  1.2080 +      emit_opcode(cbuf, Assembler::REX_B);
  1.2081 +      dstenc -= 8;
  1.2082 +    }
  1.2083 +    emit_opcode(cbuf, 0xB8 | dstenc);
  1.2084 +    $$$emit32$src$$constant;
  1.2085 +  %}
  1.2086 +
  1.2087 +  enc_class load_immL32(rRegL dst, immL32 src)
  1.2088 +  %{ // REX with W set, opcode 0xC7 with r/m middle field 0, then a 32-bit immediate
  1.2089 +    int dstenc = $dst$$reg;
  1.2090 +    if (dstenc < 8) {
  1.2091 +      emit_opcode(cbuf, Assembler::REX_W);
  1.2092 +    } else {
  1.2093 +      emit_opcode(cbuf, Assembler::REX_WB);
  1.2094 +      dstenc -= 8;
  1.2095 +    }
  1.2096 +    emit_opcode(cbuf, 0xC7);
  1.2097 +    emit_rm(cbuf, 0x03, 0x00, dstenc);
  1.2098 +    $$$emit32$src$$constant;
  1.2099 +  %}
  1.2100 +
  1.2101 +  enc_class load_immP31(rRegP dst, immP32 src)
  1.2102 +  %{
  1.2103 +    // same as load_immI, but this time we care about zeroes in the high word (no REX.W, so a 32-bit move is emitted)
  1.2104 +    int dstenc = $dst$$reg;
  1.2105 +    if (dstenc >= 8) {
  1.2106 +      emit_opcode(cbuf, Assembler::REX_B);
  1.2107 +      dstenc -= 8;
  1.2108 +    }
  1.2109 +    emit_opcode(cbuf, 0xB8 | dstenc);
  1.2110 +    $$$emit32$src$$constant;
  1.2111 +  %}
  1.2112 +
  1.2113 +  enc_class load_immP(rRegP dst, immP src)
  1.2114 +  %{ // 64-bit pointer move-immediate; the constant is emitted with a relocation when one is attached to it
  1.2115 +    int dstenc = $dst$$reg;
  1.2116 +    if (dstenc < 8) {
  1.2117 +      emit_opcode(cbuf, Assembler::REX_W);
  1.2118 +    } else {
  1.2119 +      emit_opcode(cbuf, Assembler::REX_WB);
  1.2120 +      dstenc -= 8;
  1.2121 +    }
  1.2122 +    emit_opcode(cbuf, 0xB8 | dstenc);
  1.2123 +    // This next line should be generated from ADLC
  1.2124 +    if ($src->constant_reloc() != relocInfo::none) {
  1.2125 +      emit_d64_reloc(cbuf, $src$$constant, $src->constant_reloc(), RELOC_IMM64);
  1.2126 +    } else { // plain constant, no relocation record
  1.2127 +      emit_d64(cbuf, $src$$constant);
  1.2128 +    }
  1.2129 +  %}
  1.2130 +
  1.2131 +  enc_class Con32(immI src)
  1.2132 +  %{
  1.2133 +    // Output the constant as a 32-bit immediate
  1.2134 +    $$$emit32$src$$constant;
  1.2135 +  %}
  1.2136 +
  1.2137 +  enc_class Con32F_as_bits(immF src)
  1.2138 +  %{
  1.2139 +    // Output Float immediate bits: the float is reinterpreted as a jint via jint_cast and emitted as 32 bits
  1.2140 +    jfloat jf = $src$$constant;
  1.2141 +    jint jf_as_bits = jint_cast(jf);
  1.2142 +    emit_d32(cbuf, jf_as_bits);
  1.2143 +  %}
  1.2144 +
  1.2145 +  enc_class Con16(immI src)
  1.2146 +  %{
  1.2147 +    // Output the constant as a 16-bit immediate
  1.2148 +    $$$emit16$src$$constant;
  1.2149 +  %}
  1.2150 +
  1.2151 +  // How is this different from Con32??? XXX (this one calls emit_d32 directly instead of using the $$$emit32 form)
  1.2152 +  enc_class Con_d32(immI src)
  1.2153 +  %{
  1.2154 +    emit_d32(cbuf,$src$$constant);
  1.2155 +  %}
  1.2156 +
  1.2157 +  enc_class conmemref (rRegP t1) %{    // Con32(storeImmI)
  1.2158 +    // Output immediate memory reference: r/m byte with mode 0x00 and last field 0x05, then a zero 32-bit displacement
  1.2159 +    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
  1.2160 +    emit_d32(cbuf, 0x00);
  1.2161 +  %}
  1.2162 +
  1.2163 +  enc_class lock_prefix()
  1.2164 +  %{ // the prefix is emitted only when os::is_MP() reports true
  1.2165 +    if (os::is_MP()) {
  1.2166 +      emit_opcode(cbuf, 0xF0); // lock
  1.2167 +    }
  1.2168 +  %}
  1.2169 +
  1.2170 +  enc_class REX_mem(memory mem)
  1.2171 +  %{ // Emits a REX prefix only if the base (B) or index (X) register is numbered 8 or above.
  1.2172 +    if ($mem$$base >= 8) {
  1.2173 +      if ($mem$$index < 8) {
  1.2174 +        emit_opcode(cbuf, Assembler::REX_B);
  1.2175 +      } else {
  1.2176 +        emit_opcode(cbuf, Assembler::REX_XB);
  1.2177 +      }
  1.2178 +    } else {
  1.2179 +      if ($mem$$index >= 8) {
  1.2180 +        emit_opcode(cbuf, Assembler::REX_X);
  1.2181 +      } // otherwise neither register needs extension and nothing is emitted
  1.2182 +    }
  1.2183 +  %}
  1.2184 +
  1.2185 +  enc_class REX_mem_wide(memory mem)
  1.2186 +  %{ // Always emits a REX prefix with W set; B and X are added for base/index registers numbered 8 or above.
  1.2187 +    if ($mem$$base >= 8) {
  1.2188 +      if ($mem$$index < 8) {
  1.2189 +        emit_opcode(cbuf, Assembler::REX_WB);
  1.2190 +      } else {
  1.2191 +        emit_opcode(cbuf, Assembler::REX_WXB);
  1.2192 +      }
  1.2193 +    } else {
  1.2194 +      if ($mem$$index < 8) {
  1.2195 +        emit_opcode(cbuf, Assembler::REX_W);
  1.2196 +      } else {
  1.2197 +        emit_opcode(cbuf, Assembler::REX_WX);
  1.2198 +      }
  1.2199 +    }
  1.2200 +  %}
  1.2201 +
  1.2202 +  // for byte regs: registers 4-7 get a plain REX, registers 8 and up get REX_B, registers 0-3 get no prefix
  1.2203 +  enc_class REX_breg(rRegI reg)
  1.2204 +  %{
  1.2205 +    if ($reg$$reg >= 4) { // on x86-64 the low byte of registers 4-7 is only addressable with a REX prefix present
  1.2206 +      emit_opcode(cbuf, $reg$$reg < 8 ? Assembler::REX : Assembler::REX_B);
  1.2207 +    }
  1.2208 +  %}
  1.2209 +
  1.2210 +  // for byte regs: src is the byte register (threshold 4), dst is an ordinary register (threshold 8)
  1.2211 +  enc_class REX_reg_breg(rRegI dst, rRegI src)
  1.2212 +  %{
  1.2213 +    if ($dst$$reg < 8) {
  1.2214 +      if ($src$$reg >= 4) { // same byte-register rule as REX_breg
  1.2215 +        emit_opcode(cbuf, $src$$reg < 8 ? Assembler::REX : Assembler::REX_B);
  1.2216 +      }
  1.2217 +    } else { // dst needs the R bit, so a prefix is always emitted
  1.2218 +      if ($src$$reg < 8) {
  1.2219 +        emit_opcode(cbuf, Assembler::REX_R);
  1.2220 +      } else {
  1.2221 +        emit_opcode(cbuf, Assembler::REX_RB);
  1.2222 +      }
  1.2223 +    }
  1.2224 +  %}
  1.2225 +
  1.2226 +  // for byte regs: picks the REX variant from reg (R), index (X) and base (B); a plain REX covers byte registers 4-7
  1.2227 +  enc_class REX_breg_mem(rRegI reg, memory mem)
  1.2228 +  %{
  1.2229 +    if ($reg$$reg < 8) {
  1.2230 +      if ($mem$$base < 8) {
  1.2231 +        if ($mem$$index >= 8) {
  1.2232 +          emit_opcode(cbuf, Assembler::REX_X);
  1.2233 +        } else if ($reg$$reg >= 4) { // no extension bits needed, but the byte register still requires a prefix
  1.2234 +          emit_opcode(cbuf, Assembler::REX);
  1.2235 +        }
  1.2236 +      } else {
  1.2237 +        if ($mem$$index < 8) {
  1.2238 +          emit_opcode(cbuf, Assembler::REX_B);
  1.2239 +        } else {
  1.2240 +          emit_opcode(cbuf, Assembler::REX_XB);
  1.2241 +        }
  1.2242 +      }
  1.2243 +    } else { // reg is 8 or above: R is always set
  1.2244 +      if ($mem$$base < 8) {
  1.2245 +        if ($mem$$index < 8) {
  1.2246 +          emit_opcode(cbuf, Assembler::REX_R);
  1.2247 +        } else {
  1.2248 +          emit_opcode(cbuf, Assembler::REX_RX);
  1.2249 +        }
  1.2250 +      } else {
  1.2251 +        if ($mem$$index < 8) {
  1.2252 +          emit_opcode(cbuf, Assembler::REX_RB);
  1.2253 +        } else {
  1.2254 +          emit_opcode(cbuf, Assembler::REX_RXB);
  1.2255 +        }
  1.2256 +      }
  1.2257 +    }
  1.2258 +  %}
  1.2259 +
  1.2260 +  enc_class REX_reg(rRegI reg)
  1.2261 +  %{ // Emits REX_B only when the register encoding is >= 8; otherwise emits nothing.
  1.2262 +    if ($reg$$reg >= 8) {
  1.2263 +      emit_opcode(cbuf, Assembler::REX_B);
  1.2264 +    }
  1.2265 +  %}
  1.2266 +
  1.2267 +  enc_class REX_reg_wide(rRegI reg)
  1.2268 +  %{ // Always emits a prefix: REX_W for encodings below 8, REX_WB otherwise.
  1.2269 +    if ($reg$$reg < 8) {
  1.2270 +      emit_opcode(cbuf, Assembler::REX_W);
  1.2271 +    } else {
  1.2272 +      emit_opcode(cbuf, Assembler::REX_WB);
  1.2273 +    }
  1.2274 +  %}
  1.2275 +
  1.2276 +  enc_class REX_reg_reg(rRegI dst, rRegI src)
  1.2277 +  %{ // dst >= 8 selects the R variants, src >= 8 the B variants; nothing is emitted when both are below 8.
  1.2278 +    if ($dst$$reg < 8) {
  1.2279 +      if ($src$$reg >= 8) {
  1.2280 +        emit_opcode(cbuf, Assembler::REX_B);
  1.2281 +      }
  1.2282 +    } else {
  1.2283 +      if ($src$$reg < 8) {
  1.2284 +        emit_opcode(cbuf, Assembler::REX_R);
  1.2285 +      } else {
  1.2286 +        emit_opcode(cbuf, Assembler::REX_RB);
  1.2287 +      }
  1.2288 +    }
  1.2289 +  %}
  1.2290 +
  1.2291 +  enc_class REX_reg_reg_wide(rRegI dst, rRegI src)
  1.2292 +  %{ // Always emits a W prefix; dst >= 8 adds R, src >= 8 adds B.
  1.2293 +    if ($dst$$reg < 8) {
  1.2294 +      if ($src$$reg < 8) {
  1.2295 +        emit_opcode(cbuf, Assembler::REX_W);
  1.2296 +      } else {
  1.2297 +        emit_opcode(cbuf, Assembler::REX_WB);
  1.2298 +      }
  1.2299 +    } else {
  1.2300 +      if ($src$$reg < 8) {
  1.2301 +        emit_opcode(cbuf, Assembler::REX_WR);
  1.2302 +      } else {
  1.2303 +        emit_opcode(cbuf, Assembler::REX_WRB);
  1.2304 +      }
  1.2305 +    }
  1.2306 +  %}
  1.2307 +
  1.2308 +  enc_class REX_reg_mem(rRegI reg, memory mem)
  1.2309 +  %{ // reg >= 8 adds R, index >= 8 adds X, base >= 8 adds B; nothing is emitted when all three are below 8.
  1.2310 +    if ($reg$$reg < 8) {
  1.2311 +      if ($mem$$base < 8) {
  1.2312 +        if ($mem$$index >= 8) {
  1.2313 +          emit_opcode(cbuf, Assembler::REX_X);
  1.2314 +        }
  1.2315 +      } else {
  1.2316 +        if ($mem$$index < 8) {
  1.2317 +          emit_opcode(cbuf, Assembler::REX_B);
  1.2318 +        } else {
  1.2319 +          emit_opcode(cbuf, Assembler::REX_XB);
  1.2320 +        }
  1.2321 +      }
  1.2322 +    } else {
  1.2323 +      if ($mem$$base < 8) {
  1.2324 +        if ($mem$$index < 8) {
  1.2325 +          emit_opcode(cbuf, Assembler::REX_R);
  1.2326 +        } else {
  1.2327 +          emit_opcode(cbuf, Assembler::REX_RX);
  1.2328 +        }
  1.2329 +      } else {
  1.2330 +        if ($mem$$index < 8) {
  1.2331 +          emit_opcode(cbuf, Assembler::REX_RB);
  1.2332 +        } else {
  1.2333 +          emit_opcode(cbuf, Assembler::REX_RXB);
  1.2334 +        }
  1.2335 +      }
  1.2336 +    }
  1.2337 +  %}
  1.2338 +
  1.2339 +  enc_class REX_reg_mem_wide(rRegL reg, memory mem)
  1.2340 +  %{ // Always emits a W prefix; reg >= 8 adds R, index >= 8 adds X, base >= 8 adds B.
  1.2341 +    if ($reg$$reg < 8) {
  1.2342 +      if ($mem$$base < 8) {
  1.2343 +        if ($mem$$index < 8) {
  1.2344 +          emit_opcode(cbuf, Assembler::REX_W);
  1.2345 +        } else {
  1.2346 +          emit_opcode(cbuf, Assembler::REX_WX);
  1.2347 +        }
  1.2348 +      } else {
  1.2349 +        if ($mem$$index < 8) {
  1.2350 +          emit_opcode(cbuf, Assembler::REX_WB);
  1.2351 +        } else {
  1.2352 +          emit_opcode(cbuf, Assembler::REX_WXB);
  1.2353 +        }
  1.2354 +      }
  1.2355 +    } else {
  1.2356 +      if ($mem$$base < 8) {
  1.2357 +        if ($mem$$index < 8) {
  1.2358 +          emit_opcode(cbuf, Assembler::REX_WR);
  1.2359 +        } else {
  1.2360 +          emit_opcode(cbuf, Assembler::REX_WRX);
  1.2361 +        }
  1.2362 +      } else {
  1.2363 +        if ($mem$$index < 8) {
  1.2364 +          emit_opcode(cbuf, Assembler::REX_WRB);
  1.2365 +        } else {
  1.2366 +          emit_opcode(cbuf, Assembler::REX_WRXB);
  1.2367 +        }
  1.2368 +      }
  1.2369 +    }
  1.2370 +  %}
  1.2371 +
  1.2372 +  enc_class reg_mem(rRegI ereg, memory mem)
  1.2373 +  %{ // Unpacks the register and the memory operand's fields and hands them to encode_RegMem.
  1.2374 +    // High registers are handled in encode_RegMem
  1.2375 +    int reg = $ereg$$reg;
  1.2376 +    int base = $mem$$base;
  1.2377 +    int index = $mem$$index;
  1.2378 +    int scale = $mem$$scale;
  1.2379 +    int disp = $mem$$disp;
  1.2380 +    relocInfo::relocType disp_reloc = $mem->disp_reloc();
  1.2381 +
  1.2382 +    encode_RegMem(cbuf, reg, base, index, scale, disp, disp_reloc);
  1.2383 +  %}
  1.2384 +
  1.2385 +  enc_class RM_opc_mem(immI rm_opcode, memory mem)
  1.2386 +  %{ // Like reg_mem, but the rm_opcode constant is passed where reg_mem passes the register.
  1.2387 +    int rm_byte_opcode = $rm_opcode$$constant;
  1.2388 +
  1.2389 +    // High registers are handled in encode_RegMem
  1.2390 +    int base = $mem$$base;
  1.2391 +    int index = $mem$$index;
  1.2392 +    int scale = $mem$$scale;
  1.2393 +    int displace = $mem$$disp;
  1.2394 +
  1.2395 +    relocInfo::relocType disp_reloc = $mem->disp_reloc();       // disp-as-oop when
  1.2396 +                                            // working with static
  1.2397 +                                            // globals
  1.2398 +    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace,
  1.2399 +                  disp_reloc);
  1.2400 +  %}
  1.2401 +
  1.2402 +  enc_class reg_lea(rRegI dst, rRegI src0, immI src1)
  1.2403 +  %{ // Encodes the operand [src0 + src1] for dst via encode_RegMem: no index, no scale, no relocation.
  1.2404 +    int reg_encoding = $dst$$reg;
  1.2405 +    int base         = $src0$$reg;      // always a register here (0xFFFFFFFF would indicate no base)
  1.2406 +    int index        = 0x04;            // 0x04 indicates no index
  1.2407 +    int scale        = 0x00;            // 0x00 indicates no scale
  1.2408 +    int displace     = $src1$$constant; // 0x00 indicates no displacement
  1.2409 +    relocInfo::relocType disp_reloc = relocInfo::none;
  1.2410 +    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace,
  1.2411 +                  disp_reloc);
  1.2412 +  %}
  1.2413 +
  1.2414 +  enc_class neg_reg(rRegI dst)
  1.2415 +  %{ // NEG of dst without a W prefix: optional REX_B, opcode 0xF7, then the ModRM byte.
  1.2416 +    int dstenc = $dst$$reg;
  1.2417 +    if (dstenc >= 8) {
  1.2418 +      emit_opcode(cbuf, Assembler::REX_B);
  1.2419 +      dstenc -= 8; // only the low three bits go into the ModRM byte
  1.2420 +    }
  1.2421 +    // NEG $dst
  1.2422 +    emit_opcode(cbuf, 0xF7);
  1.2423 +    emit_rm(cbuf, 0x3, 0x03, dstenc); // 0xF7 with opcode extension 3 is NEG
  1.2424 +  %}
  1.2425 +
  1.2426 +  enc_class neg_reg_wide(rRegI dst)
  1.2427 +  %{ // NEG of dst with a W prefix (REX_W or REX_WB), opcode 0xF7, then the ModRM byte.
  1.2428 +    int dstenc = $dst$$reg;
  1.2429 +    if (dstenc < 8) {
  1.2430 +      emit_opcode(cbuf, Assembler::REX_W);
  1.2431 +    } else {
  1.2432 +      emit_opcode(cbuf, Assembler::REX_WB);
  1.2433 +      dstenc -= 8; // only the low three bits go into the ModRM byte
  1.2434 +    }
  1.2435 +    // NEG $dst
  1.2436 +    emit_opcode(cbuf, 0xF7);
  1.2437 +    emit_rm(cbuf, 0x3, 0x03, dstenc); // 0xF7 with opcode extension 3 is NEG
  1.2438 +  %}
  1.2439 +
  1.2440 +  enc_class setLT_reg(rRegI dst)
  1.2441 +  %{ // Optional prefix (REX_B for encodings >= 8, plain REX for 4-7), then 0x0F 0x9C and the ModRM byte.
  1.2442 +    int dstenc = $dst$$reg;
  1.2443 +    if (dstenc >= 8) {
  1.2444 +      emit_opcode(cbuf, Assembler::REX_B);
  1.2445 +      dstenc -= 8;
  1.2446 +    } else if (dstenc >= 4) {
  1.2447 +      emit_opcode(cbuf, Assembler::REX); // byte-register form, same rule as REX_breg
  1.2448 +    }
  1.2449 +    // SETLT $dst
  1.2450 +    emit_opcode(cbuf, 0x0F);
  1.2451 +    emit_opcode(cbuf, 0x9C);
  1.2452 +    emit_rm(cbuf, 0x3, 0x0, dstenc);
  1.2453 +  %}
  1.2454 +
  1.2455 +  enc_class setNZ_reg(rRegI dst)
  1.2456 +  %{ // Optional prefix (REX_B for encodings >= 8, plain REX for 4-7), then 0x0F 0x95 and the ModRM byte.
  1.2457 +    int dstenc = $dst$$reg;
  1.2458 +    if (dstenc >= 8) {
  1.2459 +      emit_opcode(cbuf, Assembler::REX_B);
  1.2460 +      dstenc -= 8;
  1.2461 +    } else if (dstenc >= 4) {
  1.2462 +      emit_opcode(cbuf, Assembler::REX); // byte-register form, same rule as REX_breg
  1.2463 +    }
  1.2464 +    // SETNZ $dst
  1.2465 +    emit_opcode(cbuf, 0x0F);
  1.2466 +    emit_opcode(cbuf, 0x95);
  1.2467 +    emit_rm(cbuf, 0x3, 0x0, dstenc);
  1.2468 +  %}
  1.2469 +
  1.2470 +
  1.2471 +  // Compare the longs and set -1, 0, or 1 into dst
  1.2472 +  enc_class cmpl3_flag(rRegL src1, rRegL src2, rRegI dst)
  1.2473 +  %{
  1.2474 +    int src1enc = $src1$$reg;
  1.2475 +    int src2enc = $src2$$reg;
  1.2476 +    int dstenc = $dst$$reg;
  1.2477 +
  1.2478 +    // cmpq $src1, $src2
  1.2479 +    if (src1enc < 8) { // W is always set; src1 >= 8 adds R, src2 >= 8 adds B
  1.2480 +      if (src2enc < 8) {
  1.2481 +        emit_opcode(cbuf, Assembler::REX_W);
  1.2482 +      } else {
  1.2483 +        emit_opcode(cbuf, Assembler::REX_WB);
  1.2484 +      }
  1.2485 +    } else {
  1.2486 +      if (src2enc < 8) {
  1.2487 +        emit_opcode(cbuf, Assembler::REX_WR);
  1.2488 +      } else {
  1.2489 +        emit_opcode(cbuf, Assembler::REX_WRB);
  1.2490 +      }
  1.2491 +    }
  1.2492 +    emit_opcode(cbuf, 0x3B);
  1.2493 +    emit_rm(cbuf, 0x3, src1enc & 7, src2enc & 7);
  1.2494 +
  1.2495 +    // movl $dst, -1
  1.2496 +    if (dstenc >= 8) {
  1.2497 +      emit_opcode(cbuf, Assembler::REX_B);
  1.2498 +    }
  1.2499 +    emit_opcode(cbuf, 0xB8 | (dstenc & 7));
  1.2500 +    emit_d32(cbuf, -1);
  1.2501 +
  1.2502 +    // jl,s done
  1.2503 +    emit_opcode(cbuf, 0x7C);
  1.2504 +    emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08); // skips setne + movzbl: 3 bytes each, plus one prefix byte each when dstenc >= 4
  1.2505 +
  1.2506 +    // setne $dst
  1.2507 +    if (dstenc >= 4) {
  1.2508 +      emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
  1.2509 +    }
  1.2510 +    emit_opcode(cbuf, 0x0F);
  1.2511 +    emit_opcode(cbuf, 0x95);
  1.2512 +    emit_opcode(cbuf, 0xC0 | (dstenc & 7));
  1.2513 +
  1.2514 +    // movzbl $dst, $dst
  1.2515 +    if (dstenc >= 4) {
  1.2516 +      emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB); // dst fills both register fields, hence R and B together
  1.2517 +    }
  1.2518 +    emit_opcode(cbuf, 0x0F);
  1.2519 +    emit_opcode(cbuf, 0xB6);
  1.2520 +    emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
  1.2521 +  %}
  1.2522 +
  1.2523 +  enc_class Push_ResultXD(regD dst) %{ // fstp_d to [rsp], movdbl from [rsp] into dst, then add 8 to rsp
  1.2524 +    MacroAssembler _masm(&cbuf);
  1.2525 +    __ fstp_d(Address(rsp, 0));
  1.2526 +    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
  1.2527 +    __ addptr(rsp, 8); // NOTE(review): presumably releases the slot reserved by Push_SrcXD - confirm at the use sites
  1.2528 +  %}
  1.2529 +
  1.2530 +  enc_class Push_SrcXD(regD src) %{ // subtract 8 from rsp, movdbl src to [rsp], then fld_d from [rsp]
  1.2531 +    MacroAssembler _masm(&cbuf);
  1.2532 +    __ subptr(rsp, 8); // the 8-byte slot is not released in this encoding
  1.2533 +    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
  1.2534 +    __ fld_d(Address(rsp, 0));
  1.2535 +  %}
  1.2536 +
  1.2537 +
  1.2538 +  enc_class enc_rethrow()
  1.2539 +  %{ // Emits opcode 0xE9 with a 32-bit displacement to OptoRuntime::rethrow_stub(), using a runtime-call relocation.
  1.2540 +    cbuf.set_insts_mark();
  1.2541 +    emit_opcode(cbuf, 0xE9); // jmp entry
  1.2542 +    emit_d32_reloc(cbuf,
  1.2543 +                   (int) (OptoRuntime::rethrow_stub() - cbuf.insts_end() - 4), // target minus (insts_end + 4)
  1.2544 +                   runtime_call_Relocation::spec(),
  1.2545 +                   RELOC_DISP32);
  1.2546 +  %}
  1.2547 +
  1.2548 +%}
  1.2549 +
  1.2550 +
  1.2551 +
  1.2552 +//----------FRAME--------------------------------------------------------------
  1.2553 +// Definition of frame structure and management information.
  1.2554 +//
  1.2555 +//  S T A C K   L A Y O U T    Allocators stack-slot number
  1.2556 +//                             |   (to get allocators register number
  1.2557 +//  G  Owned by    |        |  v    add OptoReg::stack0())
  1.2558 +//  r   CALLER     |        |
  1.2559 +//  o     |        +--------+      pad to even-align allocators stack-slot
  1.2560 +//  w     V        |  pad0  |        numbers; owned by CALLER
  1.2561 +//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
  1.2562 +//  h     ^        |   in   |  5
  1.2563 +//        |        |  args  |  4   Holes in incoming args owned by SELF
  1.2564 +//  |     |        |        |  3
  1.2565 +//  |     |        +--------+
  1.2566 +//  V     |        | old out|      Empty on Intel, window on Sparc
  1.2567 +//        |    old |preserve|      Must be even aligned.
  1.2568 +//        |     SP-+--------+----> Matcher::_old_SP, even aligned
  1.2569 +//        |        |   in   |  3   area for Intel ret address
  1.2570 +//     Owned by    |preserve|      Empty on Sparc.
  1.2571 +//       SELF      +--------+
  1.2572 +//        |        |  pad2  |  2   pad to align old SP
  1.2573 +//        |        +--------+  1
  1.2574 +//        |        | locks  |  0
  1.2575 +//        |        +--------+----> OptoReg::stack0(), even aligned
  1.2576 +//        |        |  pad1  | 11   pad to align new SP
  1.2577 +//        |        +--------+
  1.2578 +//        |        |        | 10
  1.2579 +//        |        | spills |  9   spills
  1.2580 +//        V        |        |  8   (pad0 slot for callee)
  1.2581 +//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
  1.2582 +//        ^        |  out   |  7
  1.2583 +//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
  1.2584 +//     Owned by    +--------+
  1.2585 +//      CALLEE     | new out|  6   Empty on Intel, window on Sparc
  1.2586 +//        |    new |preserve|      Must be even-aligned.
  1.2587 +//        |     SP-+--------+----> Matcher::_new_SP, even aligned
  1.2588 +//        |        |        |
  1.2589 +//
  1.2590 +// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
  1.2591 +//         known from SELF's arguments and the Java calling convention.
  1.2592 +//         Region 6-7 is determined per call site.
  1.2593 +// Note 2: If the calling convention leaves holes in the incoming argument
  1.2594 +//         area, those holes are owned by SELF.  Holes in the outgoing area
  1.2595 +//         are owned by the CALLEE.  Holes should not be necessary in the
  1.2596 +//         incoming area, as the Java calling convention is completely under
  1.2597 +//         the control of the AD file.  Doubles can be sorted and packed to
  1.2598 +//         avoid holes.  Holes in the outgoing arguments may be necessary for
  1.2599 +//         varargs C calling conventions.
  1.2600 +// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
  1.2601 +//         even aligned with pad0 as needed.
  1.2602 +//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
  1.2603 +//         region 6-11 is even aligned; it may be padded out more so that
  1.2604 +//         the region from SP to FP meets the minimum stack alignment.
  1.2605 +// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
  1.2606 +//         alignment.  Region 11, pad1, may be dynamically extended so that
  1.2607 +//         SP meets the minimum alignment.
  1.2608 +
  1.2609 +frame
  1.2610 +%{ // Frame layout and calling-convention parameters
  1.2611 +  // What direction does stack grow in (assumed to be same for C & Java)
  1.2612 +  stack_direction(TOWARDS_LOW);
  1.2613 +
  1.2614 +  // These registers define part of the calling convention
  1.2615 +  // between compiled code and the interpreter.
  1.2616 +  inline_cache_reg(RAX);                // Inline Cache Register
  1.2617 +  interpreter_method_oop_reg(RBX);      // Method Oop Register when
  1.2618 +                                        // calling interpreter
  1.2619 +
  1.2620 +  // Optional: name the operand used by cisc-spilling to access
  1.2621 +  // [stack_pointer + offset]
  1.2622 +  cisc_spilling_operand_name(indOffset32);
  1.2623 +
  1.2624 +  // Number of stack slots consumed by locking an object
  1.2625 +  sync_stack_slots(2);
  1.2626 +
  1.2627 +  // Compiled code's Frame Pointer
  1.2628 +  frame_pointer(RSP);
  1.2629 +
  1.2630 +  // Interpreter stores its frame pointer in a register which is
  1.2631 +  // stored to the stack by I2CAdaptors.
  1.2632 +  // I2CAdaptors convert from interpreted java to compiled java.
  1.2633 +  interpreter_frame_pointer(RBP);
  1.2634 +
  1.2635 +  // Stack alignment requirement
  1.2636 +  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
  1.2637 +
  1.2638 +  // Number of stack slots between incoming argument block and the start of
  1.2639 +  // a new frame.  The PROLOG must add this many slots to the stack.  The
  1.2640 +  // EPILOG must remove this many slots.  amd64 needs two slots for
  1.2641 +  // return address.  NOTE(review): the base below is 4 slots; the other two are presumably the saved RBP - confirm.
  1.2642 +  in_preserve_stack_slots(4 + 2 * VerifyStackAtCalls);
  1.2643 +
  1.2644 +  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  1.2645 +  // for calls to C.  Supports the var-args backing area for register parms.
  1.2646 +  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
  1.2647 +
  1.2648 +  // The after-PROLOG location of the return address.  Location of
  1.2649 +  // return address specifies a type (REG or STACK) and a number
  1.2650 +  // representing the register number (i.e. - use a register name) or
  1.2651 +  // stack slot.
  1.2652 +  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  1.2653 +  // Otherwise, it is above the locks and verification slot and alignment word
  1.2654 +  return_addr(STACK - 2 +
  1.2655 +              round_to((Compile::current()->in_preserve_stack_slots() +
  1.2656 +                        Compile::current()->fixed_slots()),
  1.2657 +                       stack_alignment_in_slots()));
  1.2658 +
  1.2659 +  // Body of function which returns an integer array locating
  1.2660 +  // arguments either in registers or in stack slots.  Passed an array
  1.2661 +  // of ideal registers called "sig" and a "length" count.  Stack-slot
  1.2662 +  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  1.2663 +  // arguments for a CALLEE.  Incoming stack arguments are
  1.2664 +  // automatically biased by the preserve_stack_slots field above.
  1.2665 +
  1.2666 +  calling_convention
  1.2667 +  %{
  1.2668 +    // No difference between incoming/outgoing, so just pass false
  1.2669 +    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  1.2670 +  %}
  1.2671 +
  1.2672 +  c_calling_convention
  1.2673 +  %{
  1.2674 +    // This is obviously always outgoing; the return value is deliberately discarded
  1.2675 +    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
  1.2676 +  %}
  1.2677 +
  1.2678 +  // Location of compiled Java return values.  Same as C for now.
  1.2679 +  return_value
  1.2680 +  %{ // Maps ideal_reg to the (hi, lo) register pair through the two tables below.
  1.2681 +    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
  1.2682 +           "only return normal values");
  1.2683 +
  1.2684 +    static const int lo[Op_RegL + 1] = {
  1.2685 +      0,
  1.2686 +      0,
  1.2687 +      RAX_num,  // Op_RegN (listed before Op_RegI, so the assert above excludes it)
  1.2688 +      RAX_num,  // Op_RegI
  1.2689 +      RAX_num,  // Op_RegP
  1.2690 +      XMM0_num, // Op_RegF
  1.2691 +      XMM0_num, // Op_RegD
  1.2692 +      RAX_num   // Op_RegL
  1.2693 +    };
  1.2694 +    static const int hi[Op_RegL + 1] = {
  1.2695 +      0,
  1.2696 +      0,
  1.2697 +      OptoReg::Bad, // Op_RegN
  1.2698 +      OptoReg::Bad, // Op_RegI
  1.2699 +      RAX_H_num,    // Op_RegP
  1.2700 +      OptoReg::Bad, // Op_RegF
  1.2701 +      XMM0b_num,    // Op_RegD
  1.2702 +      RAX_H_num     // Op_RegL
  1.2703 +    };
  1.2704 +    // Excluded flags and vector registers.
  1.2705 +    assert(ARRAY_SIZE(hi) == _last_machine_leaf - 5, "missing type");
  1.2706 +    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  1.2707 +  %}
  1.2708 +%}
  1.2709 +
  1.2710 +//----------ATTRIBUTES---------------------------------------------------------
  1.2711 +//----------Operand Attributes-------------------------------------------------
  1.2712 +op_attrib op_cost(0);        // Required cost attribute, declared with value 0
  1.2713 +
  1.2714 +//----------Instruction Attributes---------------------------------------------
  1.2715 +ins_attrib ins_cost(100);       // Required cost attribute, declared with value 100
  1.2716 +ins_attrib ins_size(8);         // Required size attribute (in bits)
  1.2717 +ins_attrib ins_short_branch(0); // Required flag: is this instruction
  1.2718 +                                // a non-matching short branch variant
  1.2719 +                                // of some long branch?
  1.2720 +ins_attrib ins_alignment(1);    // Required alignment attribute (must
  1.2721 +                                // be a power of 2) specifies the
  1.2722 +                                // alignment that some part of the
  1.2723 +                                // instruction (not necessarily the
  1.2724 +                                // start) requires.  If > 1, a
  1.2725 +                                // compute_padding() function must be
  1.2726 +                                // provided for the instruction
  1.2727 +
  1.2728 +//----------OPERANDS-----------------------------------------------------------
  1.2729 +// Operand definitions must precede instruction definitions for correct parsing
  1.2730 +// in the ADLC because operands constitute user defined types which are used in
  1.2731 +// instruction definitions.
  1.2732 +
  1.2733 +//----------Simple Operands----------------------------------------------------
  1.2734 +// Immediate Operands
  1.2735 +// Integer Immediate: matches any ConI
  1.2736 +operand immI()
  1.2737 +%{
  1.2738 +  match(ConI);
  1.2739 +
  1.2740 +  op_cost(10);
  1.2741 +  format %{ %}
  1.2742 +  interface(CONST_INTER);
  1.2743 +%}
  1.2744 +
  1.2745 +// Integer constant 0, for tests against zero
  1.2746 +operand immI0()
  1.2747 +%{
  1.2748 +  predicate(n->get_int() == 0);
  1.2749 +  match(ConI);
  1.2750 +
  1.2751 +  op_cost(0);
  1.2752 +  format %{ %}
  1.2753 +  interface(CONST_INTER);
  1.2754 +%}
  1.2755 +
  1.2756 +// Integer constant 1, for increment
  1.2757 +operand immI1()
  1.2758 +%{
  1.2759 +  predicate(n->get_int() == 1);
  1.2760 +  match(ConI);
  1.2761 +
  1.2762 +  op_cost(0);
  1.2763 +  format %{ %}
  1.2764 +  interface(CONST_INTER);
  1.2765 +%}
  1.2766 +
  1.2767 +// Integer constant -1, for decrement
  1.2768 +operand immI_M1()
  1.2769 +%{
  1.2770 +  predicate(n->get_int() == -1);
  1.2771 +  match(ConI);
  1.2772 +
  1.2773 +  op_cost(0);
  1.2774 +  format %{ %}
  1.2775 +  interface(CONST_INTER);
  1.2776 +%}
  1.2777 +
  1.2778 +// Valid scale values for addressing modes: integer constants 0 through 3
  1.2779 +operand immI2()
  1.2780 +%{
  1.2781 +  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  1.2782 +  match(ConI);
  1.2783 +
  1.2784 +  format %{ %}
  1.2785 +  interface(CONST_INTER);
  1.2786 +%}
  1.2787 +
  1.2788 +operand immI8()
  1.2789 +%{ // Integer constant in the signed 8-bit range, -0x80 through 0x7F
  1.2790 +  predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
  1.2791 +  match(ConI);
  1.2792 +
  1.2793 +  op_cost(5);
  1.2794 +  format %{ %}
  1.2795 +  interface(CONST_INTER);
  1.2796 +%}
  1.2797 +
  1.2798 +operand immI16()
  1.2799 +%{ // Integer constant in the signed 16-bit range, -32768 through 32767
  1.2800 +  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  1.2801 +  match(ConI);
  1.2802 +
  1.2803 +  op_cost(10);
  1.2804 +  format %{ %}
  1.2805 +  interface(CONST_INTER);
  1.2806 +%}
  1.2807 +
  1.2808 +// Non-negative int immediate (get_int() >= 0)
  1.2809 +operand immU31()
  1.2810 +%{
  1.2811 +  predicate(n->get_int() >= 0);
  1.2812 +  match(ConI);
  1.2813 +
  1.2814 +  op_cost(0);
  1.2815 +  format %{ %}
  1.2816 +  interface(CONST_INTER);
  1.2817 +%}
  1.2818 +
  1.2819 +// Integer constant 32, for long shifts
  1.2820 +operand immI_32()
  1.2821 +%{
  1.2822 +  predicate( n->get_int() == 32 );
  1.2823 +  match(ConI);
  1.2824 +
  1.2825 +  op_cost(0);
  1.2826 +  format %{ %}
  1.2827 +  interface(CONST_INTER);
  1.2828 +%}
  1.2829 +
  1.2830 +// Integer constant 64, for long shifts
  1.2831 +operand immI_64()
  1.2832 +%{
  1.2833 +  predicate( n->get_int() == 64 );
  1.2834 +  match(ConI);
  1.2835 +
  1.2836 +  op_cost(0);
  1.2837 +  format %{ %}
  1.2838 +  interface(CONST_INTER);
  1.2839 +%}
  1.2840 +
  1.2841 +// Pointer Immediate: matches any ConP
  1.2842 +operand immP()
  1.2843 +%{
  1.2844 +  match(ConP);
  1.2845 +
  1.2846 +  op_cost(10);
  1.2847 +  format %{ %}
  1.2848 +  interface(CONST_INTER);
  1.2849 +%}
  1.2850 +
  1.2851 +// NULL Pointer Immediate (get_ptr() == 0)
  1.2852 +operand immP0()
  1.2853 +%{
  1.2854 +  predicate(n->get_ptr() == 0);
  1.2855 +  match(ConP);
  1.2856 +
  1.2857 +  op_cost(5);
  1.2858 +  format %{ %}
  1.2859 +  interface(CONST_INTER);
  1.2860 +%}
  1.2861 +
  1.2862 +// Narrow Pointer Immediate: matches any ConN
  1.2863 +operand immN() %{
  1.2864 +  match(ConN);
  1.2865 +
  1.2866 +  op_cost(10);
  1.2867 +  format %{ %}
  1.2868 +  interface(CONST_INTER);
  1.2869 +%}
  1.2870 +
  1.2871 +operand immNKlass() %{ // Immediate matching any ConNKlass
  1.2872 +  match(ConNKlass);
  1.2873 +
  1.2874 +  op_cost(10);
  1.2875 +  format %{ %}
  1.2876 +  interface(CONST_INTER);
  1.2877 +%}
  1.2878 +
  1.2879 +// NULL Narrow Pointer Immediate (get_narrowcon() == 0)
  1.2880 +operand immN0() %{
  1.2881 +  predicate(n->get_narrowcon() == 0);
  1.2882 +  match(ConN);
  1.2883 +
  1.2884 +  op_cost(5);
  1.2885 +  format %{ %}
  1.2886 +  interface(CONST_INTER);
  1.2887 +%}
  1.2888 +
  1.2889 +operand immP31()
  1.2890 +%{ // Pointer constant with no relocation and no bits set at position 31 or above
  1.2891 +  predicate(n->as_Type()->type()->reloc() == relocInfo::none
  1.2892 +            && (n->get_ptr() >> 31) == 0);
  1.2893 +  match(ConP);
  1.2894 +
  1.2895 +  op_cost(5);
  1.2896 +  format %{ %}
  1.2897 +  interface(CONST_INTER);
  1.2898 +%}
  1.2899 +
  1.2900 +
  1.2901 +// Long Immediate: matches any ConL
  1.2902 +operand immL()
  1.2903 +%{
  1.2904 +  match(ConL);
  1.2905 +
  1.2906 +  op_cost(20);
  1.2907 +  format %{ %}
  1.2908 +  interface(CONST_INTER);
  1.2909 +%}
  1.2910 +
  1.2911 +// Long Immediate in the signed 8-bit range, -0x80 through 0x7F
  1.2912 +operand immL8()
  1.2913 +%{
  1.2914 +  predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
  1.2915 +  match(ConL);
  1.2916 +
  1.2917 +  op_cost(5);
  1.2918 +  format %{ %}
  1.2919 +  interface(CONST_INTER);
  1.2920 +%}
  1.2921 +
  1.2922 +// Long Immediate 32-bit unsigned: the value is unchanged by a cast to unsigned int
  1.2923 +operand immUL32()
  1.2924 +%{
  1.2925 +  predicate(n->get_long() == (unsigned int) (n->get_long()));
  1.2926 +  match(ConL);
  1.2927 +
  1.2928 +  op_cost(10);
  1.2929 +  format %{ %}
  1.2930 +  interface(CONST_INTER);
  1.2931 +%}
  1.2932 +
  1.2933 +// Long Immediate 32-bit signed: the value is unchanged by a cast to int
  1.2934 +operand immL32()
  1.2935 +%{
  1.2936 +  predicate(n->get_long() == (int) (n->get_long()));
  1.2937 +  match(ConL);
  1.2938 +
  1.2939 +  op_cost(15);
  1.2940 +  format %{ %}
  1.2941 +  interface(CONST_INTER);
  1.2942 +%}
  1.2943 +
  1.2944 +// Long Immediate zero (get_long() == 0)
  1.2945 +operand immL0()
  1.2946 +%{
  1.2947 +  predicate(n->get_long() == 0L);
  1.2948 +  match(ConL);
  1.2949 +
  1.2950 +  op_cost(10);
  1.2951 +  format %{ %}
  1.2952 +  interface(CONST_INTER);
  1.2953 +%}
  1.2954 +
  1.2955 +// Long constant 1, for increment
  1.2956 +operand immL1()
  1.2957 +%{
  1.2958 +  predicate(n->get_long() == 1);
  1.2959 +  match(ConL);
  1.2960 +
  1.2961 +  format %{ %}
  1.2962 +  interface(CONST_INTER);
  1.2963 +%}
  1.2964 +
  1.2965 +// Long constant -1, for decrement
  1.2966 +operand immL_M1()
  1.2967 +%{
  1.2968 +  predicate(n->get_long() == -1);
  1.2969 +  match(ConL);
  1.2970 +
  1.2971 +  format %{ %}
  1.2972 +  interface(CONST_INTER);
  1.2973 +%}
  1.2974 +
  1.2975 +// Long Immediate: exactly the value 10
  1.2976 +operand immL10()
  1.2977 +%{
  1.2978 +  predicate(n->get_long() == 10);
  1.2979 +  match(ConL);
  1.2980 +
  1.2981 +  format %{ %}
  1.2982 +  interface(CONST_INTER);
  1.2983 +%}
  1.2984 +
  1.2985 +// Long immediate from 0 to 127, both inclusive.
  1.2986 +// Used for a shorter form of long mul by 10.
  1.2987 +operand immL_127()
  1.2988 +%{
  1.2989 +  predicate(0 <= n->get_long() && n->get_long() < 0x80);
  1.2990 +  match(ConL);
  1.2991 +
  1.2992 +  op_cost(10);
  1.2993 +  format %{ %}
  1.2994 +  interface(CONST_INTER);
  1.2995 +%}
  1.2996 +
  1.2997 +// Long Immediate: low 32-bit mask (exactly 0xFFFFFFFF)
  1.2998 +operand immL_32bits()
  1.2999 +%{
  1.3000 +  predicate(n->get_long() == 0xFFFFFFFFL);
  1.3001 +  match(ConL);
  1.3002 +  op_cost(20);
  1.3003 +
  1.3004 +  format %{ %}
  1.3005 +  interface(CONST_INTER);
  1.3006 +%}
  1.3007 +
  1.3008 +// Float Immediate zero; NOTE(review): jint_cast presumably compares raw bits, which would exclude -0.0f - confirm
  1.3009 +operand immF0()
  1.3010 +%{
  1.3011 +  predicate(jint_cast(n->getf()) == 0);
  1.3012 +  match(ConF);
  1.3013 +
  1.3014 +  op_cost(5);
  1.3015 +  format %{ %}
  1.3016 +  interface(CONST_INTER);
  1.3017 +%}
  1.3018 +
  1.3019 +// Float Immediate: matches any ConF
  1.3020 +operand immF()
  1.3021 +%{
  1.3022 +  match(ConF);
  1.3023 +
  1.3024 +  op_cost(15);
  1.3025 +  format %{ %}
  1.3026 +  interface(CONST_INTER);
  1.3027 +%}
  1.3028 +
  1.3029 +// Double Immediate zero; NOTE(review): jlong_cast presumably compares raw bits, which would exclude -0.0 - confirm
  1.3030 +operand immD0()
  1.3031 +%{
  1.3032 +  predicate(jlong_cast(n->getd()) == 0);
  1.3033 +  match(ConD);
  1.3034 +
  1.3035 +  op_cost(5);
  1.3036 +  format %{ %}
  1.3037 +  interface(CONST_INTER);
  1.3038 +%}
  1.3039 +
  1.3040 +// Double Immediate: matches any ConD
  1.3041 +operand immD()
  1.3042 +%{
  1.3043 +  match(ConD);
  1.3044 +
  1.3045 +  op_cost(15);
  1.3046 +  format %{ %}
  1.3047 +  interface(CONST_INTER);
  1.3048 +%}
  1.3049 +
  1.3050 +// Immediates for special shifts (sign extend)
  1.3051 +
  1.3052 +// Integer constant 16, one of the special shift counts
  1.3053 +operand immI_16()
  1.3054 +%{
  1.3055 +  predicate(n->get_int() == 16);
  1.3056 +  match(ConI);
  1.3057 +
  1.3058 +  format %{ %}
  1.3059 +  interface(CONST_INTER);
  1.3060 +%}
  1.3061 +
  1.3062 +operand immI_24()
  1.3063 +%{ // Integer constant 24
  1.3064 +  predicate(n->get_int() == 24);
  1.3065 +  match(ConI);
  1.3066 +
  1.3067 +  format %{ %}
  1.3068 +  interface(CONST_INTER);
  1.3069 +%}
  1.3070 +
  1.3071 +// Integer constant 255, for byte-wide masking
  1.3072 +operand immI_255()
  1.3073 +%{
  1.3074 +  predicate(n->get_int() == 255);
  1.3075 +  match(ConI);
  1.3076 +
  1.3077 +  format %{ %}
  1.3078 +  interface(CONST_INTER);
  1.3079 +%}
  1.3080 +
  1.3081 +// Integer constant 65535, for short-wide masking
  1.3082 +operand immI_65535()
  1.3083 +%{
  1.3084 +  predicate(n->get_int() == 65535);
  1.3085 +  match(ConI);
  1.3086 +
  1.3087 +  format %{ %}
  1.3088 +  interface(CONST_INTER);
  1.3089 +%}
  1.3090 +
  1.3091 +// Long constant 255, for byte-wide masking
  1.3092 +operand immL_255()
  1.3093 +%{
  1.3094 +  predicate(n->get_long() == 255);
  1.3095 +  match(ConL);
  1.3096 +
  1.3097 +  format %{ %}
  1.3098 +  interface(CONST_INTER);
  1.3099 +%}
  1.3100 +
  1.3101 +// Long constant 65535, for short-wide masking
  1.3102 +operand immL_65535()
  1.3103 +%{
  1.3104 +  predicate(n->get_long() == 65535);
  1.3105 +  match(ConL);
  1.3106 +
  1.3107 +  format %{ %}
  1.3108 +  interface(CONST_INTER);
  1.3109 +%}
  1.3110 +
  1.3111 +// Register Operands
  1.3112 +// Integer Register: allocated from int_reg; also matches the five fixed-register operands listed
  1.3113 +operand rRegI()
  1.3114 +%{
  1.3115 +  constraint(ALLOC_IN_RC(int_reg));
  1.3116 +  match(RegI);
  1.3117 +
  1.3118 +  match(rax_RegI);
  1.3119 +  match(rbx_RegI);
  1.3120 +  match(rcx_RegI);
  1.3121 +  match(rdx_RegI);
  1.3122 +  match(rdi_RegI);
  1.3123 +
  1.3124 +  format %{ %}
  1.3125 +  interface(REG_INTER);
  1.3126 +%}
  1.3127 +
  1.3128 +// Special Registers: rax_RegI allocates from int_rax_reg and prints as RAX
  1.3129 +operand rax_RegI()
  1.3130 +%{
  1.3131 +  constraint(ALLOC_IN_RC(int_rax_reg));
  1.3132 +  match(RegI);
  1.3133 +  match(rRegI);
  1.3134 +
  1.3135 +  format %{ "RAX" %}
  1.3136 +  interface(REG_INTER);
  1.3137 +%}
  1.3138 +
  1.3139 +// Special Registers: rbx_RegI allocates from int_rbx_reg and prints as RBX
  1.3140 +operand rbx_RegI()
  1.3141 +%{
  1.3142 +  constraint(ALLOC_IN_RC(int_rbx_reg));
  1.3143 +  match(RegI);
  1.3144 +  match(rRegI);
  1.3145 +
  1.3146 +  format %{ "RBX" %}
  1.3147 +  interface(REG_INTER);
  1.3148 +%}
  1.3149 +
  1.3150 +operand rcx_RegI()
  1.3151 +%{ // Integer operand allocated from int_rcx_reg; prints as RCX
  1.3152 +  constraint(ALLOC_IN_RC(int_rcx_reg));
  1.3153 +  match(RegI);
  1.3154 +  match(rRegI);
  1.3155 +
  1.3156 +  format %{ "RCX" %}
  1.3157 +  interface(REG_INTER);
  1.3158 +%}
  1.3159 +
  1.3160 +operand rdx_RegI()
  1.3161 +%{ // Integer operand allocated from int_rdx_reg; prints as RDX
  1.3162 +  constraint(ALLOC_IN_RC(int_rdx_reg));
  1.3163 +  match(RegI);
  1.3164 +  match(rRegI);
  1.3165 +
  1.3166 +  format %{ "RDX" %}
  1.3167 +  interface(REG_INTER);
  1.3168 +%}
  1.3169 +
  1.3170 +operand rdi_RegI()
  1.3171 +%{ // Integer operand allocated from int_rdi_reg; prints as RDI
  1.3172 +  constraint(ALLOC_IN_RC(int_rdi_reg));
  1.3173 +  match(RegI);
  1.3174 +  match(rRegI);
  1.3175 +
  1.3176 +  format %{ "RDI" %}
  1.3177 +  interface(REG_INTER);
  1.3178 +%}
  1.3179 +
  1.3180 +operand no_rcx_RegI()
  1.3181 +%{ // Integer operand allocated from int_no_rcx_reg; rcx_RegI is not among its matches
  1.3182 +  constraint(ALLOC_IN_RC(int_no_rcx_reg));
  1.3183 +  match(RegI);
  1.3184 +  match(rax_RegI);
  1.3185 +  match(rbx_RegI);
  1.3186 +  match(rdx_RegI);
  1.3187 +  match(rdi_RegI);
  1.3188 +
  1.3189 +  format %{ %}
  1.3190 +  interface(REG_INTER);
  1.3191 +%}
  1.3192 +
  1.3193 +operand no_rax_rdx_RegI()
  1.3194 +%{
  1.3195 +  constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));  // int operand allocated from register class int_no_rax_rdx_reg
  1.3196 +  match(RegI);
  1.3197 +  match(rbx_RegI);  // fixed-register inputs accepted here; rax_RegI and rdx_RegI are not listed
  1.3198 +  match(rcx_RegI);
  1.3199 +  match(rdi_RegI);
  1.3200 +
  1.3201 +  format %{ %}
  1.3202 +  interface(REG_INTER);
  1.3203 +%}
  1.3204 +
  1.3205 +// Pointer Register: widest pointer operand, allocated from register class any_reg.
  1.3206 +operand any_RegP()
  1.3207 +%{
  1.3208 +  constraint(ALLOC_IN_RC(any_reg));
  1.3209 +  match(RegP);
  1.3210 +  match(rax_RegP);  // the fixed-register pointer operands and rRegP are accepted as inputs
  1.3211 +  match(rbx_RegP);
  1.3212 +  match(rdi_RegP);
  1.3213 +  match(rsi_RegP);
  1.3214 +  match(rbp_RegP);
  1.3215 +  match(r15_RegP);
  1.3216 +  match(rRegP);
  1.3217 +
  1.3218 +  format %{ %}
  1.3219 +  interface(REG_INTER);
  1.3220 +%}
  1.3221 +
  1.3222 +operand rRegP()
  1.3223 +%{
  1.3224 +  constraint(ALLOC_IN_RC(ptr_reg));  // general pointer operand allocated from register class ptr_reg
  1.3225 +  match(RegP);
  1.3226 +  match(rax_RegP);  // fixed-register pointer operands accepted as inputs
  1.3227 +  match(rbx_RegP);
  1.3228 +  match(rdi_RegP);
  1.3229 +  match(rsi_RegP);
  1.3230 +  match(rbp_RegP);
  1.3231 +  match(r15_RegP);  // See Q&A below about r15_RegP.
  1.3232 +
  1.3233 +  format %{ %}
  1.3234 +  interface(REG_INTER);
  1.3235 +%}
  1.3236 +
  1.3237 +operand rRegN() %{
  1.3238 +  constraint(ALLOC_IN_RC(int_reg));  // RegN operand shares the int_reg register class with rRegI
  1.3239 +  match(RegN);
  1.3240 +
  1.3241 +  format %{ %}
  1.3242 +  interface(REG_INTER);
  1.3243 +%}
  1.3244 +
  1.3245 +// Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
  1.3246 +// Answer: Operand match rules govern the DFA as it processes instruction inputs.
  1.3247 +// It's fine for an instruction input which expects rRegP to match a r15_RegP.
  1.3248 +// The output of an instruction is controlled by the allocator, which respects
  1.3249 +// register class masks, not match rules.  Unless an instruction mentions
  1.3250 +// r15_RegP or any_RegP explicitly as its output, r15 will not be considered
  1.3251 +// by the allocator as an input.
  1.3252 +
  1.3253 +operand no_rax_RegP()
  1.3254 +%{
  1.3255 +  constraint(ALLOC_IN_RC(ptr_no_rax_reg));  // pointer operand allocated from register class ptr_no_rax_reg
  1.3256 +  match(RegP);
  1.3257 +  match(rbx_RegP);  // fixed-register inputs accepted here; rax_RegP is not listed
  1.3258 +  match(rsi_RegP);
  1.3259 +  match(rdi_RegP);
  1.3260 +
  1.3261 +  format %{ %}
  1.3262 +  interface(REG_INTER);
  1.3263 +%}
  1.3264 +
  1.3265 +operand no_rbp_RegP()
  1.3266 +%{
  1.3267 +  constraint(ALLOC_IN_RC(ptr_no_rbp_reg));  // pointer operand allocated from register class ptr_no_rbp_reg
  1.3268 +  match(RegP);
  1.3269 +  match(rbx_RegP);  // fixed-register inputs accepted here; rbp_RegP is not listed
  1.3270 +  match(rsi_RegP);
  1.3271 +  match(rdi_RegP);
  1.3272 +
  1.3273 +  format %{ %}
  1.3274 +  interface(REG_INTER);
  1.3275 +%}
  1.3276 +
  1.3277 +operand no_rax_rbx_RegP()
  1.3278 +%{
  1.3279 +  constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg));  // pointer operand allocated from register class ptr_no_rax_rbx_reg
  1.3280 +  match(RegP);
  1.3281 +  match(rsi_RegP);  // fixed-register inputs accepted here; rax_RegP and rbx_RegP are not listed
  1.3282 +  match(rdi_RegP);
  1.3283 +
  1.3284 +  format %{ %}
  1.3285 +  interface(REG_INTER);
  1.3286 +%}
  1.3287 +
  1.3288 +// Special Registers: pointer operand restricted to register class ptr_rax_reg.
  1.3289 +// Return a pointer value
  1.3290 +operand rax_RegP()
  1.3291 +%{
  1.3292 +  constraint(ALLOC_IN_RC(ptr_rax_reg));
  1.3293 +  match(RegP);
  1.3294 +  match(rRegP);
  1.3295 +
  1.3296 +  format %{ %}
  1.3297 +  interface(REG_INTER);
  1.3298 +%}
  1.3299 +
  1.3300 +// Special Registers: RegN operand restricted to register class int_rax_reg.
  1.3301 +// Return a compressed pointer value
  1.3302 +operand rax_RegN()
  1.3303 +%{
  1.3304 +  constraint(ALLOC_IN_RC(int_rax_reg));
  1.3305 +  match(RegN);
  1.3306 +  match(rRegN);
  1.3307 +
  1.3308 +  format %{ %}
  1.3309 +  interface(REG_INTER);
  1.3310 +%}
  1.3311 +
  1.3312 +// Pointer operand restricted to register class ptr_rbx_reg. Used in AtomicAdd
  1.3313 +operand rbx_RegP()
  1.3314 +%{
  1.3315 +  constraint(ALLOC_IN_RC(ptr_rbx_reg));
  1.3316 +  match(RegP);
  1.3317 +  match(rRegP);
  1.3318 +
  1.3319 +  format %{ %}
  1.3320 +  interface(REG_INTER);
  1.3321 +%}
  1.3322 +
  1.3323 +operand rsi_RegP()
  1.3324 +%{
  1.3325 +  constraint(ALLOC_IN_RC(ptr_rsi_reg));  // pointer operand restricted to register class ptr_rsi_reg
  1.3326 +  match(RegP);
  1.3327 +  match(rRegP);
  1.3328 +
  1.3329 +  format %{ %}
  1.3330 +  interface(REG_INTER);
  1.3331 +%}
  1.3332 +
  1.3333 +// Pointer operand restricted to register class ptr_rdi_reg. Used in rep stosq
  1.3334 +operand rdi_RegP()
  1.3335 +%{
  1.3336 +  constraint(ALLOC_IN_RC(ptr_rdi_reg));
  1.3337 +  match(RegP);
  1.3338 +  match(rRegP);
  1.3339 +
  1.3340 +  format %{ %}
  1.3341 +  interface(REG_INTER);
  1.3342 +%}
  1.3343 +
  1.3344 +operand rbp_RegP()
  1.3345 +%{
  1.3346 +  constraint(ALLOC_IN_RC(ptr_rbp_reg));  // pointer operand restricted to register class ptr_rbp_reg
  1.3347 +  match(RegP);
  1.3348 +  match(rRegP);
  1.3349 +
  1.3350 +  format %{ %}
  1.3351 +  interface(REG_INTER);
  1.3352 +%}
  1.3353 +
  1.3354 +operand r15_RegP()
  1.3355 +%{
  1.3356 +  constraint(ALLOC_IN_RC(ptr_r15_reg));  // pointer operand restricted to register class ptr_r15_reg (r15 is the read-only TLS register per the Q&A above)
  1.3357 +  match(RegP);
  1.3358 +  match(rRegP);
  1.3359 +
  1.3360 +  format %{ %}
  1.3361 +  interface(REG_INTER);
  1.3362 +%}
  1.3363 +
  1.3364 +operand rRegL()
  1.3365 +%{
  1.3366 +  constraint(ALLOC_IN_RC(long_reg));  // general long operand allocated from register class long_reg
  1.3367 +  match(RegL);
  1.3368 +  match(rax_RegL);  // fixed-register long operands accepted as inputs
  1.3369 +  match(rdx_RegL);
  1.3370 +
  1.3371 +  format %{ %}
  1.3372 +  interface(REG_INTER);
  1.3373 +%}
  1.3374 +
  1.3375 +// Special Registers: long operand allocated from register class long_no_rax_rdx_reg.
  1.3376 +operand no_rax_rdx_RegL()
  1.3377 +%{
  1.3378 +  constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
  1.3379 +  match(RegL);
  1.3380 +  match(rRegL);
  1.3381 +
  1.3382 +  format %{ %}
  1.3383 +  interface(REG_INTER);
  1.3384 +%}
  1.3385 +
  1.3386 +operand no_rax_RegL()
  1.3387 +%{
  1.3388 +  constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));  // NOTE(review): same class as no_rax_rdx_RegL, although this operand is named no_rax only — confirm intended
  1.3389 +  match(RegL);
  1.3390 +  match(rRegL);
  1.3391 +  match(rdx_RegL);  // rdx_RegL is accepted as an input even though the allocation class name excludes rdx
  1.3392 +
  1.3393 +  format %{ %}
  1.3394 +  interface(REG_INTER);
  1.3395 +%}
  1.3396 +
  1.3397 +operand no_rcx_RegL()
  1.3398 +%{
  1.3399 +  constraint(ALLOC_IN_RC(long_no_rcx_reg));  // long operand allocated from register class long_no_rcx_reg
  1.3400 +  match(RegL);
  1.3401 +  match(rRegL);
  1.3402 +
  1.3403 +  format %{ %}
  1.3404 +  interface(REG_INTER);
  1.3405 +%}
  1.3406 +
  1.3407 +operand rax_RegL()
  1.3408 +%{
  1.3409 +  constraint(ALLOC_IN_RC(long_rax_reg));  // long operand restricted to register class long_rax_reg; printed as "RAX"
  1.3410 +  match(RegL);
  1.3411 +  match(rRegL);
  1.3412 +
  1.3413 +  format %{ "RAX" %}
  1.3414 +  interface(REG_INTER);
  1.3415 +%}
  1.3416 +
  1.3417 +operand rcx_RegL()
  1.3418 +%{
  1.3419 +  constraint(ALLOC_IN_RC(long_rcx_reg));  // long operand restricted to register class long_rcx_reg
  1.3420 +  match(RegL);
  1.3421 +  match(rRegL);
  1.3422 +
  1.3423 +  format %{ %}
  1.3424 +  interface(REG_INTER);
  1.3425 +%}
  1.3426 +
  1.3427 +operand rdx_RegL()
  1.3428 +%{
  1.3429 +  constraint(ALLOC_IN_RC(long_rdx_reg));  // long operand restricted to register class long_rdx_reg
  1.3430 +  match(RegL);
  1.3431 +  match(rRegL);
  1.3432 +
  1.3433 +  format %{ %}
  1.3434 +  interface(REG_INTER);
  1.3435 +%}
  1.3436 +
  1.3437 +// Flags register, used as output of compare instructions; allocated from class int_flags.
  1.3438 +operand rFlagsReg()
  1.3439 +%{
  1.3440 +  constraint(ALLOC_IN_RC(int_flags));
  1.3441 +  match(RegFlags);
  1.3442 +
  1.3443 +  format %{ "RFLAGS" %}
  1.3444 +  interface(REG_INTER);
  1.3445 +%}
  1.3446 +
  1.3447 +// Flags register, used as output of FLOATING POINT compare instructions (NOTE(review): the U suffix suggests unsigned compares too — confirm against the compare instructs)
  1.3448 +operand rFlagsRegU()
  1.3449 +%{
  1.3450 +  constraint(ALLOC_IN_RC(int_flags));  // same register class as rFlagsReg; only the operand type differs
  1.3451 +  match(RegFlags);
  1.3452 +
  1.3453 +  format %{ "RFLAGS_U" %}
  1.3454 +  interface(REG_INTER);
  1.3455 +%}
  1.3456 +
  1.3457 +operand rFlagsRegUCF() %{
  1.3458 +  constraint(ALLOC_IN_RC(int_flags));  // same register class as rFlagsReg and rFlagsRegU
  1.3459 +  match(RegFlags);
  1.3460 +  predicate(false);  // NOTE(review): presumably keeps the matcher from selecting this operand implicitly — confirm
  1.3461 +
  1.3462 +  format %{ "RFLAGS_U_CF" %}
  1.3463 +  interface(REG_INTER);
  1.3464 +%}
  1.3465 +
  1.3466 +// Float register operands: matches RegF, allocated from register class float_reg.
  1.3467 +operand regF()
  1.3468 +%{
  1.3469 +  constraint(ALLOC_IN_RC(float_reg));
  1.3470 +  match(RegF);
  1.3471 +
  1.3472 +  format %{ %}
  1.3473 +  interface(REG_INTER);
  1.3474 +%}
  1.3475 +
  1.3476 +// Double register operands: matches RegD, allocated from register class double_reg.
  1.3477 +operand regD()
  1.3478 +%{
  1.3479 +  constraint(ALLOC_IN_RC(double_reg));
  1.3480 +  match(RegD);
  1.3481 +
  1.3482 +  format %{ %}
  1.3483 +  interface(REG_INTER);
  1.3484 +%}
  1.3485 +
  1.3486 +//----------Memory Operands----------------------------------------------------
  1.3487 +// Direct Memory Operand
  1.3488 +// operand direct(immP addr)
  1.3489 +// %{
  1.3490 +//   match(addr);
  1.3491 +
  1.3492 +//   format %{ "[$addr]" %}
  1.3493 +//   interface(MEMORY_INTER) %{
  1.3494 +//     base(0xFFFFFFFF);
  1.3495 +//     index(0x4);
  1.3496 +//     scale(0x0);
  1.3497 +//     disp($addr);
  1.3498 +//   %}
  1.3499 +// %}
  1.3500 +
  1.3501 +// Indirect Memory Operand: address is the base register alone, [reg].
  1.3502 +operand indirect(any_RegP reg)
  1.3503 +%{
  1.3504 +  constraint(ALLOC_IN_RC(ptr_reg));
  1.3505 +  match(reg);
  1.3506 +
  1.3507 +  format %{ "[$reg]" %}
  1.3508 +  interface(MEMORY_INTER) %{
  1.3509 +    base($reg);
  1.3510 +    index(0x4);  // No Index (same encoding the stack-slot operands use)
  1.3511 +    scale(0x0);  // No Scale
  1.3512 +    disp(0x0);   // No displacement
  1.3513 +  %}
  1.3514 +%}
  1.3515 +
  1.3516 +// Indirect Memory Plus Short Offset Operand: [reg + off], off typed immL8.
  1.3517 +operand indOffset8(any_RegP reg, immL8 off)
  1.3518 +%{
  1.3519 +  constraint(ALLOC_IN_RC(ptr_reg));
  1.3520 +  match(AddP reg off);
  1.3521 +
  1.3522 +  format %{ "[$reg + $off (8-bit)]" %}
  1.3523 +  interface(MEMORY_INTER) %{
  1.3524 +    base($reg);
  1.3525 +    index(0x4);  // No Index
  1.3526 +    scale(0x0);  // No Scale
  1.3527 +    disp($off);
  1.3528 +  %}
  1.3529 +%}
  1.3530 +
  1.3531 +// Indirect Memory Plus Long Offset Operand: [reg + off], off typed immL32.
  1.3532 +operand indOffset32(any_RegP reg, immL32 off)
  1.3533 +%{
  1.3534 +  constraint(ALLOC_IN_RC(ptr_reg));
  1.3535 +  match(AddP reg off);
  1.3536 +
  1.3537 +  format %{ "[$reg + $off (32-bit)]" %}
  1.3538 +  interface(MEMORY_INTER) %{
  1.3539 +    base($reg);
  1.3540 +    index(0x4);  // No Index
  1.3541 +    scale(0x0);  // No Scale
  1.3542 +    disp($off);
  1.3543 +  %}
  1.3544 +%}
  1.3545 +
  1.3546 +// Indirect Memory Plus Index Register Plus Offset Operand: [reg + lreg + off], unscaled index.
  1.3547 +operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
  1.3548 +%{
  1.3549 +  constraint(ALLOC_IN_RC(ptr_reg));
  1.3550 +  match(AddP (AddP reg lreg) off);
  1.3551 +
  1.3552 +  op_cost(10);
  1.3553 +  format %{"[$reg + $off + $lreg]" %}
  1.3554 +  interface(MEMORY_INTER) %{
  1.3555 +    base($reg);
  1.3556 +    index($lreg);
  1.3557 +    scale(0x0);  // No Scale
  1.3558 +    disp($off);
  1.3559 +  %}
  1.3560 +%}
  1.3561 +
  1.3562 +// Indirect Memory Plus Index Register Operand: [reg + lreg], unscaled index, no displacement.
  1.3563 +operand indIndex(any_RegP reg, rRegL lreg)
  1.3564 +%{
  1.3565 +  constraint(ALLOC_IN_RC(ptr_reg));
  1.3566 +  match(AddP reg lreg);
  1.3567 +
  1.3568 +  op_cost(10);
  1.3569 +  format %{"[$reg + $lreg]" %}
  1.3570 +  interface(MEMORY_INTER) %{
  1.3571 +    base($reg);
  1.3572 +    index($lreg);
  1.3573 +    scale(0x0);  // No Scale
  1.3574 +    disp(0x0);   // No displacement
  1.3575 +  %}
  1.3576 +%}
  1.3577 +
  1.3578 +// Indirect Memory Times Scale Plus Index Register: [reg + (lreg << scale)], scale typed immI2.
  1.3579 +operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
  1.3580 +%{
  1.3581 +  constraint(ALLOC_IN_RC(ptr_reg));
  1.3582 +  match(AddP reg (LShiftL lreg scale));
  1.3583 +
  1.3584 +  op_cost(10);
  1.3585 +  format %{"[$reg + $lreg << $scale]" %}
  1.3586 +  interface(MEMORY_INTER) %{
  1.3587 +    base($reg);
  1.3588 +    index($lreg);
  1.3589 +    scale($scale);
  1.3590 +    disp(0x0);  // No displacement
  1.3591 +  %}
  1.3592 +%}
  1.3593 +
  1.3594 +// Indirect Memory Times Scale Plus Index Register Plus Offset Operand: [reg + (lreg << scale) + off].
  1.3595 +operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
  1.3596 +%{
  1.3597 +  constraint(ALLOC_IN_RC(ptr_reg));
  1.3598 +  match(AddP (AddP reg (LShiftL lreg scale)) off);
  1.3599 +
  1.3600 +  op_cost(10);
  1.3601 +  format %{"[$reg + $off + $lreg << $scale]" %}
  1.3602 +  interface(MEMORY_INTER) %{
  1.3603 +    base($reg);
  1.3604 +    index($lreg);
  1.3605 +    scale($scale);
  1.3606 +    disp($off);
  1.3607 +  %}
  1.3608 +%}
  1.3609 +
  1.3610 +// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand: the index is an int register widened by ConvI2L.
  1.3611 +operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
  1.3612 +%{
  1.3613 +  constraint(ALLOC_IN_RC(ptr_reg));
  1.3614 +  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // only when the inspected node's long type has a lower bound >= 0
  1.3615 +  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
  1.3616 +
  1.3617 +  op_cost(10);
  1.3618 +  format %{"[$reg + $off + $idx << $scale]" %}
  1.3619 +  interface(MEMORY_INTER) %{
  1.3620 +    base($reg);
  1.3621 +    index($idx);
  1.3622 +    scale($scale);
  1.3623 +    disp($off);
  1.3624 +  %}
  1.3625 +%}
  1.3626 +
  1.3627 +// Indirect Narrow Oop Plus Offset Operand: folds the DecodeN into the address as [R12 + reg << 3 + off].
  1.3628 +// Note: x86 architecture doesn't support "scale * index + offset" without a base
  1.3629 +// we can't free r12 even with Universe::narrow_oop_base() == NULL.
  1.3630 +operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  1.3631 +  predicate(UseCompressedOops && (Universe::narrow_oop_shift() == Address::times_8));  // only when compressed oops are on and the shift equals the times_8 scale
  1.3632 +  constraint(ALLOC_IN_RC(ptr_reg));
  1.3633 +  match(AddP (DecodeN reg) off);
  1.3634 +
  1.3635 +  op_cost(10);
  1.3636 +  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  1.3637 +  interface(MEMORY_INTER) %{
  1.3638 +    base(0xc); // R12
  1.3639 +    index($reg);  // the narrow oop register itself is the index
  1.3640 +    scale(0x3);   // index << 3, matching the format string
  1.3641 +    disp($off);
  1.3642 +  %}
  1.3643 +%}
  1.3644 +
  1.3645 +// Indirect Memory Operand, narrow-oop form: a DecodeN of reg is used directly as the base.
  1.3646 +operand indirectNarrow(rRegN reg)
  1.3647 +%{
  1.3648 +  predicate(Universe::narrow_oop_shift() == 0);  // only when the narrow oop shift is zero
  1.3649 +  constraint(ALLOC_IN_RC(ptr_reg));
  1.3650 +  match(DecodeN reg);
  1.3651 +
  1.3652 +  format %{ "[$reg]" %}
  1.3653 +  interface(MEMORY_INTER) %{
  1.3654 +    base($reg);
  1.3655 +    index(0x4);  // No Index
  1.3656 +    scale(0x0);  // No Scale
  1.3657 +    disp(0x0);   // No displacement
  1.3658 +  %}
  1.3659 +%}
  1.3660 +
  1.3661 +// Indirect Memory Plus Short Offset Operand, narrow-oop form: [DecodeN(reg) + off], off typed immL8.
  1.3662 +operand indOffset8Narrow(rRegN reg, immL8 off)
  1.3663 +%{
  1.3664 +  predicate(Universe::narrow_oop_shift() == 0);  // only when the narrow oop shift is zero
  1.3665 +  constraint(ALLOC_IN_RC(ptr_reg));
  1.3666 +  match(AddP (DecodeN reg) off);
  1.3667 +
  1.3668 +  format %{ "[$reg + $off (8-bit)]" %}
  1.3669 +  interface(MEMORY_INTER) %{
  1.3670 +    base($reg);
  1.3671 +    index(0x4);  // No Index
  1.3672 +    scale(0x0);  // No Scale
  1.3673 +    disp($off);
  1.3674 +  %}
  1.3675 +%}
  1.3676 +
  1.3677 +// Indirect Memory Plus Long Offset Operand, narrow-oop form: [DecodeN(reg) + off], off typed immL32.
  1.3678 +operand indOffset32Narrow(rRegN reg, immL32 off)
  1.3679 +%{
  1.3680 +  predicate(Universe::narrow_oop_shift() == 0);  // only when the narrow oop shift is zero
  1.3681 +  constraint(ALLOC_IN_RC(ptr_reg));
  1.3682 +  match(AddP (DecodeN reg) off);
  1.3683 +
  1.3684 +  format %{ "[$reg + $off (32-bit)]" %}
  1.3685 +  interface(MEMORY_INTER) %{
  1.3686 +    base($reg);
  1.3687 +    index(0x4);  // No Index
  1.3688 +    scale(0x0);  // No Scale
  1.3689 +    disp($off);
  1.3690 +  %}
  1.3691 +%}
  1.3692 +
  1.3693 +// Indirect Memory Plus Index Register Plus Offset Operand, narrow-oop form: [DecodeN(reg) + lreg + off].
  1.3694 +operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
  1.3695 +%{
  1.3696 +  predicate(Universe::narrow_oop_shift() == 0);  // only when the narrow oop shift is zero
  1.3697 +  constraint(ALLOC_IN_RC(ptr_reg));
  1.3698 +  match(AddP (AddP (DecodeN reg) lreg) off);
  1.3699 +
  1.3700 +  op_cost(10);
  1.3701 +  format %{"[$reg + $off + $lreg]" %}
  1.3702 +  interface(MEMORY_INTER) %{
  1.3703 +    base($reg);
  1.3704 +    index($lreg);
  1.3705 +    scale(0x0);  // No Scale
  1.3706 +    disp($off);
  1.3707 +  %}
  1.3708 +%}
  1.3709 +
  1.3710 +// Indirect Memory Plus Index Register Operand, narrow-oop form: [DecodeN(reg) + lreg], no displacement.
  1.3711 +operand indIndexNarrow(rRegN reg, rRegL lreg)
  1.3712 +%{
  1.3713 +  predicate(Universe::narrow_oop_shift() == 0);  // only when the narrow oop shift is zero
  1.3714 +  constraint(ALLOC_IN_RC(ptr_reg));
  1.3715 +  match(AddP (DecodeN reg) lreg);
  1.3716 +
  1.3717 +  op_cost(10);
  1.3718 +  format %{"[$reg + $lreg]" %}
  1.3719 +  interface(MEMORY_INTER) %{
  1.3720 +    base($reg);
  1.3721 +    index($lreg);
  1.3722 +    scale(0x0);  // No Scale
  1.3723 +    disp(0x0);   // No displacement
  1.3724 +  %}
  1.3725 +%}
  1.3726 +
  1.3727 +// Indirect Memory Times Scale Plus Index Register, narrow-oop form: [DecodeN(reg) + (lreg << scale)].
  1.3728 +operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
  1.3729 +%{
  1.3730 +  predicate(Universe::narrow_oop_shift() == 0);  // only when the narrow oop shift is zero
  1.3731 +  constraint(ALLOC_IN_RC(ptr_reg));
  1.3732 +  match(AddP (DecodeN reg) (LShiftL lreg scale));
  1.3733 +
  1.3734 +  op_cost(10);
  1.3735 +  format %{"[$reg + $lreg << $scale]" %}
  1.3736 +  interface(MEMORY_INTER) %{
  1.3737 +    base($reg);
  1.3738 +    index($lreg);
  1.3739 +    scale($scale);
  1.3740 +    disp(0x0);  // No displacement
  1.3741 +  %}
  1.3742 +%}
  1.3743 +
  1.3744 +// Indirect Memory Times Scale Plus Index Register Plus Offset Operand, narrow-oop form: [DecodeN(reg) + (lreg << scale) + off].
  1.3745 +operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
  1.3746 +%{
  1.3747 +  predicate(Universe::narrow_oop_shift() == 0);  // only when the narrow oop shift is zero
  1.3748 +  constraint(ALLOC_IN_RC(ptr_reg));
  1.3749 +  match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
  1.3750 +
  1.3751 +  op_cost(10);
  1.3752 +  format %{"[$reg + $off + $lreg << $scale]" %}
  1.3753 +  interface(MEMORY_INTER) %{
  1.3754 +    base($reg);
  1.3755 +    index($lreg);
  1.3756 +    scale($scale);
  1.3757 +    disp($off);
  1.3758 +  %}
  1.3759 +%}
  1.3760 +
  1.3761 +// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand, narrow-oop form of indPosIndexScaleOffset.
  1.3762 +operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
  1.3763 +%{
  1.3764 +  constraint(ALLOC_IN_RC(ptr_reg));
  1.3765 +  predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // zero narrow oop shift AND a long type with lower bound >= 0
  1.3766 +  match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
  1.3767 +
  1.3768 +  op_cost(10);
  1.3769 +  format %{"[$reg + $off + $idx << $scale]" %}
  1.3770 +  interface(MEMORY_INTER) %{
  1.3771 +    base($reg);
  1.3772 +    index($idx);
  1.3773 +    scale($scale);
  1.3774 +    disp($off);
  1.3775 +  %}
  1.3776 +%}
  1.3777 +
  1.3778 +//----------Special Memory Operands--------------------------------------------
  1.3779 +// Stack Slot Operand - This operand is used for loading and storing temporary
  1.3780 +//                      values on the stack where a match requires a value to
  1.3781 +//                      flow through memory.  This is the pointer (sRegP) variant.
  1.3782 +operand stackSlotP(sRegP reg)
  1.3783 +%{
  1.3784 +  constraint(ALLOC_IN_RC(stack_slots));
  1.3785 +  // No match rule because this operand is only generated in matching
  1.3786 +
  1.3787 +  format %{ "[$reg]" %}
  1.3788 +  interface(MEMORY_INTER) %{
  1.3789 +    base(0x4);   // RSP
  1.3790 +    index(0x4);  // No Index
  1.3791 +    scale(0x0);  // No Scale
  1.3792 +    disp($reg);  // Stack Offset
  1.3793 +  %}
  1.3794 +%}
  1.3795 +
  1.3796 +operand stackSlotI(sRegI reg)
  1.3797 +%{
  1.3798 +  constraint(ALLOC_IN_RC(stack_slots));  // int (sRegI) stack slot, addressed as an RSP-relative memory operand
  1.3799 +  // No match rule because this operand is only generated in matching
  1.3800 +
  1.3801 +  format %{ "[$reg]" %}
  1.3802 +  interface(MEMORY_INTER) %{
  1.3803 +    base(0x4);   // RSP
  1.3804 +    index(0x4);  // No Index
  1.3805 +    scale(0x0);  // No Scale
  1.3806 +    disp($reg);  // Stack Offset
  1.3807 +  %}
  1.3808 +%}
  1.3809 +
  1.3810 +operand stackSlotF(sRegF reg)
  1.3811 +%{
  1.3812 +  constraint(ALLOC_IN_RC(stack_slots));  // float (sRegF) stack slot, addressed as an RSP-relative memory operand
  1.3813 +  // No match rule because this operand is only generated in matching
  1.3814 +
  1.3815 +  format %{ "[$reg]" %}
  1.3816 +  interface(MEMORY_INTER) %{
  1.3817 +    base(0x4);   // RSP
  1.3818 +    index(0x4);  // No Index
  1.3819 +    scale(0x0);  // No Scale
  1.3820 +    disp($reg);  // Stack Offset
  1.3821 +  %}
  1.3822 +%}
  1.3823 +
  1.3824 +operand stackSlotD(sRegD reg)
  1.3825 +%{
  1.3826 +  constraint(ALLOC_IN_RC(stack_slots));  // double (sRegD) stack slot, addressed as an RSP-relative memory operand
  1.3827 +  // No match rule because this operand is only generated in matching
  1.3828 +
  1.3829 +  format %{ "[$reg]" %}
  1.3830 +  interface(MEMORY_INTER) %{
  1.3831 +    base(0x4);   // RSP
  1.3832 +    index(0x4);  // No Index
  1.3833 +    scale(0x0);  // No Scale
  1.3834 +    disp($reg);  // Stack Offset
  1.3835 +  %}
  1.3836 +%}
  1.3837 +operand stackSlotL(sRegL reg)
  1.3838 +%{
  1.3839 +  constraint(ALLOC_IN_RC(stack_slots));  // long (sRegL) stack slot, addressed as an RSP-relative memory operand
  1.3840 +  // No match rule because this operand is only generated in matching
  1.3841 +
  1.3842 +  format %{ "[$reg]" %}
  1.3843 +  interface(MEMORY_INTER) %{
  1.3844 +    base(0x4);   // RSP
  1.3845 +    index(0x4);  // No Index
  1.3846 +    scale(0x0);  // No Scale
  1.3847 +    disp($reg);  // Stack Offset
  1.3848 +  %}
  1.3849 +%}
  1.3850 +
  1.3851 +//----------Conditional Branch Operands----------------------------------------
  1.3852 +// Comparison Op  - This is the operation of the comparison, and is limited to
  1.3853 +//                  the following set of codes:
  1.3854 +//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
  1.3855 +//
  1.3856 +// Other attributes of the comparison, such as unsignedness, are specified
  1.3857 +// by the comparison instruction that sets a condition code flags register.
  1.3858 +// That result is represented by a flags operand whose subtype is appropriate
  1.3859 +// to the unsignedness (etc.) of the comparison.
  1.3860 +//
  1.3861 +// Later, the instruction which matches both the Comparison Op (a Bool) and
  1.3862 +// the flags (produced by the Cmp) specifies the coding of the comparison op
  1.3863 +// by matching a specific subtype of Bool operand below, such as cmpOpU.
  1.3864 +
  1.3865 +// Comparison Code, signed compare: maps each Bool test to a condition code and mnemonic.
  1.3866 +operand cmpOp()
  1.3867 +%{
  1.3868 +  match(Bool);
  1.3869 +
  1.3870 +  format %{ "" %}
  1.3871 +  interface(COND_INTER) %{
  1.3872 +    equal(0x4, "e");
  1.3873 +    not_equal(0x5, "ne");
  1.3874 +    less(0xC, "l");  // ordering tests use the signed mnemonics l/ge/le/g
  1.3875 +    greater_equal(0xD, "ge");
  1.3876 +    less_equal(0xE, "le");
  1.3877 +    greater(0xF, "g");
  1.3878 +    overflow(0x0, "o");
  1.3879 +    no_overflow(0x1, "no");
  1.3880 +  %}
  1.3881 +%}
  1.3882 +
  1.3883 +// Comparison Code, unsigned compare.  Used by FP also, with
  1.3884 +// C2 (unordered) turned into GT or LT already.  The other bits
  1.3885 +// C0 and C3 are turned into Carry & Zero flags.
  1.3886 +operand cmpOpU()
  1.3887 +%{
  1.3888 +  match(Bool);
  1.3889 +
  1.3890 +  format %{ "" %}
  1.3891 +  interface(COND_INTER) %{
  1.3892 +    equal(0x4, "e");
  1.3893 +    not_equal(0x5, "ne");
  1.3894 +    less(0x2, "b");  // ordering tests use the below/above mnemonics b/nb/be/nbe
  1.3895 +    greater_equal(0x3, "nb");
  1.3896 +    less_equal(0x6, "be");
  1.3897 +    greater(0x7, "nbe");
  1.3898 +    overflow(0x0, "o");
  1.3899 +    no_overflow(0x1, "no");
  1.3900 +  %}
  1.3901 +%}
  1.3902 +
  1.3903 +
  1.3904 +// Floating comparisons that don't require any fixup for the unordered case: only lt, ge, le and gt tests match.
  1.3905 +operand cmpOpUCF() %{
  1.3906 +  match(Bool);
  1.3907 +  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
  1.3908 +            n->as_Bool()->_test._test == BoolTest::ge ||
  1.3909 +            n->as_Bool()->_test._test == BoolTest::le ||
  1.3910 +            n->as_Bool()->_test._test == BoolTest::gt);
  1.3911 +  format %{ "" %}
  1.3912 +  interface(COND_INTER) %{
  1.3913 +    equal(0x4, "e");  // code table is identical to cmpOpU
  1.3914 +    not_equal(0x5, "ne");
  1.3915 +    less(0x2, "b");
  1.3916 +    greater_equal(0x3, "nb");
  1.3917 +    less_equal(0x6, "be");
  1.3918 +    greater(0x7, "nbe");
  1.3919 +    overflow(0x0, "o");
  1.3920 +    no_overflow(0x1, "no");
  1.3921 +  %}
  1.3922 +%}
  1.3923 +
  1.3924 +
  1.3925 +// Floating comparisons that can be fixed up with extra conditional jumps: only ne and eq tests match.
  1.3926 +operand cmpOpUCF2() %{
  1.3927 +  match(Bool);
  1.3928 +  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
  1.3929 +            n->as_Bool()->_test._test == BoolTest::eq);
  1.3930 +  format %{ "" %}
  1.3931 +  interface(COND_INTER) %{
  1.3932 +    equal(0x4, "e");  // code table is identical to cmpOpU
  1.3933 +    not_equal(0x5, "ne");
  1.3934 +    less(0x2, "b");
  1.3935 +    greater_equal(0x3, "nb");
  1.3936 +    less_equal(0x6, "be");
  1.3937 +    greater(0x7, "nbe");
  1.3938 +    overflow(0x0, "o");
  1.3939 +    no_overflow(0x1, "no");
  1.3940 +  %}
  1.3941 +%}
  1.3942 +
  1.3943 +
  1.3944 +//----------OPERAND CLASSES----------------------------------------------------
  1.3945 +// Operand Classes are groups of operands that are used to simplify
  1.3946 +// instruction definitions by not requiring the AD writer to specify separate
  1.3947 +// instructions for every form of operand when the instruction accepts
  1.3948 +// multiple operand types with the same basic encoding and format.  The classic
  1.3949 +// case of this is memory operands.
  1.3950 +
  1.3951 +opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,  // plain-pointer addressing forms
  1.3952 +               indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset,
  1.3953 +               indCompressedOopOffset,  // R12-based compressed oop form
  1.3954 +               indirectNarrow, indOffset8Narrow, indOffset32Narrow,  // DecodeN-based forms, each guarded by a zero narrow oop shift
  1.3955 +               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
  1.3956 +               indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow);
  1.3957 +
  1.3958 +//----------PIPELINE-----------------------------------------------------------
  1.3959 +// Rules which define the behavior of the target architecture's pipeline.
  1.3960 +pipeline %{
  1.3961 +
  1.3962 +//----------ATTRIBUTES---------------------------------------------------------
  1.3963 +attributes %{
  1.3964 +  variable_size_instructions;        // Variable size instructions
  1.3965 +  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  1.3966 +  instruction_unit_size = 1;         // Instruction unit size is 1 byte
  1.3967 +  instruction_fetch_unit_size = 16;  // The processor fetches one line
  1.3968 +  instruction_fetch_units = 1;       // of 16 bytes
  1.3969 +
  1.3970 +  // List of nop instructions
  1.3971 +  nops( MachNop );
  1.3972 +%}
  1.3973 +
  1.3974 +//----------RESOURCES----------------------------------------------------------
  1.3975 +// Resources are the functional units available to the machine
  1.3976 +
  1.3977 +// Generic P2/P3 pipeline
  1.3978 +// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
  1.3979 +// 3 instructions decoded per cycle.
  1.3980 +// 2 load/store ops per cycle (NOTE(review): three units MS0-MS2 are declared below — confirm), 1 branch, 1 FPU,
  1.3981 +// 3 ALU op, only ALU0 handles mul instructions.
  1.3982 +resources( D0, D1, D2, DECODE = D0 | D1 | D2,  // DECODE means any one of the three decoders
  1.3983 +           MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,  // MEM means any one of the three memory units
  1.3984 +           BR, FPU,
  1.3985 +           ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);  // ALU means any one of the three ALUs
  1.3986 +
  1.3987 +//----------PIPELINE DESCRIPTION-----------------------------------------------
  1.3988 +// Pipeline Description specifies the stages in the machine's pipeline
  1.3989 +
  1.3990 +// Generic P2/P3 pipeline: six stages, S0 through S5, referenced by the pipe classes below.
  1.3991 +pipe_desc(S0, S1, S2, S3, S4, S5);
  1.3992 +
  1.3993 +//----------PIPELINE CLASSES---------------------------------------------------
  1.3994 +// Pipeline Classes describe the stages in which input and output are
  1.3995 +// referenced by the hardware pipeline.
  1.3996 +
  1.3997 +// Naming convention: ialu or fpu
  1.3998 +// Then: _reg
  1.3999 +// Then: _reg if there is a 2nd register
  1.4000 +// Then: _long if it's a pair of instructions implementing a long
  1.4001 +// Then: _fat if it requires the big decoder
  1.4002 +//   Or: _mem if it requires the big decoder and a memory unit.
  1.4003 +
  1.4004 +// Integer ALU reg operation: dst is read in S3 and written in S4.
  1.4005 +pipe_class ialu_reg(rRegI dst)
  1.4006 +%{
  1.4007 +    single_instruction;
  1.4008 +    dst    : S4(write);
  1.4009 +    dst    : S3(read);
  1.4010 +    DECODE : S0;        // any decoder
  1.4011 +    ALU    : S3;        // any alu
  1.4012 +%}
  1.4013 +
  1.4014 +// Long ALU reg operation: counted as two instructions.
  1.4015 +pipe_class ialu_reg_long(rRegL dst)
  1.4016 +%{
  1.4017 +    instruction_count(2);
  1.4018 +    dst    : S4(write);
  1.4019 +    dst    : S3(read);
  1.4020 +    DECODE : S0(2);     // any 2 decoders
  1.4021 +    ALU    : S3(2);     // any 2 alus
  1.4022 +%}
  1.4023 +
  1.4024 +// Integer ALU reg operation using big decoder: like ialu_reg but decode is limited to D0.
  1.4025 +pipe_class ialu_reg_fat(rRegI dst)
  1.4026 +%{
  1.4027 +    single_instruction;
  1.4028 +    dst    : S4(write);
  1.4029 +    dst    : S3(read);
  1.4030 +    D0     : S0;        // big decoder only
  1.4031 +    ALU    : S3;        // any alu
  1.4032 +%}
  1.4033 +
  1.4034 +// Long ALU reg operation using big decoder: two instructions, both decoded on D0.
  1.4035 +pipe_class ialu_reg_long_fat(rRegL dst)
  1.4036 +%{
  1.4037 +    instruction_count(2);
  1.4038 +    dst    : S4(write);
  1.4039 +    dst    : S3(read);
  1.4040 +    D0     : S0(2);     // big decoder only; twice
  1.4041 +    ALU    : S3(2);     // any 2 alus
  1.4042 +%}
  1.4043 +
  1.4044 +// Integer ALU reg-reg operation: src is read in S3, dst is written in S4.
  1.4045 +pipe_class ialu_reg_reg(rRegI dst, rRegI src)
  1.4046 +%{
  1.4047 +    single_instruction;
  1.4048 +    dst    : S4(write);
  1.4049 +    src    : S3(read);
  1.4050 +    DECODE : S0;        // any decoder
  1.4051 +    ALU    : S3;        // any alu
  1.4052 +%}
  1.4053 +
  1.4054 +// Long ALU reg-reg operation: counted as two instructions.
  1.4055 +pipe_class ialu_reg_reg_long(rRegL dst, rRegL src)
  1.4056 +%{
  1.4057 +    instruction_count(2);
  1.4058 +    dst    : S4(write);
  1.4059 +    src    : S3(read);
  1.4060 +    DECODE : S0(2);     // any 2 decoders
  1.4061 +    ALU    : S3(2);     // any 2 alus
  1.4062 +%}
  1.4063 +
  1.4064 +// Integer ALU reg-reg operation using big decoder (note: src is declared as a memory operand here, but no MEM unit is reserved).
  1.4065 +pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
  1.4066 +%{
  1.4067 +    single_instruction;
  1.4068 +    dst    : S4(write);
  1.4069 +    src    : S3(read);
  1.4070 +    D0     : S0;        // big decoder only
  1.4071 +    ALU    : S3;        // any alu
  1.4072 +%}
  1.4073 +
  1.4074 +// Long ALU reg-reg operation using big decoder: two instructions, both decoded on D0.
  1.4075 +pipe_class ialu_reg_reg_long_fat(rRegL dst, rRegL src)
  1.4076 +%{
  1.4077 +    instruction_count(2);
  1.4078 +    dst    : S4(write);
  1.4079 +    src    : S3(read);
  1.4080 +    D0     : S0(2);     // big decoder only; twice
  1.4081 +    ALU    : S3(2);     // any 2 alus
  1.4082 +%}
  1.4083 +
  1.4084 +// Integer ALU reg-mem operation: mem is read in S3, the ALU is used in S4, dst is written in S5.
  1.4085 +pipe_class ialu_reg_mem(rRegI dst, memory mem)
  1.4086 +%{
  1.4087 +    single_instruction;
  1.4088 +    dst    : S5(write);
  1.4089 +    mem    : S3(read);
  1.4090 +    D0     : S0;        // big decoder only
  1.4091 +    ALU    : S4;        // any alu
  1.4092 +    MEM    : S3;        // any mem
  1.4093 +%}
  1.4094 +
  1.4095 +// Integer mem operation (prefetch): memory read only, no ALU resource is reserved.
  1.4096 +pipe_class ialu_mem(memory mem)
  1.4097 +%{
  1.4098 +    single_instruction;
  1.4099 +    mem    : S3(read);
  1.4100 +    D0     : S0;        // big decoder only
  1.4101 +    MEM    : S3;        // any mem
  1.4102 +%}
  1.4103 +
  1.4104 +// Integer Store to Memory: the address operand is read in S3, the register source in S5.
  1.4105 +pipe_class ialu_mem_reg(memory mem, rRegI src)
  1.4106 +%{
  1.4107 +    single_instruction;
  1.4108 +    mem    : S3(read);
  1.4109 +    src    : S5(read);
  1.4110 +    D0     : S0;        // big decoder only
  1.4111 +    ALU    : S4;        // any alu
  1.4112 +    MEM    : S3;        // any mem
  1.4113 +%}
  1.4114 +
  1.4115 +// // Long Store to Memory
  1.4116 +// pipe_class ialu_mem_long_reg(memory mem, rRegL src)
  1.4117 +// %{
  1.4118 +//     instruction_count(2);
  1.4119 +//     mem    : S3(read);
  1.4120 +//     src    : S5(read);
  1.4121 +//     D0     : S0(2);          // big decoder only; twice
  1.4122 +//     ALU    : S4(2);     // any 2 alus
  1.4123 +//     MEM    : S3(2);  // Both mems
  1.4124 +// %}
  1.4125 +
  1.4126 +// Integer memory operation with no register source (the immediate form, going by the class name).
  1.4127 +pipe_class ialu_mem_imm(memory mem)
  1.4128 +%{
  1.4129 +    single_instruction;
  1.4130 +    mem    : S3(read);
  1.4131 +    D0     : S0;        // big decoder only
  1.4132 +    ALU    : S4;        // any alu
  1.4133 +    MEM    : S3;        // any mem
  1.4134 +%}
  1.4135 +
  1.4136 +// Integer ALU0 reg-reg operation: like ialu_reg_reg but restricted to decoder D0 and ALU0.
  1.4137 +pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
  1.4138 +%{
  1.4139 +    single_instruction;
  1.4140 +    dst    : S4(write);
  1.4141 +    src    : S3(read);
  1.4142 +    D0     : S0;        // Big decoder only
  1.4143 +    ALU0   : S3;        // only alu0
  1.4144 +%}
  1.4145 +
  1.4146 +// Integer ALU0 reg-mem operation (memory source; reserves ALU0 specifically)
  1.4147 +pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
  1.4148 +%{
  1.4149 +    single_instruction;
  1.4150 +    dst    : S5(write); // written one stage after ALU0 is reserved
  1.4151 +    mem    : S3(read);
  1.4152 +    D0     : S0;        // big decoder only
  1.4153 +    ALU0   : S4;        // ALU0 only
  1.4154 +    MEM    : S3;        // any mem
  1.4155 +%}
  1.4156 +
  1.4157 +// Integer ALU reg-reg operation whose only written operand is the flags register
  1.4158 +pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
  1.4159 +%{
  1.4160 +    single_instruction;
  1.4161 +    cr     : S4(write); // flags result; both register operands are only read
  1.4162 +    src1   : S3(read);
  1.4163 +    src2   : S3(read);
  1.4164 +    DECODE : S0;        // any decoder
  1.4165 +    ALU    : S3;        // any alu
  1.4166 +%}
  1.4167 +
  1.4168 +// Integer ALU reg-imm operation whose only written operand is the flags register
  1.4169 +pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
  1.4170 +%{
  1.4171 +    single_instruction;
  1.4172 +    cr     : S4(write); // flags result; src1 is only read
  1.4173 +    src1   : S3(read);
  1.4174 +    DECODE : S0;        // any decoder
  1.4175 +    ALU    : S3;        // any alu
  1.4176 +%}
  1.4177 +
  1.4178 +// Integer ALU reg-mem operation whose only written operand is the flags register
  1.4179 +pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
  1.4180 +%{
  1.4181 +    single_instruction;
  1.4182 +    cr     : S4(write); // flags result; src1 and the memory operand are only read
  1.4183 +    src1   : S3(read);
  1.4184 +    src2   : S3(read);
  1.4185 +    D0     : S0;        // big decoder only
  1.4186 +    ALU    : S4;        // any alu
  1.4187 +    MEM    : S3;        // any mem
  1.4188 +%}
  1.4189 +
  1.4190 +// 4-instruction idiom reading p, q and y (NOTE(review): a less-than compare idiom judging by the name -- confirm against its users)
  1.4191 +pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
  1.4192 +%{
  1.4193 +    instruction_count(4);
  1.4194 +    y      : S4(read);  // all three operands are declared read-only
  1.4195 +    q      : S3(read);
  1.4196 +    p      : S3(read);
  1.4197 +    DECODE : S0(4);     // any 4 decoders
  1.4198 +%}
  1.4199 +
  1.4200 +// Conditional move reg-reg (reads the flags register, reserves only a decoder)
  1.4201 +pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
  1.4202 +%{
  1.4203 +    single_instruction;
  1.4204 +    dst    : S4(write);
  1.4205 +    src    : S3(read);
  1.4206 +    cr     : S3(read);  // flags are an input here
  1.4207 +    DECODE : S0;        // any decoder
  1.4208 +%}
  1.4209 +
  1.4210 +// Conditional move reg-mem (memory source; reads the flags register)
  1.4211 +pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
  1.4212 +%{
  1.4213 +    single_instruction;
  1.4214 +    dst    : S4(write);
  1.4215 +    src    : S3(read);
  1.4216 +    cr     : S3(read);  // flags are an input here
  1.4217 +    DECODE : S0;        // any decoder
  1.4218 +    MEM    : S3;        // any mem
  1.4219 +%}
  1.4220 +
  1.4221 +// Conditional move reg-reg long (rRegL operands)
  1.4222 +pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
  1.4223 +%{
  1.4224 +    single_instruction;
  1.4225 +    dst    : S4(write);
  1.4226 +    src    : S3(read);
  1.4227 +    cr     : S3(read);  // flags are an input here
  1.4228 +    DECODE : S0(2);     // any 2 decoders (NOTE(review): two decoders for a single_instruction class -- confirm intended)
  1.4229 +%}
  1.4230 +
  1.4231 +// XXX
  1.4232 +// // Conditional move double reg-reg
  1.4233 +// pipe_class pipe_cmovD_reg( rFlagsReg cr, regDPR1 dst, regD src)
  1.4234 +// %{
  1.4235 +//     single_instruction;
  1.4236 +//     dst    : S4(write);
  1.4237 +//     src    : S3(read);
  1.4238 +//     cr     : S3(read);
  1.4239 +//     DECODE : S0;     // any decoder
  1.4240 +// %}
  1.4241 +
  1.4242 +// Float single-register operation (2 instructions)
  1.4243 +pipe_class fpu_reg(regD dst)
  1.4244 +%{
  1.4245 +    instruction_count(2);
  1.4246 +    dst    : S3(read);  // declared as read only, despite the name dst
  1.4247 +    DECODE : S0(2);     // any 2 decoders
  1.4248 +    FPU    : S3;        // FPU resource, reserved once
  1.4249 +%}
  1.4250 +
  1.4251 +// Float reg-reg operation (2 instructions)
  1.4252 +pipe_class fpu_reg_reg(regD dst, regD src)
  1.4253 +%{
  1.4254 +    instruction_count(2);
  1.4255 +    dst    : S4(write);
  1.4256 +    src    : S3(read);
  1.4257 +    DECODE : S0(2);     // any 2 decoders
  1.4258 +    FPU    : S3;        // FPU resource, reserved once
  1.4259 +%}
  1.4260 +
  1.4261 +// Float reg-reg-reg operation (3 instructions, two register sources)
  1.4262 +pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
  1.4263 +%{
  1.4264 +    instruction_count(3);
  1.4265 +    dst    : S4(write);
  1.4266 +    src1   : S3(read);
  1.4267 +    src2   : S3(read);
  1.4268 +    DECODE : S0(3);     // any 3 decoders
  1.4269 +    FPU    : S3(2);     // FPU resource, reserved twice
  1.4270 +%}
  1.4271 +
  1.4272 +// Float reg-reg-reg-reg operation (4 instructions, three register sources)
  1.4273 +pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
  1.4274 +%{
  1.4275 +    instruction_count(4);
  1.4276 +    dst    : S4(write);
  1.4277 +    src1   : S3(read);
  1.4278 +    src2   : S3(read);
  1.4279 +    src3   : S3(read);
  1.4280 +    DECODE : S0(4);     // any 4 decoders
  1.4281 +    FPU    : S3(2);     // FPU resource, reserved twice
  1.4282 +%}
  1.4283 +
  1.4284 +// Float reg-mem-reg-reg operation (4 instructions; src1 is a memory operand)
  1.4285 +pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
  1.4286 +%{
  1.4287 +    instruction_count(4);
  1.4288 +    dst    : S4(write);
  1.4289 +    src1   : S3(read);
  1.4290 +    src2   : S3(read);
  1.4291 +    src3   : S3(read);
  1.4292 +    DECODE : S1(3);     // any 3 decoders (plus D0 below: 4 decodes in total)
  1.4293 +    D0     : S0;        // big decoder only
  1.4294 +    FPU    : S3(2);     // FPU resource, reserved twice
  1.4295 +    MEM    : S3;        // any mem
  1.4296 +%}
  1.4297 +
  1.4298 +// Float reg-mem operation (2 instructions; memory source, register result)
  1.4299 +pipe_class fpu_reg_mem(regD dst, memory mem)
  1.4300 +%{
  1.4301 +    instruction_count(2);
  1.4302 +    dst    : S5(write); // written one stage after the FPU stage (S4)
  1.4303 +    mem    : S3(read);
  1.4304 +    D0     : S0;        // big decoder only
  1.4305 +    DECODE : S1;        // any decoder for FPU POP
  1.4306 +    FPU    : S4;        // FPU resource, reserved once
  1.4307 +    MEM    : S3;        // any mem
  1.4308 +%}
  1.4309 +
  1.4310 +// Float reg-reg-mem operation (3 instructions; one register and one memory source)
  1.4311 +pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
  1.4312 +%{
  1.4313 +    instruction_count(3);
  1.4314 +    dst    : S5(write);
  1.4315 +    src1   : S3(read);
  1.4316 +    mem    : S3(read);
  1.4317 +    D0     : S0;        // big decoder only
  1.4318 +    DECODE : S1(2);     // any 2 decoders for FPU POP
  1.4319 +    FPU    : S4;        // FPU resource, reserved once
  1.4320 +    MEM    : S3;        // any mem
  1.4321 +%}
  1.4322 +
  1.4323 +// Float mem-reg operation (2 instructions; register source, memory operand)
  1.4324 +pipe_class fpu_mem_reg(memory mem, regD src)
  1.4325 +%{
  1.4326 +    instruction_count(2);
  1.4327 +    src    : S5(read);
  1.4328 +    mem    : S3(read);  // the memory operand is declared as read, not write
  1.4329 +    DECODE : S0;        // any decoder for FPU PUSH
  1.4330 +    D0     : S1;        // big decoder only
  1.4331 +    FPU    : S4;        // FPU resource, reserved once
  1.4332 +    MEM    : S3;        // any mem
  1.4333 +%}
  1.4334 +
  1.4335 +pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
  1.4336 +%{
  1.4337 +    instruction_count(3); // Float mem-reg-reg operation: two register sources, one memory operand
  1.4338 +    src1   : S3(read);
  1.4339 +    src2   : S3(read);
  1.4340 +    mem    : S3(read);
  1.4341 +    DECODE : S0(2);     // any 2 decoders for FPU PUSH
  1.4342 +    D0     : S1;        // big decoder only
  1.4343 +    FPU    : S4;        // FPU resource, reserved once
  1.4344 +    MEM    : S3;        // any mem
  1.4345 +%}
  1.4346 +
  1.4347 +pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
  1.4348 +%{
  1.4349 +    instruction_count(3); // Float mem-reg-mem operation: register source, memory source, memory operand
  1.4350 +    src1   : S3(read);
  1.4351 +    src2   : S3(read);
  1.4352 +    mem    : S4(read);
  1.4353 +    DECODE : S0;        // any decoder for FPU PUSH
  1.4354 +    D0     : S0(2);     // big decoder only, twice
  1.4355 +    FPU    : S4;        // FPU resource, reserved once
  1.4356 +    MEM    : S3(2);     // any 2 mems
  1.4357 +%}
  1.4358 +
  1.4359 +pipe_class fpu_mem_mem(memory dst, memory src1)
  1.4360 +%{
  1.4361 +    instruction_count(2); // Float mem-mem operation: both operands are memory; no FPU resource is reserved
  1.4362 +    src1   : S3(read);
  1.4363 +    dst    : S4(read);  // the memory dst operand is declared as read, not write
  1.4364 +    D0     : S0(2);     // big decoder only, twice
  1.4365 +    MEM    : S3(2);     // any 2 mems
  1.4366 +%}
  1.4367 +
  1.4368 +pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
  1.4369 +%{
  1.4370 +    instruction_count(3); // Float mem-mem-mem operation: all three operands are memory
  1.4371 +    src1   : S3(read);
  1.4372 +    src2   : S3(read);
  1.4373 +    dst    : S4(read);  // the memory dst operand is declared as read, not write
  1.4374 +    D0     : S0(3);     // big decoder only, three times
  1.4375 +    FPU    : S4;        // FPU resource, reserved once
  1.4376 +    MEM    : S3(3);     // any 3 mems
  1.4377 +%}
  1.4378 +
  1.4379 +pipe_class fpu_mem_reg_con(memory mem, regD src1)
  1.4380 +%{
  1.4381 +    instruction_count(3); // Float mem-reg operation plus a constant (NOTE(review): constant presumably loaded from memory, hence MEM twice -- confirm)
  1.4382 +    src1   : S4(read);
  1.4383 +    mem    : S4(read);
  1.4384 +    DECODE : S0;        // any decoder for FPU PUSH
  1.4385 +    D0     : S0(2);     // big decoder only, twice
  1.4386 +    FPU    : S4;        // FPU resource, reserved once
  1.4387 +    MEM    : S3(2);     // any 2 mems
  1.4388 +%}
  1.4389 +
  1.4390 +// Float load constant (2 instructions; register result, no source operands)
  1.4391 +pipe_class fpu_reg_con(regD dst)
  1.4392 +%{
  1.4393 +    instruction_count(2);
  1.4394 +    dst    : S5(write);
  1.4395 +    D0     : S0;        // big decoder only for the load
  1.4396 +    DECODE : S1;        // any decoder for FPU POP
  1.4397 +    FPU    : S4;        // FPU resource, reserved once
  1.4398 +    MEM    : S3;        // any mem
  1.4399 +%}
  1.4400 +
  1.4401 +// Float reg-reg operation with a loaded constant (3 instructions)
  1.4402 +pipe_class fpu_reg_reg_con(regD dst, regD src)
  1.4403 +%{
  1.4404 +    instruction_count(3);
  1.4405 +    dst    : S5(write);
  1.4406 +    src    : S3(read);
  1.4407 +    D0     : S0;        // big decoder only for the load
  1.4408 +    DECODE : S1(2);     // any 2 decoders for FPU POP
  1.4409 +    FPU    : S4;        // FPU resource, reserved once
  1.4410 +    MEM    : S3;        // any mem
  1.4411 +%}
  1.4412 +
  1.4413 +// Unconditional branch (no operands are tracked; only BR is reserved)
  1.4414 +pipe_class pipe_jmp(label labl)
  1.4415 +%{
  1.4416 +    single_instruction;
  1.4417 +    BR   : S3;          // BR resource; no decoder is reserved for this class
  1.4418 +%}
  1.4419 +
  1.4420 +// Conditional branch (reads the flags register)
  1.4421 +pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
  1.4422 +%{
  1.4423 +    single_instruction;
  1.4424 +    cr    : S1(read);   // flags are read in S1, before BR is reserved in S3
  1.4425 +    BR    : S3;         // BR resource; no decoder is reserved for this class
  1.4426 +%}
  1.4427 +
  1.4428 +// Allocation idiom (serialized, with a fixed latency of 6)
  1.4429 +pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
  1.4430 +%{
  1.4431 +    instruction_count(1); force_serialization;
  1.4432 +    fixed_latency(6);
  1.4433 +    heap_ptr : S3(read);
  1.4434 +    DECODE   : S0(3);   // any 3 decoders (NOTE(review): instruction_count is 1 -- confirm intended)
  1.4435 +    D0       : S2;      // big decoder only
  1.4436 +    MEM      : S3;      // any mem
  1.4437 +    ALU      : S3(2);   // any 2 alus
  1.4438 +    dst      : S5(write);
  1.4439 +    BR       : S5;      // BR resource reserved in the same stage dst is written
  1.4440 +%}
  1.4441 +
  1.4442 +// Generic big/slow expanded idiom (no operands tracked; 10 instructions, fixed latency 100)
  1.4443 +pipe_class pipe_slow()
  1.4444 +%{
  1.4445 +    instruction_count(10); multiple_bundles; force_serialization;
  1.4446 +    fixed_latency(100);
  1.4447 +    D0  : S0(2);        // big decoder only, twice
  1.4448 +    MEM : S3(2);        // any 2 mems
  1.4449 +%}
  1.4450 +
  1.4451 +// The real do-nothing guy: zero instructions, no operands, no resources
  1.4452 +pipe_class empty()
  1.4453 +%{
  1.4454 +    instruction_count(0);
  1.4455 +%}
  1.4456 +
  1.4457 +// Define the class for the Nop node: MachNop uses the zero-instruction 'empty' class
  1.4458 +define
  1.4459 +%{
  1.4460 +   MachNop = empty;
  1.4461 +%}
  1.4462 +
  1.4463 +%}
  1.4464 +
  1.4465 +//----------INSTRUCTIONS-------------------------------------------------------
  1.4466 +//
  1.4467 +// match      -- States which machine-independent subtree may be replaced
  1.4468 +//               by this instruction.
  1.4469 +// ins_cost   -- The estimated cost of this instruction is used by instruction
  1.4470 +//               selection to identify a minimum cost tree of machine
  1.4471 +//               instructions that matches a tree of machine-independent
  1.4472 +//               instructions.
  1.4473 +// format     -- A string providing the disassembly for this instruction.
  1.4474 +//               The value of an instruction's operand may be inserted
  1.4475 +//               by referring to it with a '$' prefix.
  1.4476 +// opcode     -- Three instruction opcodes may be provided.  These are referred
  1.4477 +//               to within an encode class as $primary, $secondary, and $tertiary
  1.4478 +//               respectively.  The primary opcode is commonly used to
  1.4479 +//               indicate the type of machine instruction, while secondary
  1.4480 +//               and tertiary are often used for prefix options or addressing
  1.4481 +//               modes.
  1.4482 +// ins_encode -- A list of encode classes with parameters. The encode class
  1.4483 +//               name must have been defined in an 'enc_class' specification
  1.4484 +//               in the encode section of the architecture description.
  1.4485 +
  1.4486 +
  1.4487 +//----------Load/Store/Move Instructions---------------------------------------
  1.4488 +//----------Load Instructions--------------------------------------------------
  1.4489 +
  1.4490 +// Load Byte (8 bit signed), sign-extended into a 32-bit int register
  1.4491 +instruct loadB(rRegI dst, memory mem)
  1.4492 +%{
  1.4493 +  match(Set dst (LoadB mem));
  1.4494 +
  1.4495 +  ins_cost(125);
  1.4496 +  format %{ "movsbl  $dst, $mem\t# byte" %}
  1.4497 +
  1.4498 +  ins_encode %{
  1.4499 +    __ movsbl($dst$$Register, $mem$$Address);  // sign-extend byte to 32 bits
  1.4500 +  %}
  1.4501 +
  1.4502 +  ins_pipe(ialu_reg_mem);
  1.4503 +%}
  1.4504 +
  1.4505 +// Load Byte (8 bit signed) into Long Register; the ConvI2L is folded into the load
  1.4506 +instruct loadB2L(rRegL dst, memory mem)
  1.4507 +%{
  1.4508 +  match(Set dst (ConvI2L (LoadB mem)));
  1.4509 +
  1.4510 +  ins_cost(125);
  1.4511 +  format %{ "movsbq  $dst, $mem\t# byte -> long" %}
  1.4512 +
  1.4513 +  ins_encode %{
  1.4514 +    __ movsbq($dst$$Register, $mem$$Address);  // sign-extend byte to 64 bits
  1.4515 +  %}
  1.4516 +
  1.4517 +  ins_pipe(ialu_reg_mem);
  1.4518 +%}
  1.4519 +
  1.4520 +// Load Unsigned Byte (8 bit UNsigned), zero-extended into a 32-bit int register
  1.4521 +instruct loadUB(rRegI dst, memory mem)
  1.4522 +%{
  1.4523 +  match(Set dst (LoadUB mem));
  1.4524 +
  1.4525 +  ins_cost(125);
  1.4526 +  format %{ "movzbl  $dst, $mem\t# ubyte" %}
  1.4527 +
  1.4528 +  ins_encode %{
  1.4529 +    __ movzbl($dst$$Register, $mem$$Address);  // zero-extend byte to 32 bits
  1.4530 +  %}
  1.4531 +
  1.4532 +  ins_pipe(ialu_reg_mem);
  1.4533 +%}
  1.4534 +
  1.4535 +// Load Unsigned Byte (8 bit UNsigned) into Long Register; the ConvI2L is folded into the load
  1.4536 +instruct loadUB2L(rRegL dst, memory mem)
  1.4537 +%{
  1.4538 +  match(Set dst (ConvI2L (LoadUB mem)));
  1.4539 +
  1.4540 +  ins_cost(125);
  1.4541 +  format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
  1.4542 +
  1.4543 +  ins_encode %{
  1.4544 +    __ movzbq($dst$$Register, $mem$$Address);  // zero-extend byte to 64 bits
  1.4545 +  %}
  1.4546 +
  1.4547 +  ins_pipe(ialu_reg_mem);
  1.4548 +%}
  1.4549 +
  1.4550 +// Load Unsigned Byte (8 bit UNsigned) with a 8-bit mask into Long Register (two instructions; no ins_cost is given)
  1.4551 +instruct loadUB2L_immI8(rRegL dst, memory mem, immI8 mask, rFlagsReg cr) %{
  1.4552 +  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  1.4553 +  effect(KILL cr);
  1.4554 +
  1.4555 +  format %{ "movzbq  $dst, $mem\t# ubyte & 8-bit mask -> long\n\t"
  1.4556 +            "andl    $dst, $mask" %}
  1.4557 +  ins_encode %{
  1.4558 +    Register Rdst = $dst$$Register;
  1.4559 +    __ movzbq(Rdst, $mem$$Address);  // value is now in 0..255, upper bits zero
  1.4560 +    __ andl(Rdst, $mask$$constant);  // andl writes the flags (hence KILL cr); a 32-bit op leaves bits 63:32 zero
  1.4561 +  %}
  1.4562 +  ins_pipe(ialu_reg_mem);
  1.4563 +%}
  1.4564 +
  1.4565 +// Load Short (16 bit signed), sign-extended into a 32-bit int register
  1.4566 +instruct loadS(rRegI dst, memory mem)
  1.4567 +%{
  1.4568 +  match(Set dst (LoadS mem));
  1.4569 +
  1.4570 +  ins_cost(125);
  1.4571 +  format %{ "movswl $dst, $mem\t# short" %}
  1.4572 +
  1.4573 +  ins_encode %{
  1.4574 +    __ movswl($dst$$Register, $mem$$Address);  // sign-extend 16 bits to 32 bits
  1.4575 +  %}
  1.4576 +
  1.4577 +  ins_pipe(ialu_reg_mem);
  1.4578 +%}
  1.4579 +
  1.4580 +// Load Short (16 bit signed) to Byte (8 bit signed): replaces load + shift-left-24 + shift-right-24
  1.4581 +instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  1.4582 +  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
  1.4583 +
  1.4584 +  ins_cost(125);
  1.4585 +  format %{ "movsbl $dst, $mem\t# short -> byte" %}
  1.4586 +  ins_encode %{
  1.4587 +    __ movsbl($dst$$Register, $mem$$Address);  // (x << 24) >> 24 keeps only the sign-extended low byte, which on little-endian x86 is the byte at mem
  1.4588 +  %}
  1.4589 +  ins_pipe(ialu_reg_mem);
  1.4590 +%}
  1.4591 +
  1.4592 +// Load Short (16 bit signed) into Long Register; the ConvI2L is folded into the load
  1.4593 +instruct loadS2L(rRegL dst, memory mem)
  1.4594 +%{
  1.4595 +  match(Set dst (ConvI2L (LoadS mem)));
  1.4596 +
  1.4597 +  ins_cost(125);
  1.4598 +  format %{ "movswq $dst, $mem\t# short -> long" %}
  1.4599 +
  1.4600 +  ins_encode %{
  1.4601 +    __ movswq($dst$$Register, $mem$$Address);  // sign-extend 16 bits to 64 bits
  1.4602 +  %}
  1.4603 +
  1.4604 +  ins_pipe(ialu_reg_mem);
  1.4605 +%}
  1.4606 +
  1.4607 +// Load Unsigned Short/Char (16 bit UNsigned), zero-extended into a 32-bit int register
  1.4608 +instruct loadUS(rRegI dst, memory mem)
  1.4609 +%{
  1.4610 +  match(Set dst (LoadUS mem));
  1.4611 +
  1.4612 +  ins_cost(125);
  1.4613 +  format %{ "movzwl  $dst, $mem\t# ushort/char" %}
  1.4614 +
  1.4615 +  ins_encode %{
  1.4616 +    __ movzwl($dst$$Register, $mem$$Address);  // zero-extend 16 bits to 32 bits
  1.4617 +  %}
  1.4618 +
  1.4619 +  ins_pipe(ialu_reg_mem);
  1.4620 +%}
  1.4621 +
  1.4622 +// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed): replaces load + shift-left-24 + shift-right-24
  1.4623 +instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  1.4624 +  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
  1.4625 +
  1.4626 +  ins_cost(125);
  1.4627 +  format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
  1.4628 +  ins_encode %{
  1.4629 +    __ movsbl($dst$$Register, $mem$$Address);  // the shifts discard the high byte, so a signed byte load of the low byte (at mem on little-endian x86) is equivalent
  1.4630 +  %}
  1.4631 +  ins_pipe(ialu_reg_mem);
  1.4632 +%}
  1.4633 +
  1.4634 +// Load Unsigned Short/Char (16 bit UNsigned) into Long Register; the ConvI2L is folded into the load
  1.4635 +instruct loadUS2L(rRegL dst, memory mem)
  1.4636 +%{
  1.4637 +  match(Set dst (ConvI2L (LoadUS mem)));
  1.4638 +
  1.4639 +  ins_cost(125);
  1.4640 +  format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
  1.4641 +
  1.4642 +  ins_encode %{
  1.4643 +    __ movzwq($dst$$Register, $mem$$Address);  // zero-extend 16 bits to 64 bits
  1.4644 +  %}
  1.4645 +
  1.4646 +  ins_pipe(ialu_reg_mem);
  1.4647 +%}
  1.4648 +
  1.4649 +// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register (no ins_cost is given)
  1.4650 +instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
  1.4651 +  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  1.4652 +
  1.4653 +  format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
  1.4654 +  ins_encode %{
  1.4655 +    __ movzbq($dst$$Register, $mem$$Address);  // masking with 0xFF keeps only the low byte, so a zero-extending byte load needs no separate and
  1.4656 +  %}
  1.4657 +  ins_pipe(ialu_reg_mem);
  1.4658 +%}
  1.4659 +
  1.4660 +// Load Unsigned Short/Char (16 bit UNsigned) with mask into Long Register (two instructions; no ins_cost is given)
  1.4661 +instruct loadUS2L_immI16(rRegL dst, memory mem, immI16 mask, rFlagsReg cr) %{
  1.4662 +  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  1.4663 +  effect(KILL cr);
  1.4664 +
  1.4665 +  format %{ "movzwq  $dst, $mem\t# ushort/char & 16-bit mask -> long\n\t"
  1.4666 +            "andl    $dst, $mask" %}
  1.4667 +  ins_encode %{
  1.4668 +    Register Rdst = $dst$$Register;
  1.4669 +    __ movzwq(Rdst, $mem$$Address);  // value is now in 0..65535, upper bits zero
  1.4670 +    __ andl(Rdst, $mask$$constant);  // andl writes the flags (hence KILL cr); a 32-bit op leaves bits 63:32 zero
  1.4671 +  %}
  1.4672 +  ins_pipe(ialu_reg_mem);
  1.4673 +%}
  1.4674 +
  1.4675 +// Load Integer (32 bit) into an int register
  1.4676 +instruct loadI(rRegI dst, memory mem)
  1.4677 +%{
  1.4678 +  match(Set dst (LoadI mem));
  1.4679 +
  1.4680 +  ins_cost(125);
  1.4681 +  format %{ "movl    $dst, $mem\t# int" %}
  1.4682 +
  1.4683 +  ins_encode %{
  1.4684 +    __ movl($dst$$Register, $mem$$Address);  // plain 32-bit load
  1.4685 +  %}
  1.4686 +
  1.4687 +  ins_pipe(ialu_reg_mem);
  1.4688 +%}
  1.4689 +
  1.4690 +// Load Integer (32 bit signed) to Byte (8 bit signed): replaces load + shift-left-24 + shift-right-24
  1.4691 +instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  1.4692 +  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
  1.4693 +
  1.4694 +  ins_cost(125);
  1.4695 +  format %{ "movsbl  $dst, $mem\t# int -> byte" %}
  1.4696 +  ins_encode %{
  1.4697 +    __ movsbl($dst$$Register, $mem$$Address);  // only the low byte survives the shifts; on little-endian x86 it is the byte at mem
  1.4698 +  %}
  1.4699 +  ins_pipe(ialu_reg_mem);
  1.4700 +%}
  1.4701 +
  1.4702 +// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned): replaces load + and with the immI_255 mask
  1.4703 +instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  1.4704 +  match(Set dst (AndI (LoadI mem) mask));
  1.4705 +
  1.4706 +  ins_cost(125);
  1.4707 +  format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
  1.4708 +  ins_encode %{
  1.4709 +    __ movzbl($dst$$Register, $mem$$Address);  // zero-extending load of the low byte (at mem on little-endian x86); no flags are touched
  1.4710 +  %}
  1.4711 +  ins_pipe(ialu_reg_mem);
  1.4712 +%}
  1.4713 +
  1.4714 +// Load Integer (32 bit signed) to Short (16 bit signed): replaces load + shift-left-16 + shift-right-16
  1.4715 +instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  1.4716 +  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
  1.4717 +
  1.4718 +  ins_cost(125);
  1.4719 +  format %{ "movswl  $dst, $mem\t# int -> short" %}
  1.4720 +  ins_encode %{
  1.4721 +    __ movswl($dst$$Register, $mem$$Address);  // only the low 16 bits survive the shifts; on little-endian x86 they start at mem
  1.4722 +  %}
  1.4723 +  ins_pipe(ialu_reg_mem);
  1.4724 +%}
  1.4725 +
  1.4726 +// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned): replaces load + and with the immI_65535 mask
  1.4727 +instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  1.4728 +  match(Set dst (AndI (LoadI mem) mask));
  1.4729 +
  1.4730 +  ins_cost(125);
  1.4731 +  format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
  1.4732 +  ins_encode %{
  1.4733 +    __ movzwl($dst$$Register, $mem$$Address);  // zero-extending load of the low 16 bits (at mem on little-endian x86); no flags are touched
  1.4734 +  %}
  1.4735 +  ins_pipe(ialu_reg_mem);
  1.4736 +%}
  1.4737 +
  1.4738 +// Load Integer into Long Register; the ConvI2L is folded into a sign-extending load
  1.4739 +instruct loadI2L(rRegL dst, memory mem)
  1.4740 +%{
  1.4741 +  match(Set dst (ConvI2L (LoadI mem)));
  1.4742 +
  1.4743 +  ins_cost(125);
  1.4744 +  format %{ "movslq  $dst, $mem\t# int -> long" %}
  1.4745 +
  1.4746 +  ins_encode %{
  1.4747 +    __ movslq($dst$$Register, $mem$$Address);  // sign-extend 32 bits to 64 bits
  1.4748 +  %}
  1.4749 +
  1.4750 +  ins_pipe(ialu_reg_mem);
  1.4751 +%}
  1.4752 +
  1.4753 +// Load Integer with mask 0xFF into Long Register (no ins_cost is given)
  1.4754 +instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
  1.4755 +  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  1.4756 +
  1.4757 +  format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
  1.4758 +  ins_encode %{
  1.4759 +    __ movzbq($dst$$Register, $mem$$Address);  // the masked value is the low byte, non-negative, so zero-extension to 64 bits equals the ConvI2L
  1.4760 +  %}
  1.4761 +  ins_pipe(ialu_reg_mem);
  1.4762 +%}
  1.4763 +
  1.4764 +// Load Integer with mask 0xFFFF into Long Register (no ins_cost is given)
  1.4765 +instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
  1.4766 +  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  1.4767 +
  1.4768 +  format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
  1.4769 +  ins_encode %{
  1.4770 +    __ movzwq($dst$$Register, $mem$$Address);  // the masked value is the low 16 bits, non-negative, so zero-extension to 64 bits equals the ConvI2L
  1.4771 +  %}
  1.4772 +  ins_pipe(ialu_reg_mem);
  1.4773 +%}
  1.4774 +
  1.4775 +// Load Integer with a 31-bit mask into Long Register (two instructions; no ins_cost is given)
  1.4776 +instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
  1.4777 +  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  1.4778 +  effect(KILL cr);
  1.4779 +
  1.4780 +  format %{ "movl    $dst, $mem\t# int & 31-bit mask -> long\n\t"
  1.4781 +            "andl    $dst, $mask" %}
  1.4782 +  ins_encode %{
  1.4783 +    Register Rdst = $dst$$Register;
  1.4784 +    __ movl(Rdst, $mem$$Address);    // 32-bit load; upper 32 bits of the register become zero
  1.4785 +    __ andl(Rdst, $mask$$constant);  // writes the flags (hence KILL cr). NOTE(review): zero-extension equals ConvI2L only if the mask is non-negative, which immU31 presumably guarantees -- confirm
  1.4786 +  %}
  1.4787 +  ins_pipe(ialu_reg_mem);
  1.4788 +%}
  1.4789 +
  1.4790 +// Load Unsigned Integer into Long Register: the AndL mask costs no instruction
  1.4791 +instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask) 
  1.4792 +%{
  1.4793 +  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  1.4794 +
  1.4795 +  ins_cost(125);
  1.4796 +  format %{ "movl    $dst, $mem\t# uint -> long" %}
  1.4797 +
  1.4798 +  ins_encode %{
  1.4799 +    __ movl($dst$$Register, $mem$$Address);  // a 32-bit load zeroes bits 63:32, which is what masking with the low 32 bits would do (assumes immL_32bits is 0xFFFFFFFF -- confirm operand definition)
  1.4800 +  %}
  1.4801 +
  1.4802 +  ins_pipe(ialu_reg_mem);
  1.4803 +%}
  1.4804 +
  1.4805 +// Load Long (64 bit) into a long register
  1.4806 +instruct loadL(rRegL dst, memory mem)
  1.4807 +%{
  1.4808 +  match(Set dst (LoadL mem));
  1.4809 +
  1.4810 +  ins_cost(125);
  1.4811 +  format %{ "movq    $dst, $mem\t# long" %}
  1.4812 +
  1.4813 +  ins_encode %{
  1.4814 +    __ movq($dst$$Register, $mem$$Address);  // plain 64-bit load
  1.4815 +  %}
  1.4816 +
  1.4817 +  ins_pipe(ialu_reg_mem); // XXX same pipe class as the 32-bit loads
  1.4818 +%}
  1.4819 +
  1.4820 +// Load Range: 32-bit load into an int register, encoded via opcode + enc_classes rather than the assembler
  1.4821 +instruct loadRange(rRegI dst, memory mem)
  1.4822 +%{
  1.4823 +  match(Set dst (LoadRange mem));
  1.4824 +
  1.4825 +  ins_cost(125); // XXX
  1.4826 +  format %{ "movl    $dst, $mem\t# range" %}
  1.4827 +  opcode(0x8B); // MOV r, r/m
  1.4828 +  ins_encode(REX_reg_mem(dst, mem), OpcP, reg_mem(dst, mem));
  1.4829 +  ins_pipe(ialu_reg_mem);
  1.4830 +%}
  1.4831 +
  1.4832 +// Load Pointer: 64-bit load (uses the _wide REX enc_class, unlike loadRange)
  1.4833 +instruct loadP(rRegP dst, memory mem)
  1.4834 +%{
  1.4835 +  match(Set dst (LoadP mem));
  1.4836 +
  1.4837 +  ins_cost(125); // XXX
  1.4838 +  format %{ "movq    $dst, $mem\t# ptr" %}
  1.4839 +  opcode(0x8B); // MOV r, r/m
  1.4840 +  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
  1.4841 +  ins_pipe(ialu_reg_mem); // XXX
  1.4842 +%}
  1.4843 +
  1.4844 +// Load Compressed Pointer: a 32-bit load into a narrow-oop register
  1.4845 +instruct loadN(rRegN dst, memory mem)
  1.4846 +%{
  1.4847 +   match(Set dst (LoadN mem));
  1.4848 +
  1.4849 +   ins_cost(125); // XXX
  1.4850 +   format %{ "movl    $dst, $mem\t# compressed ptr" %}
  1.4851 +   ins_encode %{
  1.4852 +     __ movl($dst$$Register, $mem$$Address);  // 32-bit load; no decoding is done here
  1.4853 +   %}
  1.4854 +   ins_pipe(ialu_reg_mem); // XXX
  1.4855 +%}
  1.4856 +
  1.4857 +
  1.4858 +// Load Klass Pointer: 64-bit load, same encoding as loadP
  1.4859 +instruct loadKlass(rRegP dst, memory mem)
  1.4860 +%{
  1.4861 +  match(Set dst (LoadKlass mem));
  1.4862 +
  1.4863 +  ins_cost(125); // XXX
  1.4864 +  format %{ "movq    $dst, $mem\t# class" %}
  1.4865 +  opcode(0x8B); // MOV r, r/m
  1.4866 +  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
  1.4867 +  ins_pipe(ialu_reg_mem); // XXX
  1.4868 +%}
  1.4869 +
  1.4870 +// Load narrow Klass Pointer: a 32-bit load into a narrow register
  1.4871 +instruct loadNKlass(rRegN dst, memory mem)
  1.4872 +%{
  1.4873 +  match(Set dst (LoadNKlass mem));
  1.4874 +
  1.4875 +  ins_cost(125); // XXX
  1.4876 +  format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
  1.4877 +  ins_encode %{
  1.4878 +    __ movl($dst$$Register, $mem$$Address);  // 32-bit load; no decoding is done here
  1.4879 +  %}
  1.4880 +  ins_pipe(ialu_reg_mem); // XXX
  1.4881 +%}
  1.4882 +
  1.4883 +// Load Float into an XMM register
  1.4884 +instruct loadF(regF dst, memory mem)
  1.4885 +%{
  1.4886 +  match(Set dst (LoadF mem));
  1.4887 +
  1.4888 +  ins_cost(145); // XXX costed higher than the integer loads (125)
  1.4889 +  format %{ "movss   $dst, $mem\t# float" %}
  1.4890 +  ins_encode %{
  1.4891 +    __ movflt($dst$$XMMRegister, $mem$$Address);  // assembler helper; the format string above prints it as movss
  1.4892 +  %}
  1.4893 +  ins_pipe(pipe_slow); // XXX
  1.4894 +%}
  1.4895 +
  1.4896 +// Load Double (variant selected when UseXmmLoadAndClearUpper is off)
  1.4897 +instruct loadD_partial(regD dst, memory mem)
  1.4898 +%{
  1.4899 +  predicate(!UseXmmLoadAndClearUpper);
  1.4900 +  match(Set dst (LoadD mem));
  1.4901 +
  1.4902 +  ins_cost(145); // XXX
  1.4903 +  format %{ "movlpd  $dst, $mem\t# double" %}
  1.4904 +  ins_encode %{
  1.4905 +    __ movdbl($dst$$XMMRegister, $mem$$Address);  // same helper call as loadD; NOTE(review): presumably movdbl itself picks movlpd vs movsd from the flag -- confirm
  1.4906 +  %}
  1.4907 +  ins_pipe(pipe_slow); // XXX
  1.4908 +%}
  1.4909 +
  1.4910 +instruct loadD(regD dst, memory mem)
  1.4911 +%{
  1.4912 +  predicate(UseXmmLoadAndClearUpper);
  1.4913 +  match(Set dst (LoadD mem));
  1.4914 +
  1.4915 +  ins_cost(145); // XXX Load Double, variant selected when UseXmmLoadAndClearUpper is on
  1.4916 +  format %{ "movsd   $dst, $mem\t# double" %}
  1.4917 +  ins_encode %{
  1.4918 +    __ movdbl($dst$$XMMRegister, $mem$$Address);  // same helper call as loadD_partial; NOTE(review): presumably movdbl itself picks movlpd vs movsd from the flag -- confirm
  1.4919 +  %}
  1.4920 +  ins_pipe(pipe_slow); // XXX
  1.4921 +%}
  1.4922 +
  1.4923 +// Load Effective Address for the indOffset8 addressing operand (matches the operand itself, not a load)
  1.4924 +instruct leaP8(rRegP dst, indOffset8 mem)
  1.4925 +%{
  1.4926 +  match(Set dst mem);
  1.4927 +
  1.4928 +  ins_cost(110); // XXX
  1.4929 +  format %{ "leaq    $dst, $mem\t# ptr 8" %}
  1.4930 +  opcode(0x8D); // LEA r, m
  1.4931 +  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
  1.4932 +  ins_pipe(ialu_reg_reg_fat);
  1.4933 +%}
  1.4934 +
  1.4935 +instruct leaP32(rRegP dst, indOffset32 mem)
  1.4936 +%{
  1.4937 +  match(Set dst mem);
  1.4938 +
  1.4939 +  ins_cost(110); // Load Effective Address for the indOffset32 addressing operand
  1.4940 +  format %{ "leaq    $dst, $mem\t# ptr 32" %}
  1.4941 +  opcode(0x8D); // LEA r, m
  1.4942 +  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
  1.4943 +  ins_pipe(ialu_reg_reg_fat);
  1.4944 +%}
  1.4945 +
  1.4946 +// instruct leaPIdx(rRegP dst, indIndex mem)
  1.4947 +// %{
  1.4948 +//   match(Set dst mem);
  1.4949 +
  1.4950 +//   ins_cost(110);
  1.4951 +//   format %{ "leaq    $dst, $mem\t# ptr idx" %}
  1.4952 +//   opcode(0x8D);
  1.4953 +//   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
  1.4954 +//   ins_pipe(ialu_reg_reg_fat);
  1.4955 +// %}
  1.4956 +
  1.4957 +instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
  1.4958 +%{
  1.4959 +  match(Set dst mem);
  1.4960 +
  1.4961 +  ins_cost(110); // Load Effective Address for the indIndexOffset addressing operand
  1.4962 +  format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
  1.4963 +  opcode(0x8D); // LEA r, m
  1.4964 +  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
  1.4965 +  ins_pipe(ialu_reg_reg_fat);
  1.4966 +%}
  1.4967 +
  1.4968 +instruct leaPIdxScale(rRegP dst, indIndexScale mem)
  1.4969 +%{
  1.4970 +  match(Set dst mem);
  1.4971 +
  1.4972 +  ins_cost(110); // Load Effective Address for the indIndexScale addressing operand
  1.4973 +  format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
  1.4974 +  opcode(0x8D); // LEA r, m
  1.4975 +  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
  1.4976 +  ins_pipe(ialu_reg_reg_fat);
  1.4977 +%}
  1.4978 +
  1.4979 +instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
  1.4980 +%{
  1.4981 +  match(Set dst mem);
  1.4982 +
  1.4983 +  ins_cost(110); // Load Effective Address for the indIndexScaleOffset addressing operand
  1.4984 +  format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
  1.4985 +  opcode(0x8D); // LEA r, m
  1.4986 +  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
  1.4987 +  ins_pipe(ialu_reg_reg_fat);
  1.4988 +%}
  1.4989 +
  1.4990 +instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
  1.4991 +%{
  1.4992 +  match(Set dst mem);
  1.4993 +
  1.4994 +  ins_cost(110); // Load Effective Address for the indPosIndexScaleOffset addressing operand
  1.4995 +  format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
  1.4996 +  opcode(0x8D); // LEA r, m
  1.4997 +  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
  1.4998 +  ins_pipe(ialu_reg_reg_fat);
  1.4999 +%}
  1.5000 +
  1.5001 +// Load Effective Address which uses Narrow (32-bits) oop; only with compressed oops and a non-zero shift
  1.5002 +instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
  1.5003 +%{
  1.5004 +  predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
  1.5005 +  match(Set dst mem);
  1.5006 +
  1.5007 +  ins_cost(110);
  1.5008 +  format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
  1.5009 +  opcode(0x8D); // LEA r, m
  1.5010 +  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
  1.5011 +  ins_pipe(ialu_reg_reg_fat);
  1.5012 +%}
  1.5013 +
  1.5014 +instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
  1.5015 +%{
  1.5016 +  predicate(Universe::narrow_oop_shift() == 0);
  1.5017 +  match(Set dst mem);
  1.5018 +
  1.5019 +  ins_cost(110); // XXX Narrow variant of leaP8; enabled only when the narrow-oop shift is zero
  1.5020 +  format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
  1.5021 +  opcode(0x8D); // LEA r, m
  1.5022 +  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
  1.5023 +  ins_pipe(ialu_reg_reg_fat);
  1.5024 +%}
  1.5025 +
  1.5026 +instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
  1.5027 +%{
  1.5028 +  predicate(Universe::narrow_oop_shift() == 0);
  1.5029 +  match(Set dst mem);
  1.5030 +
  1.5031 +  ins_cost(110); // Narrow variant of leaP32; enabled only when the narrow-oop shift is zero
  1.5032 +  format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
  1.5033 +  opcode(0x8D); // LEA r, m
  1.5034 +  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
  1.5035 +  ins_pipe(ialu_reg_reg_fat);
  1.5036 +%}
  1.5037 +
  1.5038 +instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
  1.5039 +%{
  1.5040 +  predicate(Universe::narrow_oop_shift() == 0);
  1.5041 +  match(Set dst mem);
  1.5042 +
  1.5043 +  ins_cost(110); // Narrow variant of leaPIdxOff; enabled only when the narrow-oop shift is zero
  1.5044 +  format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
  1.5045 +  opcode(0x8D); // LEA r, m
  1.5046 +  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
  1.5047 +  ins_pipe(ialu_reg_reg_fat);
  1.5048 +%}
  1.5049 +
  1.5050 +instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
  1.5051 +%{
  1.5052 +  predicate(Universe::narrow_oop_shift() == 0);
  1.5053 +  match(Set dst mem);
  1.5054 +
  1.5055 +  ins_cost(110); // Narrow variant of leaPIdxScale; enabled only when the narrow-oop shift is zero
  1.5056 +  format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
  1.5057 +  opcode(0x8D); // LEA r, m
  1.5058 +  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
  1.5059 +  ins_pipe(ialu_reg_reg_fat);
  1.5060 +%}
  1.5061 +
  1.5062 +instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
  1.5063 +%{
  1.5064 +  predicate(Universe::narrow_oop_shift() == 0);
  1.5065 +  match(Set dst mem);
  1.5066 +
  1.5067 +  ins_cost(110); // Narrow variant of leaPIdxScaleOff; enabled only when the narrow-oop shift is zero
  1.5068 +  format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
  1.5069 +  opcode(0x8D); // LEA r, m
  1.5070 +  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
  1.5071 +  ins_pipe(ialu_reg_reg_fat);
  1.5072 +%}
  1.5073 +
  1.5074 +instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
  1.5075 +%{ // Compute the address described by an indPosIndexScaleOffsetNarrow operand into pointer register dst.
  1.5076 +  predicate(Universe::narrow_oop_shift() == 0); // eligible only when the narrow-oop shift is zero
  1.5077 +  match(Set dst mem);
  1.5078 +  // Encoded as REX_reg_mem_wide, opcode 0x8D, reg_mem, identical to the sibling lea rules.
  1.5079 +  ins_cost(110);
  1.5080 +  format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
  1.5081 +  opcode(0x8D);
  1.5082 +  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
  1.5083 +  ins_pipe(ialu_reg_reg_fat);
  1.5084 +%}
  1.5085 +
  1.5086 +instruct loadConI(rRegI dst, immI src)
  1.5087 +%{ // Materialize an int constant (immI) in an int register.
  1.5088 +  match(Set dst src);
  1.5089 +  // No explicit ins_cost here, so the ADL default applies; loadConI0 declares cost 50 for the zero constant.
  1.5090 +  format %{ "movl    $dst, $src\t# int" %}
  1.5091 +  ins_encode(load_immI(dst, src)); // load_immI is an enc class defined outside this chunk
  1.5092 +  ins_pipe(ialu_reg_fat); // XXX
  1.5093 +%}
  1.5094 +
  1.5095 +instruct loadConI0(rRegI dst, immI0 src, rFlagsReg cr)
  1.5096 +%{ // Zero an int register by XORing it with itself instead of loading an immediate.
  1.5097 +  match(Set dst src);
  1.5098 +  effect(KILL cr); // the flags register is declared clobbered by this rule
  1.5099 +  // src (immI0) only selects the rule; both encoded operands are dst.
  1.5100 +  ins_cost(50);
  1.5101 +  format %{ "xorl    $dst, $dst\t# int" %}
  1.5102 +  opcode(0x33); /* 33 /r: xor r32, r/m32 (ModRM form, see reg_reg below) */
  1.5103 +  ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
  1.5104 +  ins_pipe(ialu_reg);
  1.5105 +%}
  1.5106 +
  1.5107 +instruct loadConL(rRegL dst, immL src)
  1.5108 +%{ // Materialize an arbitrary long constant (immL) in a long register.
  1.5109 +  match(Set dst src);
  1.5110 +  // Highest cost (150) of the long-constant rules in this group; loadConL0, loadConUL32 and loadConL32 are cheaper for their operand classes.
  1.5111 +  ins_cost(150);
  1.5112 +  format %{ "movq    $dst, $src\t# long" %}
  1.5113 +  ins_encode(load_immL(dst, src)); // load_immL is an enc class defined outside this chunk
  1.5114 +  ins_pipe(ialu_reg);
  1.5115 +%}
  1.5116 +
  1.5117 +instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
  1.5118 +%{ // Zero a long register with a self-XOR.
  1.5119 +  match(Set dst src);
  1.5120 +  effect(KILL cr); // the flags register is declared clobbered by this rule
  1.5121 +  // Uses the non-wide REX_reg_reg step and prints as xorl although dst is a long register (relies on 32-bit register writes clearing the upper half on x86-64).
  1.5122 +  ins_cost(50);
  1.5123 +  format %{ "xorl    $dst, $dst\t# long" %}
  1.5124 +  opcode(0x33); /* 33 /r: xor r32, r/m32 (ModRM form, see reg_reg below) */
  1.5125 +  ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
  1.5126 +  ins_pipe(ialu_reg); // XXX
  1.5127 +%}
  1.5128 +
  1.5129 +instruct loadConUL32(rRegL dst, immUL32 src)
  1.5130 +%{ // Load a long constant of operand class immUL32 (labelled unsigned 32-bit in the format string) into a long register.
  1.5131 +  match(Set dst src);
  1.5132 +  // Cost 60 sits between the zero rule (50) and the signed 32-bit rule (70).
  1.5133 +  ins_cost(60);
  1.5134 +  format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
  1.5135 +  ins_encode(load_immUL32(dst, src)); // load_immUL32 is an enc class defined outside this chunk
  1.5136 +  ins_pipe(ialu_reg);
  1.5137 +%}
  1.5138 +
  1.5139 +instruct loadConL32(rRegL dst, immL32 src)
  1.5140 +%{ // Load a long constant of operand class immL32 (labelled 32-bit in the format string) into a long register.
  1.5141 +  match(Set dst src);
  1.5142 +  // Cost 70: cheaper than the generic loadConL (150), dearer than loadConUL32 (60).
  1.5143 +  ins_cost(70);
  1.5144 +  format %{ "movq    $dst, $src\t# long (32-bit)" %}
  1.5145 +  ins_encode(load_immL32(dst, src)); // load_immL32 is an enc class defined outside this chunk
  1.5146 +  ins_pipe(ialu_reg);
  1.5147 +%}
  1.5148 +
  1.5149 +instruct loadConP(rRegP dst, immP con) %{ // Materialize a pointer constant (immP) in a pointer register.
  1.5150 +  match(Set dst con);
  1.5151 +  // No explicit ins_cost; loadConP0 (50) and loadConP31 (60) declare lower costs for their narrower operand classes.
  1.5152 +  format %{ "movq    $dst, $con\t# ptr" %}
  1.5153 +  ins_encode(load_immP(dst, con)); // load_immP is an enc class defined outside this chunk
  1.5154 +  ins_pipe(ialu_reg_fat); // XXX
  1.5155 +%}
  1.5156 +
  1.5157 +instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
  1.5158 +%{ // Load the NULL pointer constant by XORing dst with itself.
  1.5159 +  match(Set dst src);
  1.5160 +  effect(KILL cr); // the flags register is declared clobbered by this rule
  1.5161 +  // Non-wide REX_reg_reg step, printed as xorl, same pattern as loadConI0 and loadConL0.
  1.5162 +  ins_cost(50);
  1.5163 +  format %{ "xorl    $dst, $dst\t# ptr" %}
  1.5164 +  opcode(0x33); /* 33 /r: xor r32, r/m32 (ModRM form, see reg_reg below) */
  1.5165 +  ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
  1.5166 +  ins_pipe(ialu_reg);
  1.5167 +%}
  1.5168 +
  1.5169 +instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
  1.5170 +%{ // Load a pointer constant of operand class immP31 (labelled positive 32-bit in the format string).
  1.5171 +  match(Set dst src);
  1.5172 +  effect(KILL cr); // NOTE(review): flags are declared killed although the rule is formatted as a movl; confirm whether load_immP31 needs this
  1.5173 +  // Cost 60: between the NULL rule (50) and the generic loadConP.
  1.5174 +  ins_cost(60);
  1.5175 +  format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
  1.5176 +  ins_encode(load_immP31(dst, src)); // load_immP31 is an enc class defined outside this chunk
  1.5177 +  ins_pipe(ialu_reg);
  1.5178 +%}
  1.5179 +
  1.5180 +instruct loadConF(regF dst, immF con) %{ // Load a float constant into an XMM register from the constant table.
  1.5181 +  match(Set dst con);
  1.5182 +  ins_cost(125); // loadConF0 declares cost 100 for the 0.0 constant
  1.5183 +  format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  1.5184 +  ins_encode %{
  1.5185 +    __ movflt($dst$$XMMRegister, $constantaddress($con));
  1.5186 +  %}
  1.5187 +  ins_pipe(pipe_slow);
  1.5188 +%}
  1.5189 +
  1.5190 +instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{ // Load the compressed NULL pointer by clearing dst with a self-XOR.
  1.5191 +  match(Set dst src);
  1.5192 +  effect(KILL cr); // the flags register is declared clobbered by this rule
  1.5193 +  format %{ "xorq    $dst, $dst\t# compressed NULL ptr" %}
  1.5194 +  ins_encode %{
  1.5195 +    __ xorq($dst$$Register, $dst$$Register); // both operands are dst; src only selects the rule, and the format string above mirrors this
  1.5196 +  %}
  1.5197 +  ins_pipe(ialu_reg);
  1.5198 +%}
  1.5199 +
  1.5200 +instruct loadConN(rRegN dst, immN src) %{ // Load a non-NULL compressed oop constant into a narrow-pointer register.
  1.5201 +  match(Set dst src);
  1.5202 +  // A NULL constant is treated as a bug in this rule (ShouldNotReachHere below); loadConN0 matches immN0.
  1.5203 +  ins_cost(125);
  1.5204 +  format %{ "movl    $dst, $src\t# compressed ptr" %}
  1.5205 +  ins_encode %{
  1.5206 +    address con = (address)$src$$constant;
  1.5207 +    if (con == NULL) {
  1.5208 +      ShouldNotReachHere();
  1.5209 +    } else {
  1.5210 +      __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
  1.5211 +    }
  1.5212 +  %}
  1.5213 +  ins_pipe(ialu_reg_fat); // XXX
  1.5214 +%}
  1.5215 +
  1.5216 +instruct loadConNKlass(rRegN dst, immNKlass src) %{ // Load a non-NULL compressed klass constant into a narrow-pointer register.
  1.5217 +  match(Set dst src);
  1.5218 +  // A NULL constant is treated as a bug in this rule (ShouldNotReachHere below).
  1.5219 +  ins_cost(125);
  1.5220 +  format %{ "movl    $dst, $src\t# compressed klass ptr" %}
  1.5221 +  ins_encode %{
  1.5222 +    address con = (address)$src$$constant;
  1.5223 +    if (con == NULL) {
  1.5224 +      ShouldNotReachHere();
  1.5225 +    } else {
  1.5226 +      __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
  1.5227 +    }
  1.5228 +  %}
  1.5229 +  ins_pipe(ialu_reg_fat); // XXX
  1.5230 +%}
  1.5231 +
  1.5232 +instruct loadConF0(regF dst, immF0 src)
  1.5233 +%{ // Load float 0.0 by XORing the XMM register with itself (no constant-table access).
  1.5234 +  match(Set dst src);
  1.5235 +  ins_cost(100); // lower than the constant-table rule loadConF (125)
  1.5236 +  // Unlike the integer zeroing rules, no flags-register effect is declared here.
  1.5237 +  format %{ "xorps   $dst, $dst\t# float 0.0" %}
  1.5238 +  ins_encode %{
  1.5239 +    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  1.5240 +  %}
  1.5241 +  ins_pipe(pipe_slow);
  1.5242 +%}
  1.5243 +
  1.5244 +// Use the same format since predicate() can not be used here.
  1.5245 +instruct loadConD(regD dst, immD con) %{ // Load a double constant into an XMM register from the constant table.
  1.5246 +  match(Set dst con);
  1.5247 +  ins_cost(125); // loadConD0 declares cost 100 for the 0.0 constant
  1.5248 +  format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  1.5249 +  ins_encode %{
  1.5250 +    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  1.5251 +  %}
  1.5252 +  ins_pipe(pipe_slow);
  1.5253 +%}
  1.5254 +
  1.5255 +instruct loadConD0(regD dst, immD0 src)
  1.5256 +%{ // Load double 0.0 by XORing the XMM register with itself (no constant-table access).
  1.5257 +  match(Set dst src);
  1.5258 +  ins_cost(100); // lower than the constant-table rule loadConD (125)
  1.5259 +  // Unlike the integer zeroing rules, no flags-register effect is declared here.
  1.5260 +  format %{ "xorpd   $dst, $dst\t# double 0.0" %}
  1.5261 +  ins_encode %{
  1.5262 +    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  1.5263 +  %}
  1.5264 +  ins_pipe(pipe_slow);
  1.5265 +%}
  1.5266 +
  1.5267 +instruct loadSSI(rRegI dst, stackSlotI src)
  1.5268 +%{ // Load an int from a stack slot operand into an int register.
  1.5269 +  match(Set dst src);
  1.5270 +  // Opcode 0x8B with the non-wide REX_reg_mem step; formatted as movl.
  1.5271 +  ins_cost(125);
  1.5272 +  format %{ "movl    $dst, $src\t# int stk" %}
  1.5273 +  opcode(0x8B);
  1.5274 +  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
  1.5275 +  ins_pipe(ialu_reg_mem);
  1.5276 +%}
  1.5277 +
  1.5278 +instruct loadSSL(rRegL dst, stackSlotL src)
  1.5279 +%{ // Load a long from a stack slot operand into a long register.
  1.5280 +  match(Set dst src);
  1.5281 +  // Same opcode (0x8B) as loadSSI but with the wide REX step; formatted as movq.
  1.5282 +  ins_cost(125);
  1.5283 +  format %{ "movq    $dst, $src\t# long stk" %}
  1.5284 +  opcode(0x8B);
  1.5285 +  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
  1.5286 +  ins_pipe(ialu_reg_mem);
  1.5287 +%}
  1.5288 +
  1.5289 +instruct loadSSP(rRegP dst, stackSlotP src)
  1.5290 +%{ // Load a pointer from a stack slot operand into a pointer register.
  1.5291 +  match(Set dst src);
  1.5292 +  // Encoded exactly like loadSSL: wide REX step, opcode 0x8B, reg_mem.
  1.5293 +  ins_cost(125);
  1.5294 +  format %{ "movq    $dst, $src\t# ptr stk" %}
  1.5295 +  opcode(0x8B);
  1.5296 +  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
  1.5297 +  ins_pipe(ialu_reg_mem);
  1.5298 +%}
  1.5299 +
  1.5300 +instruct loadSSF(regF dst, stackSlotF src)
  1.5301 +%{ // Load a float from a stack slot operand into an XMM register.
  1.5302 +  match(Set dst src);
  1.5303 +  // The slot is addressed as rsp plus the operand displacement.
  1.5304 +  ins_cost(125);
  1.5305 +  format %{ "movss   $dst, $src\t# float stk" %}
  1.5306 +  ins_encode %{
  1.5307 +    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  1.5308 +  %}
  1.5309 +  ins_pipe(pipe_slow); // XXX
  1.5310 +%}
  1.5311 +
  1.5312 +// Use the same format since predicate() can not be used here.
  1.5313 +instruct loadSSD(regD dst, stackSlotD src)
  1.5314 +%{ // Load a double from a stack slot operand into an XMM register.
  1.5315 +  match(Set dst src);
  1.5316 +  // The slot is addressed as rsp plus the operand displacement.
  1.5317 +  ins_cost(125);
  1.5318 +  format %{ "movsd   $dst, $src\t# double stk" %}
  1.5319 +  ins_encode  %{
  1.5320 +    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  1.5321 +  %}
  1.5322 +  ins_pipe(pipe_slow); // XXX
  1.5323 +%}
  1.5324 +
  1.5325 +// Prefetch instructions.
  1.5326 +// Must be safe to execute with invalid address (cannot fault).
  1.5327 +
  1.5328 +instruct prefetchr( memory mem ) %{ // PrefetchRead implemented with the assembler prefetchr call.
  1.5329 +  predicate(ReadPrefetchInstr==3); // chosen when the ReadPrefetchInstr flag is 3
  1.5330 +  match(PrefetchRead mem);
  1.5331 +  ins_cost(125);
  1.5332 +  // The four PrefetchRead rules share this match and cost; their predicates on ReadPrefetchInstr (0..3) are mutually exclusive.
  1.5333 +  format %{ "PREFETCHR $mem\t# Prefetch into level 1 cache" %}
  1.5334 +  ins_encode %{
  1.5335 +    __ prefetchr($mem$$Address);
  1.5336 +  %}
  1.5337 +  ins_pipe(ialu_mem);
  1.5338 +%}
  1.5339 +
  1.5340 +instruct prefetchrNTA( memory mem ) %{ // PrefetchRead implemented with prefetchnta.
  1.5341 +  predicate(ReadPrefetchInstr==0); // chosen when the ReadPrefetchInstr flag is 0
  1.5342 +  match(PrefetchRead mem);
  1.5343 +  ins_cost(125);
  1.5344 +  // Same match and cost as the other PrefetchRead rules; only the predicate value and emitted instruction differ.
  1.5345 +  format %{ "PREFETCHNTA $mem\t# Prefetch into non-temporal cache for read" %}
  1.5346 +  ins_encode %{
  1.5347 +    __ prefetchnta($mem$$Address);
  1.5348 +  %}
  1.5349 +  ins_pipe(ialu_mem);
  1.5350 +%}
  1.5351 +
  1.5352 +instruct prefetchrT0( memory mem ) %{ // PrefetchRead implemented with prefetcht0.
  1.5353 +  predicate(ReadPrefetchInstr==1); // chosen when the ReadPrefetchInstr flag is 1
  1.5354 +  match(PrefetchRead mem);
  1.5355 +  ins_cost(125);
  1.5356 +  // Same match and cost as the other PrefetchRead rules; only the predicate value and emitted instruction differ.
  1.5357 +  format %{ "PREFETCHT0 $mem\t# prefetch into L1 and L2 caches for read" %}
  1.5358 +  ins_encode %{
  1.5359 +    __ prefetcht0($mem$$Address);
  1.5360 +  %}
  1.5361 +  ins_pipe(ialu_mem);
  1.5362 +%}
  1.5363 +
  1.5364 +instruct prefetchrT2( memory mem ) %{ // PrefetchRead implemented with prefetcht2.
  1.5365 +  predicate(ReadPrefetchInstr==2); // chosen when the ReadPrefetchInstr flag is 2
  1.5366 +  match(PrefetchRead mem);
  1.5367 +  ins_cost(125);
  1.5368 +  // Same match and cost as the other PrefetchRead rules; only the predicate value and emitted instruction differ.
  1.5369 +  format %{ "PREFETCHT2 $mem\t# prefetch into L2 caches for read" %}
  1.5370 +  ins_encode %{
  1.5371 +    __ prefetcht2($mem$$Address);
  1.5372 +  %}
  1.5373 +  ins_pipe(ialu_mem);
  1.5374 +%}
  1.5375 +
  1.5376 +instruct prefetchwNTA( memory mem ) %{ // PrefetchWrite implemented with prefetchnta; the only PrefetchWrite rule in this chunk.
  1.5377 +  match(PrefetchWrite mem);
  1.5378 +  ins_cost(125);
  1.5379 +  // No predicate: this rule is not gated by a prefetch-selection flag, unlike the PrefetchRead rules.
  1.5380 +  format %{ "PREFETCHNTA $mem\t# Prefetch to non-temporal cache for write" %}
  1.5381 +  ins_encode %{
  1.5382 +    __ prefetchnta($mem$$Address);
  1.5383 +  %}
  1.5384 +  ins_pipe(ialu_mem);
  1.5385 +%}
  1.5386 +
  1.5387 +// Prefetch instructions for allocation.
  1.5388 +
  1.5389 +instruct prefetchAlloc( memory mem ) %{ // PrefetchAllocation implemented with prefetchw.
  1.5390 +  predicate(AllocatePrefetchInstr==3); // chosen when the AllocatePrefetchInstr flag is 3
  1.5391 +  match(PrefetchAllocation mem);
  1.5392 +  ins_cost(125);
  1.5393 +  // The four PrefetchAllocation rules share this match and cost; their predicates on AllocatePrefetchInstr (0..3) are mutually exclusive.
  1.5394 +  format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
  1.5395 +  ins_encode %{
  1.5396 +    __ prefetchw($mem$$Address);
  1.5397 +  %}
  1.5398 +  ins_pipe(ialu_mem);
  1.5399 +%}
  1.5400 +
  1.5401 +instruct prefetchAllocNTA( memory mem ) %{ // PrefetchAllocation implemented with prefetchnta.
  1.5402 +  predicate(AllocatePrefetchInstr==0); // chosen when the AllocatePrefetchInstr flag is 0
  1.5403 +  match(PrefetchAllocation mem);
  1.5404 +  ins_cost(125);
  1.5405 +  // Same match and cost as the other PrefetchAllocation rules; only the predicate value and emitted instruction differ.
  1.5406 +  format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
  1.5407 +  ins_encode %{
  1.5408 +    __ prefetchnta($mem$$Address);
  1.5409 +  %}
  1.5410 +  ins_pipe(ialu_mem);
  1.5411 +%}
  1.5412 +
  1.5413 +instruct prefetchAllocT0( memory mem ) %{ // PrefetchAllocation implemented with prefetcht0.
  1.5414 +  predicate(AllocatePrefetchInstr==1); // chosen when the AllocatePrefetchInstr flag is 1
  1.5415 +  match(PrefetchAllocation mem);
  1.5416 +  ins_cost(125);
  1.5417 +  // Same match and cost as the other PrefetchAllocation rules; only the predicate value and emitted instruction differ.
  1.5418 +  format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
  1.5419 +  ins_encode %{
  1.5420 +    __ prefetcht0($mem$$Address);
  1.5421 +  %}
  1.5422 +  ins_pipe(ialu_mem);
  1.5423 +%}
  1.5424 +
  1.5425 +instruct prefetchAllocT2( memory mem ) %{ // PrefetchAllocation implemented with prefetcht2.
  1.5426 +  predicate(AllocatePrefetchInstr==2); // chosen when the AllocatePrefetchInstr flag is 2
  1.5427 +  match(PrefetchAllocation mem);
  1.5428 +  ins_cost(125);
  1.5429 +  // Same match and cost as the other PrefetchAllocation rules; only the predicate value and emitted instruction differ.
  1.5430 +  format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
  1.5431 +  ins_encode %{
  1.5432 +    __ prefetcht2($mem$$Address);
  1.5433 +  %}
  1.5434 +  ins_pipe(ialu_mem);
  1.5435 +%}
  1.5436 +
  1.5437 +//----------Store Instructions-------------------------------------------------
  1.5438 +
  1.5439 +// Store Byte: StoreB of an int register to memory, formatted as movb.
  1.5440 +instruct storeB(memory mem, rRegI src)
  1.5441 +%{
  1.5442 +  match(Set mem (StoreB mem src));
  1.5443 +  // Uses REX_breg_mem rather than REX_reg_mem as its prefix step (a byte-register variant, judging by the name; the enc class is defined elsewhere).
  1.5444 +  ins_cost(125); // XXX
  1.5445 +  format %{ "movb    $mem, $src\t# byte" %}
  1.5446 +  opcode(0x88);
  1.5447 +  ins_encode(REX_breg_mem(src, mem), OpcP, reg_mem(src, mem));
  1.5448 +  ins_pipe(ialu_mem_reg);
  1.5449 +%}
  1.5450 +
  1.5451 +// Store Char/Short: StoreC of an int register to memory, formatted as movw.
  1.5452 +instruct storeC(memory mem, rRegI src)
  1.5453 +%{
  1.5454 +  match(Set mem (StoreC mem src));
  1.5455 +  // Same opcode (0x89) as storeI; the leading SizePrefix step is what distinguishes the 16-bit form.
  1.5456 +  ins_cost(125); // XXX
  1.5457 +  format %{ "movw    $mem, $src\t# char/short" %}
  1.5458 +  opcode(0x89);
  1.5459 +  ins_encode(SizePrefix, REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
  1.5460 +  ins_pipe(ialu_mem_reg);
  1.5461 +%}
  1.5462 +
  1.5463 +// Store Integer: StoreI of an int register to memory, formatted as movl.
  1.5464 +instruct storeI(memory mem, rRegI src)
  1.5465 +%{
  1.5466 +  match(Set mem (StoreI mem src));
  1.5467 +  // Encoded as non-wide REX step, opcode 0x89, reg_mem.
  1.5468 +  ins_cost(125); // XXX
  1.5469 +  format %{ "movl    $mem, $src\t# int" %}
  1.5470 +  opcode(0x89);
  1.5471 +  ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
  1.5472 +  ins_pipe(ialu_mem_reg);
  1.5473 +%}
  1.5474 +
  1.5475 +// Store Long: StoreL of a long register to memory, formatted as movq.
  1.5476 +instruct storeL(memory mem, rRegL src)
  1.5477 +%{
  1.5478 +  match(Set mem (StoreL mem src));
  1.5479 +  // Same opcode (0x89) as storeI, with the wide REX step.
  1.5480 +  ins_cost(125); // XXX
  1.5481 +  format %{ "movq    $mem, $src\t# long" %}
  1.5482 +  opcode(0x89);
  1.5483 +  ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
  1.5484 +  ins_pipe(ialu_mem_reg); // XXX
  1.5485 +%}
  1.5486 +
  1.5487 +// Store Pointer: StoreP of a pointer register to memory, formatted as movq.
  1.5488 +instruct storeP(memory mem, any_RegP src)
  1.5489 +%{
  1.5490 +  match(Set mem (StoreP mem src));
  1.5491 +  // src uses the any_RegP operand class here, not rRegP as in the stack-slot rule storeSSP.
  1.5492 +  ins_cost(125); // XXX
  1.5493 +  format %{ "movq    $mem, $src\t# ptr" %}
  1.5494 +  opcode(0x89);
  1.5495 +  ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
  1.5496 +  ins_pipe(ialu_mem_reg);
  1.5497 +%}
  1.5498 +
  1.5499 +instruct storeImmP0(memory mem, immP0 zero)
  1.5500 +%{ // Store a NULL pointer by writing r12 instead of an immediate zero.
  1.5501 +  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
  1.5502 +  match(Set mem (StoreP mem zero));
  1.5503 +  // The predicate is the condition under which the format string states R12_heapbase==0, so r12 can stand in for zero.
  1.5504 +  ins_cost(125); // XXX
  1.5505 +  format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
  1.5506 +  ins_encode %{
  1.5507 +    __ movq($mem$$Address, r12);
  1.5508 +  %}
  1.5509 +  ins_pipe(ialu_mem_reg);
  1.5510 +%}
  1.5511 +
  1.5512 +// Store NULL Pointer, mark word, or other simple pointer constant.
  1.5513 +instruct storeImmP(memory mem, immP31 src)
  1.5514 +%{
  1.5515 +  match(Set mem (StoreP mem src));
  1.5516 +  // Only immP31 constants qualify; the value is emitted as a 32-bit immediate (Con32) after the wide REX step. Cost 150 is above the r12-based storeImmP0 (125).
  1.5517 +  ins_cost(150); // XXX
  1.5518 +  format %{ "movq    $mem, $src\t# ptr" %}
  1.5519 +  opcode(0xC7); /* C7 /0 */
  1.5520 +  ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
  1.5521 +  ins_pipe(ialu_mem_imm);
  1.5522 +%}
  1.5523 +
  1.5524 +// Store Compressed Pointer: StoreN of a narrow-pointer register, written with a 32-bit movl.
  1.5525 +instruct storeN(memory mem, rRegN src)
  1.5526 +%{
  1.5527 +  match(Set mem (StoreN mem src));
  1.5528 +  // Emitted through the macro assembler rather than an opcode/enc-class sequence.
  1.5529 +  ins_cost(125); // XXX
  1.5530 +  format %{ "movl    $mem, $src\t# compressed ptr" %}
  1.5531 +  ins_encode %{
  1.5532 +    __ movl($mem$$Address, $src$$Register);
  1.5533 +  %}
  1.5534 +  ins_pipe(ialu_mem_reg);
  1.5535 +%}
  1.5536 +
  1.5537 +instruct storeNKlass(memory mem, rRegN src)
  1.5538 +%{ // Store a compressed klass pointer (StoreNKlass) from a narrow-pointer register with a 32-bit movl.
  1.5539 +  match(Set mem (StoreNKlass mem src));
  1.5540 +  // Identical encoding to storeN; only the matched ideal node differs.
  1.5541 +  ins_cost(125); // XXX
  1.5542 +  format %{ "movl    $mem, $src\t# compressed klass ptr" %}
  1.5543 +  ins_encode %{
  1.5544 +    __ movl($mem$$Address, $src$$Register);
  1.5545 +  %}
  1.5546 +  ins_pipe(ialu_mem_reg);
  1.5547 +%}
  1.5548 +
  1.5549 +instruct storeImmN0(memory mem, immN0 zero)
  1.5550 +%{ // Store a compressed NULL by writing the low 32 bits of r12.
  1.5551 +  predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_klass_base() == NULL);
  1.5552 +  match(Set mem (StoreN mem zero));
  1.5553 +  // Unlike the other r12-based zero stores, this predicate does not test UseCompressedOops explicitly.
  1.5554 +  ins_cost(125); // XXX
  1.5555 +  format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
  1.5556 +  ins_encode %{
  1.5557 +    __ movl($mem$$Address, r12);
  1.5558 +  %}
  1.5559 +  ins_pipe(ialu_mem_reg);
  1.5560 +%}
  1.5561 +
  1.5562 +instruct storeImmN(memory mem, immN src)
  1.5563 +%{ // Store a compressed oop constant directly to memory.
  1.5564 +  match(Set mem (StoreN mem src));
  1.5565 +  // Unlike loadConN, a NULL constant is accepted here and stored as a literal 32-bit zero.
  1.5566 +  ins_cost(150); // XXX
  1.5567 +  format %{ "movl    $mem, $src\t# compressed ptr" %}
  1.5568 +  ins_encode %{
  1.5569 +    address con = (address)$src$$constant;
  1.5570 +    if (con == NULL) {
  1.5571 +      __ movl($mem$$Address, (int32_t)0);
  1.5572 +    } else {
  1.5573 +      __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
  1.5574 +    }
  1.5575 +  %}
  1.5576 +  ins_pipe(ialu_mem_imm);
  1.5577 +%}
  1.5578 +
  1.5579 +instruct storeImmNKlass(memory mem, immNKlass src)
  1.5580 +%{ // Store a compressed klass constant directly to memory.
  1.5581 +  match(Set mem (StoreNKlass mem src));
  1.5582 +  // No NULL check is made here, in contrast to storeImmN and loadConNKlass.
  1.5583 +  ins_cost(150); // XXX
  1.5584 +  format %{ "movl    $mem, $src\t# compressed klass ptr" %}
  1.5585 +  ins_encode %{
  1.5586 +    __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
  1.5587 +  %}
  1.5588 +  ins_pipe(ialu_mem_imm);
  1.5589 +%}
  1.5590 +
  1.5591 +// Store Integer Immediate: zero case, written from r12 instead of an immediate.
  1.5592 +instruct storeImmI0(memory mem, immI0 zero)
  1.5593 +%{
  1.5594 +  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
  1.5595 +  match(Set mem (StoreI mem zero));
  1.5596 +  // The predicate is the condition under which the format string states R12_heapbase==0; cost 125 undercuts storeImmI (150).
  1.5597 +  ins_cost(125); // XXX
  1.5598 +  format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
  1.5599 +  ins_encode %{
  1.5600 +    __ movl($mem$$Address, r12);
  1.5601 +  %}
  1.5602 +  ins_pipe(ialu_mem_reg);
  1.5603 +%}
  1.5604 +
  1.5605 +instruct storeImmI(memory mem, immI src)
  1.5606 +%{ // Store an int immediate to memory.
  1.5607 +  match(Set mem (StoreI mem src));
  1.5608 +  // Encoded as non-wide REX step, opcode 0xC7 with /0 extension (RM_opc_mem), then the 32-bit immediate.
  1.5609 +  ins_cost(150);
  1.5610 +  format %{ "movl    $mem, $src\t# int" %}
  1.5611 +  opcode(0xC7); /* C7 /0 */
  1.5612 +  ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
  1.5613 +  ins_pipe(ialu_mem_imm);
  1.5614 +%}
  1.5615 +
  1.5616 +// Store Long Immediate: zero case, written from r12 instead of an immediate.
  1.5617 +instruct storeImmL0(memory mem, immL0 zero)
  1.5618 +%{
  1.5619 +  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
  1.5620 +  match(Set mem (StoreL mem zero));
  1.5621 +  // The predicate is the condition under which the format string states R12_heapbase==0; cost 125 undercuts storeImmL (150).
  1.5622 +  ins_cost(125); // XXX
  1.5623 +  format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
  1.5624 +  ins_encode %{
  1.5625 +    __ movq($mem$$Address, r12);
  1.5626 +  %}
  1.5627 +  ins_pipe(ialu_mem_reg);
  1.5628 +%}
  1.5629 +
  1.5630 +instruct storeImmL(memory mem, immL32 src)
  1.5631 +%{ // Store a long immediate of operand class immL32 to memory.
  1.5632 +  match(Set mem (StoreL mem src));
  1.5633 +  // Wide REX step with opcode 0xC7 /0; the immediate itself is emitted as 32 bits (Con32), hence the immL32 operand class.
  1.5634 +  ins_cost(150);
  1.5635 +  format %{ "movq    $mem, $src\t# long" %}
  1.5636 +  opcode(0xC7); /* C7 /0 */
  1.5637 +  ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
  1.5638 +  ins_pipe(ialu_mem_imm);
  1.5639 +%}
  1.5640 +
  1.5641 +// Store Short/Char Immediate: zero case, written from r12 instead of an immediate.
  1.5642 +instruct storeImmC0(memory mem, immI0 zero)
  1.5643 +%{
  1.5644 +  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
  1.5645 +  match(Set mem (StoreC mem zero));
  1.5646 +  // The predicate is the condition under which the format string states R12_heapbase==0.
  1.5647 +  ins_cost(125); // XXX
  1.5648 +  format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
  1.5649 +  ins_encode %{
  1.5650 +    __ movw($mem$$Address, r12);
  1.5651 +  %}
  1.5652 +  ins_pipe(ialu_mem_reg);
  1.5653 +%}
  1.5654 +
  1.5655 +instruct storeImmI16(memory mem, immI16 src)
  1.5656 +%{ // Store a 16-bit immediate for StoreC; only available when the UseStoreImmI16 flag is set.
  1.5657 +  predicate(UseStoreImmI16);
  1.5658 +  match(Set mem (StoreC mem src));
  1.5659 +  // Same C7 /0 opcode as storeImmI, preceded by SizePrefix and followed by a 16-bit immediate (Con16).
  1.5660 +  ins_cost(150);
  1.5661 +  format %{ "movw    $mem, $src\t# short/char" %}
  1.5662 +  opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
  1.5663 +  ins_encode(SizePrefix, REX_mem(mem), OpcP, RM_opc_mem(0x00, mem),Con16(src));
  1.5664 +  ins_pipe(ialu_mem_imm);
  1.5665 +%}
  1.5666 +
  1.5667 +// Store Byte Immediate: zero case, written from r12 instead of an immediate.
  1.5668 +instruct storeImmB0(memory mem, immI0 zero)
  1.5669 +%{
  1.5670 +  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
  1.5671 +  match(Set mem (StoreB mem zero));
  1.5672 +  // The predicate is the condition under which the format string states R12_heapbase==0; the listing is labelled byte to match the StoreB node and the movb emitted below.
  1.5673 +  ins_cost(125); // XXX
  1.5674 +  format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
  1.5675 +  ins_encode %{
  1.5676 +    __ movb($mem$$Address, r12);
  1.5677 +  %}
  1.5678 +  ins_pipe(ialu_mem_reg);
  1.5679 +%}
  1.5680 +
  1.5681 +instruct storeImmB(memory mem, immI8 src)
  1.5682 +%{ // Store a byte immediate (immI8) to memory.
  1.5683 +  match(Set mem (StoreB mem src));
  1.5684 +  // Opcode 0xC6 with /0 extension; the immediate is emitted by the Con8or32 enc class (defined elsewhere).
  1.5685 +  ins_cost(150); // XXX
  1.5686 +  format %{ "movb    $mem, $src\t# byte" %}
  1.5687 +  opcode(0xC6); /* C6 /0 */
  1.5688 +  ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
  1.5689 +  ins_pipe(ialu_mem_imm);
  1.5690 +%}
  1.5691 +
  1.5692 +// Store CMS card-mark Immediate: zero card-mark byte written from r12 instead of an immediate.
  1.5693 +instruct storeImmCM0_reg(memory mem, immI0 zero)
  1.5694 +%{
  1.5695 +  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
  1.5696 +  match(Set mem (StoreCM mem zero));
  1.5697 +  // The predicate is the condition under which the format string states R12_heapbase==0; cost 125 undercuts storeImmCM0 (150).
  1.5698 +  ins_cost(125); // XXX
  1.5699 +  format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
  1.5700 +  ins_encode %{
  1.5701 +    __ movb($mem$$Address, r12);
  1.5702 +  %}
  1.5703 +  ins_pipe(ialu_mem_reg);
  1.5704 +%}
  1.5705 +
  1.5706 +instruct storeImmCM0(memory mem, immI0 src)
  1.5707 +%{ // Store a zero card-mark byte (StoreCM) as an immediate; has no predicate, so it is always eligible.
  1.5708 +  match(Set mem (StoreCM mem src));
  1.5709 +  // Same C6 /0 encoding as storeImmB.
  1.5710 +  ins_cost(150); // XXX
  1.5711 +  format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
  1.5712 +  opcode(0xC6); /* C6 /0 */
  1.5713 +  ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
  1.5714 +  ins_pipe(ialu_mem_imm);
  1.5715 +%}
  1.5716 +
  1.5717 +// Store Float: StoreF of an XMM register to memory via movflt.
  1.5718 +instruct storeF(memory mem, regF src)
  1.5719 +%{
  1.5720 +  match(Set mem (StoreF mem src));
  1.5721 +  // Cost 95 is above the immediate forms storeF0 (25) and storeF_imm (50).
  1.5722 +  ins_cost(95); // XXX
  1.5723 +  format %{ "movss   $mem, $src\t# float" %}
  1.5724 +  ins_encode %{
  1.5725 +    __ movflt($mem$$Address, $src$$XMMRegister);
  1.5726 +  %}
  1.5727 +  ins_pipe(pipe_slow); // XXX
  1.5728 +%}
  1.5729 +
  1.5730 +// Store immediate Float value (it is faster than store from XMM register)
  1.5731 +instruct storeF0(memory mem, immF0 zero)
  1.5732 +%{ // Float 0.0 case: the 32-bit zero pattern is written from r12 with an integer movl.
  1.5733 +  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
  1.5734 +  match(Set mem (StoreF mem zero));
  1.5735 +  // The predicate is the condition under which the format string states R12_heapbase==0; cost 25 is the lowest of the StoreF rules.
  1.5736 +  ins_cost(25); // XXX
  1.5737 +  format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
  1.5738 +  ins_encode %{
  1.5739 +    __ movl($mem$$Address, r12);
  1.5740 +  %}
  1.5741 +  ins_pipe(ialu_mem_reg);
  1.5742 +%}
  1.5743 +
  1.5744 +instruct storeF_imm(memory mem, immF src)
  1.5745 +%{ // Store a float constant as a 32-bit integer immediate holding its bit pattern.
  1.5746 +  match(Set mem (StoreF mem src));
  1.5747 +  // Same C7 /0 encoding as storeImmI; Con32F_as_bits (defined elsewhere) supplies the immediate.
  1.5748 +  ins_cost(50);
  1.5749 +  format %{ "movl    $mem, $src\t# float" %}
  1.5750 +  opcode(0xC7); /* C7 /0 */
  1.5751 +  ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
  1.5752 +  ins_pipe(ialu_mem_imm);
  1.5753 +%}
  1.5754 +
  1.5755 +// Store Double: StoreD of an XMM register to memory via movdbl.
  1.5756 +instruct storeD(memory mem, regD src)
  1.5757 +%{
  1.5758 +  match(Set mem (StoreD mem src));
  1.5759 +  // Cost 95 is above the zero-store forms storeD0 (25) and storeD0_imm (50).
  1.5760 +  ins_cost(95); // XXX
  1.5761 +  format %{ "movsd   $mem, $src\t# double" %}
  1.5762 +  ins_encode %{
  1.5763 +    __ movdbl($mem$$Address, $src$$XMMRegister);
  1.5764 +  %}
  1.5765 +  ins_pipe(pipe_slow); // XXX
  1.5766 +%}
  1.5767 +
  1.5768 +// Store immediate double 0.0 (it is faster than store from XMM register)
  1.5769 +instruct storeD0_imm(memory mem, immD0 src)
  1.5770 +%{ // Immediate form of the double 0.0 store, for configurations where the r12-based storeD0 is not used.
  1.5771 +  predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL)); // NOTE(review): not the exact complement of the storeD0 predicate; with compressed oops, a NULL oop base and a non-NULL klass base neither zero-store rule is eligible
  1.5772 +  match(Set mem (StoreD mem src));
  1.5773 +  // Wide REX step with C7 /0 and a 32-bit immediate from Con32F_as_bits; the operand class restricts src to 0.0.
  1.5774 +  ins_cost(50);
  1.5775 +  format %{ "movq    $mem, $src\t# double 0." %}
  1.5776 +  opcode(0xC7); /* C7 /0 */
  1.5777 +  ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
  1.5778 +  ins_pipe(ialu_mem_imm);
  1.5779 +%}
  1.5780 +
  1.5781 +instruct storeD0(memory mem, immD0 zero)
  1.5782 +%{ // Double 0.0 case: the 64-bit zero pattern is written from r12 with an integer movq.
  1.5783 +  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
  1.5784 +  match(Set mem (StoreD mem zero));
  1.5785 +  // The predicate is the condition under which the format string states R12_heapbase==0; cost 25 is the lowest of the StoreD rules.
  1.5786 +  ins_cost(25); // XXX
  1.5787 +  format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
  1.5788 +  ins_encode %{
  1.5789 +    __ movq($mem$$Address, r12);
  1.5790 +  %}
  1.5791 +  ins_pipe(ialu_mem_reg);
  1.5792 +%}
  1.5793 +
  1.5794 +instruct storeSSI(stackSlotI dst, rRegI src)
  1.5795 +%{ // Store an int register into a stack slot operand.
  1.5796 +  match(Set dst src);
  1.5797 +  // Opcode 0x89 with the non-wide REX step; the register is passed first to the enc classes, the slot second.
  1.5798 +  ins_cost(100);
  1.5799 +  format %{ "movl    $dst, $src\t# int stk" %}
  1.5800 +  opcode(0x89);
  1.5801 +  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
  1.5802 +  ins_pipe( ialu_mem_reg );
  1.5803 +%}
  1.5804 +
  1.5805 +instruct storeSSL(stackSlotL dst, rRegL src)
  1.5806 +%{ // Store a long register into a stack slot operand.
  1.5807 +  match(Set dst src);
  1.5808 +  // Same opcode (0x89) as storeSSI, with the wide REX step.
  1.5809 +  ins_cost(100);
  1.5810 +  format %{ "movq    $dst, $src\t# long stk" %}
  1.5811 +  opcode(0x89);
  1.5812 +  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
  1.5813 +  ins_pipe(ialu_mem_reg);
  1.5814 +%}
  1.5815 +
  1.5816 +instruct storeSSP(stackSlotP dst, rRegP src)
  1.5817 +%{ // Store a pointer register into a stack slot operand.
  1.5818 +  match(Set dst src);
  1.5819 +  // Encoded exactly like storeSSL: wide REX step, opcode 0x89, reg_mem.
  1.5820 +  ins_cost(100);
  1.5821 +  format %{ "movq    $dst, $src\t# ptr stk" %}
  1.5822 +  opcode(0x89);
  1.5823 +  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
  1.5824 +  ins_pipe(ialu_mem_reg);
  1.5825 +%}
  1.5826 +
  1.5827 +instruct storeSSF(stackSlotF dst, regF src)
  1.5828 +%{ // Store a float XMM register into a stack slot operand.
  1.5829 +  match(Set dst src);
  1.5830 +  // The slot is addressed as rsp plus the operand displacement.
  1.5831 +  ins_cost(95); // XXX
  1.5832 +  format %{ "movss   $dst, $src\t# float stk" %}
  1.5833 +  ins_encode %{
  1.5834 +    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  1.5835 +  %}
  1.5836 +  ins_pipe(pipe_slow); // XXX
  1.5837 +%}
  1.5838 +
  1.5839 +instruct storeSSD(stackSlotD dst, regD src)
  1.5840 +%{ // Store a double XMM register into a stack slot operand.
  1.5841 +  match(Set dst src);
  1.5842 +  // The slot is addressed as rsp plus the operand displacement.
  1.5843 +  ins_cost(95); // XXX
  1.5844 +  format %{ "movsd   $dst, $src\t# double stk" %}
  1.5845 +  ins_encode %{
  1.5846 +    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  1.5847 +  %}
  1.5848 +  ins_pipe(pipe_slow); // XXX
  1.5849 +%}
  1.5850 +
  1.5851 +//----------BSWAP Instructions-------------------------------------------------
  1.5852 +instruct bytes_reverse_int(rRegI dst) %{ // ReverseBytesI in place: dst is both the input and the result.
  1.5853 +  match(Set dst (ReverseBytesI dst));
  1.5854 +  // Two-byte opcode; the register is folded into the second byte by opc2_reg (enc class defined elsewhere).
  1.5855 +  format %{ "bswapl  $dst" %}
  1.5856 +  opcode(0x0F, 0xC8);  /* Opcode 0F C8+rd */
  1.5857 +  ins_encode( REX_reg(dst), OpcP, opc2_reg(dst) );
  1.5858 +  ins_pipe( ialu_reg );
  1.5859 +%}
  1.5860 +
  1.5861 +instruct bytes_reverse_long(rRegL dst) %{ // ReverseBytesL in place: dst is both the input and the result.
  1.5862 +  match(Set dst (ReverseBytesL dst));
  1.5863 +  // Same opcode bytes as bytes_reverse_int; the wide REX step (REX_reg_wide) selects the 64-bit form.
  1.5864 +  format %{ "bswapq  $dst" %}
  1.5865 +  opcode(0x0F, 0xC8); /* Opcode 0F C8+rd */
  1.5866 +  ins_encode( REX_reg_wide(dst), OpcP, opc2_reg(dst) );
  1.5867 +  ins_pipe( ialu_reg);
  1.5868 +%}
  1.5869 +
  1.5870 +instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{ // ReverseBytesUS in place: 32-bit byte swap, then logical shift right by 16.
  1.5871 +  match(Set dst (ReverseBytesUS dst));
  1.5872 +  effect(KILL cr); // the flags register is declared clobbered by this rule
  1.5873 +  // After the swap the two reversed low bytes sit in the upper half; the logical shift brings them down with zero fill. The listing ends without a trailing line break, like the other multi-line formats.
  1.5874 +  format %{ "bswapl  $dst\n\t"
  1.5875 +            "shrl    $dst,16" %}
  1.5876 +  ins_encode %{
  1.5877 +    __ bswapl($dst$$Register);
  1.5878 +    __ shrl($dst$$Register, 16);
  1.5879 +  %}
  1.5880 +  ins_pipe( ialu_reg );
  1.5881 +%}
  1.5882 +
  1.5883 +instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{ // ReverseBytesS in place: 32-bit byte swap, then arithmetic shift right by 16.
  1.5884 +  match(Set dst (ReverseBytesS dst));
  1.5885 +  effect(KILL cr); // the flags register is declared clobbered by this rule
  1.5886 +  // After the swap the two reversed low bytes sit in the upper half; the arithmetic shift brings them down with sign fill. The listing names sarl, the instruction actually emitted below.
  1.5887 +  format %{ "bswapl  $dst\n\t"
  1.5888 +            "sarl    $dst,16" %}
  1.5889 +  ins_encode %{
  1.5890 +    __ bswapl($dst$$Register);
  1.5891 +    __ sarl($dst$$Register, 16);
  1.5892 +  %}
  1.5893 +  ins_pipe( ialu_reg );
  1.5894 +%}
  1.5895 +
  1.5896 +//---------- Zeros Count Instructions ------------------------------------------
  1.5897 +
  1.5898 +instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{ // CountLeadingZerosI using a single lzcntl.
  1.5899 +  predicate(UseCountLeadingZerosInstruction); // the _bsr variant covers the case where this flag is off
  1.5900 +  match(Set dst (CountLeadingZerosI src));
  1.5901 +  effect(KILL cr); // the flags register is declared clobbered by this rule
  1.5902 +  // No ins_cost is given; selection between this rule and the _bsr variant is by predicate only.
  1.5903 +  format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
  1.5904 +  ins_encode %{
  1.5905 +    __ lzcntl($dst$$Register, $src$$Register);
  1.5906 +  %}
  1.5907 +  ins_pipe(ialu_reg);
  1.5908 +%}
  1.5909 +
  1.5910 +instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{ // CountLeadingZerosI fallback built from bsrl when the lzcnt flag is off.
  1.5911 +  predicate(!UseCountLeadingZerosInstruction);
  1.5912 +  match(Set dst (CountLeadingZerosI src));
  1.5913 +  effect(KILL cr); // the flags register is declared clobbered by this rule
  1.5914 +  // Result is 31 minus the bit index produced by bsrl; a zero source is mapped to index -1 so that the result becomes 32.
  1.5915 +  format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
  1.5916 +            "jnz     skip\n\t"
  1.5917 +            "movl    $dst, -1\n"
  1.5918 +      "skip:\n\t"
  1.5919 +            "negl    $dst\n\t"
  1.5920 +            "addl    $dst, 31" %}
  1.5921 +  ins_encode %{
  1.5922 +    Register Rdst = $dst$$Register;
  1.5923 +    Register Rsrc = $src$$Register;
  1.5924 +    Label skip;
  1.5925 +    __ bsrl(Rdst, Rsrc); // bit index of the highest set bit
  1.5926 +    __ jccb(Assembler::notZero, skip); // short branch over the zero-source fixup
  1.5927 +    __ movl(Rdst, -1); // zero source: force the index to -1
  1.5928 +    __ bind(skip);
  1.5929 +    __ negl(Rdst); // negate, then add 31 below: 31 - index
  1.5930 +    __ addl(Rdst, BitsPerInt - 1);
  1.5931 +  %}
  1.5932 +  ins_pipe(ialu_reg);
  1.5933 +%}
  1.5934 +
  1.5935 +instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{ // CountLeadingZerosL using a single lzcntq; the result lands in an int register.
  1.5936 +  predicate(UseCountLeadingZerosInstruction); // the _bsr variant covers the case where this flag is off
  1.5937 +  match(Set dst (CountLeadingZerosL src));
  1.5938 +  effect(KILL cr); // the flags register is declared clobbered by this rule
  1.5939 +  // dst is rRegI while src is rRegL: the count of a long is an int value.
  1.5940 +  format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
  1.5941 +  ins_encode %{
  1.5942 +    __ lzcntq($dst$$Register, $src$$Register);
  1.5943 +  %}
  1.5944 +  ins_pipe(ialu_reg);
  1.5945 +%}
  1.5946 +
  1.5947 +instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{ // CountLeadingZerosL fallback built from bsrq when the lzcnt flag is off.
  1.5948 +  predicate(!UseCountLeadingZerosInstruction);
  1.5949 +  match(Set dst (CountLeadingZerosL src));
  1.5950 +  effect(KILL cr); // the flags register is declared clobbered by this rule
  1.5951 +  // Result is 63 minus the bit index produced by bsrq; a zero source is mapped to index -1 so that the result becomes 64.
  1.5952 +  format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
  1.5953 +            "jnz     skip\n\t"
  1.5954 +            "movl    $dst, -1\n"
  1.5955 +      "skip:\n\t"
  1.5956 +            "negl    $dst\n\t"
  1.5957 +            "addl    $dst, 63" %}
  1.5958 +  ins_encode %{
  1.5959 +    Register Rdst = $dst$$Register;
  1.5960 +    Register Rsrc = $src$$Register;
  1.5961 +    Label skip;
  1.5962 +    __ bsrq(Rdst, Rsrc); // bit index of the highest set bit of the 64-bit source
  1.5963 +    __ jccb(Assembler::notZero, skip); // short branch over the zero-source fixup
  1.5964 +    __ movl(Rdst, -1); // zero source: force the index to -1
  1.5965 +    __ bind(skip);
  1.5966 +    __ negl(Rdst); // 32-bit arithmetic suffices from here on: the index fits in an int
  1.5967 +    __ addl(Rdst, BitsPerLong - 1);
  1.5968 +  %}
  1.5969 +  ins_pipe(ialu_reg);
  1.5970 +%}
  1.5971 +
  1.5972 +instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{ // CountTrailingZerosI using a single tzcntl.
  1.5973 +  predicate(UseCountTrailingZerosInstruction); // the _bsf variant covers the case where this flag is off
  1.5974 +  match(Set dst (CountTrailingZerosI src));
  1.5975 +  effect(KILL cr); // the flags register is declared clobbered by this rule
  1.5976 +  // No ins_cost is given; selection between this rule and the _bsf variant is by predicate only.
  1.5977 +  format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
  1.5978 +  ins_encode %{
  1.5979 +    __ tzcntl($dst$$Register, $src$$Register);
  1.5980 +  %}
  1.5981 +  ins_pipe(ialu_reg);
  1.5982 +%}
  1.5983 +
  1.5984 +instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{ // CountTrailingZerosI fallback built from bsfl
  1.5985 +  predicate(!UseCountTrailingZerosInstruction); // used when the tzcnt-based rule is disabled
  1.5986 +  match(Set dst (CountTrailingZerosI src));
  1.5987 +  effect(KILL cr); // flags register is declared clobbered
  1.5988 +
  1.5989 +  format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
  1.5990 +            "jnz     done\n\t"
  1.5991 +            "movl    $dst, 32\n"
  1.5992 +      "done:" %}
  1.5993 +  ins_encode %{
  1.5994 +    Label found_bit;
  1.5995 +    const Register result = $dst$$Register;
  1.5996 +    __ bsfl(result, $src$$Register);
  1.5997 +    __ jccb(Assembler::notZero, found_bit);  // non-zero source: keep the bsfl result
  1.5998 +    __ movl(result, BitsPerInt);             // zero source: answer is the full bit width
  1.5999 +    __ bind(found_bit);
  1.6000 +  %}
  1.6001 +  ins_pipe(ialu_reg);
  1.6002 +%}
  1.6003 +
  1.6004 +instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{ // CountTrailingZerosL: long register in, int register out, one tzcntq
  1.6005 +  predicate(UseCountTrailingZerosInstruction); // countTrailingZerosL_bsf carries the negated predicate
  1.6006 +  match(Set dst (CountTrailingZerosL src));
  1.6007 +  effect(KILL cr); // flags register is declared clobbered
  1.6008 +
  1.6009 +  format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
  1.6010 +  ins_encode %{
  1.6011 +    __ tzcntq($dst$$Register, $src$$Register);
  1.6012 +  %}
  1.6013 +  ins_pipe(ialu_reg);
  1.6014 +%}
  1.6015 +
  1.6016 +instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{ // CountTrailingZerosL fallback built from bsfq
  1.6017 +  predicate(!UseCountTrailingZerosInstruction); // used when the tzcnt-based rule is disabled
  1.6018 +  match(Set dst (CountTrailingZerosL src));
  1.6019 +  effect(KILL cr); // flags register is declared clobbered
  1.6020 +
  1.6021 +  format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
  1.6022 +            "jnz     done\n\t"
  1.6023 +            "movl    $dst, 64\n"
  1.6024 +      "done:" %}
  1.6025 +  ins_encode %{
  1.6026 +    Label found_bit;
  1.6027 +    const Register result = $dst$$Register;
  1.6028 +    __ bsfq(result, $src$$Register);
  1.6029 +    __ jccb(Assembler::notZero, found_bit);  // non-zero source: keep the bsfq result
  1.6030 +    __ movl(result, BitsPerLong);            // zero source: answer is the full bit width
  1.6031 +    __ bind(found_bit);
  1.6032 +  %}
  1.6033 +  ins_pipe(ialu_reg);
  1.6034 +%}
  1.6035 +
  1.6036 +
  1.6037 +//---------- Population Count Instructions -------------------------------------
  1.6038 +
  1.6039 +instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{ // PopCountI of an int register via popcntl
  1.6040 +  predicate(UsePopCountInstruction); // no fallback rule for PopCountI is visible in this section
  1.6041 +  match(Set dst (PopCountI src));
  1.6042 +  effect(KILL cr); // flags register is declared clobbered
  1.6043 +
  1.6044 +  format %{ "popcnt  $dst, $src" %}
  1.6045 +  ins_encode %{
  1.6046 +    __ popcntl($dst$$Register, $src$$Register);
  1.6047 +  %}
  1.6048 +  ins_pipe(ialu_reg);
  1.6049 +%}
  1.6050 +
  1.6051 +instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{ // PopCountI with the LoadI folded into popcntl's memory operand
  1.6052 +  predicate(UsePopCountInstruction);
  1.6053 +  match(Set dst (PopCountI (LoadI mem)));
  1.6054 +  effect(KILL cr); // flags register is declared clobbered
  1.6055 +
  1.6056 +  format %{ "popcnt  $dst, $mem" %}
  1.6057 +  ins_encode %{
  1.6058 +    __ popcntl($dst$$Register, $mem$$Address);
  1.6059 +  %}
  1.6060 +  ins_pipe(ialu_reg);
  1.6061 +%}
  1.6062 +
  1.6063 +// Note: Long.bitCount(long) returns an int.
  1.6064 +instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{ // PopCountL of a long register via popcntq; dst is an int register
  1.6065 +  predicate(UsePopCountInstruction);
  1.6066 +  match(Set dst (PopCountL src));
  1.6067 +  effect(KILL cr); // flags register is declared clobbered
  1.6068 +
  1.6069 +  format %{ "popcnt  $dst, $src" %}
  1.6070 +  ins_encode %{
  1.6071 +    __ popcntq($dst$$Register, $src$$Register);
  1.6072 +  %}
  1.6073 +  ins_pipe(ialu_reg);
  1.6074 +%}
  1.6075 +
  1.6076 +// Note: Long.bitCount(long) returns an int.
  1.6077 +instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{ // PopCountL with the LoadL folded into popcntq's memory operand
  1.6078 +  predicate(UsePopCountInstruction);
  1.6079 +  match(Set dst (PopCountL (LoadL mem)));
  1.6080 +  effect(KILL cr); // flags register is declared clobbered
  1.6081 +
  1.6082 +  format %{ "popcnt  $dst, $mem" %}
  1.6083 +  ins_encode %{
  1.6084 +    __ popcntq($dst$$Register, $mem$$Address);
  1.6085 +  %}
  1.6086 +  ins_pipe(ialu_reg);
  1.6087 +%}
  1.6088 +
  1.6089 +
  1.6090 +//----------MemBar Instructions-----------------------------------------------
  1.6091 +// Memory barrier flavors
  1.6092 +
  1.6093 +instruct membar_acquire() // Matches both MemBarAcquire and LoadFence; produces no machine code
  1.6094 +%{
  1.6095 +  match(MemBarAcquire);
  1.6096 +  match(LoadFence);
  1.6097 +  ins_cost(0); // zero cost
  1.6098 +
  1.6099 +  size(0);
  1.6100 +  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  1.6101 +  ins_encode(); // nothing is emitted, consistent with size(0)
  1.6102 +  ins_pipe(empty);
  1.6103 +%}
  1.6104 +
  1.6105 +instruct membar_acquire_lock() // Matches MemBarAcquireLock; produces no machine code
  1.6106 +%{
  1.6107 +  match(MemBarAcquireLock);
  1.6108 +  ins_cost(0); // zero cost
  1.6109 +
  1.6110 +  size(0);
  1.6111 +  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  1.6112 +  ins_encode(); // empty: per the format text, the CMPXCHG in FastLock already provides the ordering
  1.6113 +  ins_pipe(empty);
  1.6114 +%}
  1.6115 +
  1.6116 +instruct membar_release() // Matches both MemBarRelease and StoreFence; produces no machine code
  1.6117 +%{
  1.6118 +  match(MemBarRelease);
  1.6119 +  match(StoreFence);
  1.6120 +  ins_cost(0); // zero cost
  1.6121 +
  1.6122 +  size(0);
  1.6123 +  format %{ "MEMBAR-release ! (empty encoding)" %}
  1.6124 +  ins_encode(); // nothing is emitted, consistent with size(0)
  1.6125 +  ins_pipe(empty);
  1.6126 +%}
  1.6127 +
  1.6128 +instruct membar_release_lock() // Matches MemBarReleaseLock; produces no machine code
  1.6129 +%{
  1.6130 +  match(MemBarReleaseLock);
  1.6131 +  ins_cost(0); // zero cost
  1.6132 +
  1.6133 +  size(0);
  1.6134 +  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  1.6135 +  ins_encode(); // empty: per the format text, the following FastUnlock provides the ordering
  1.6136 +  ins_pipe(empty);
  1.6137 +%}
  1.6138 +
  1.6139 +instruct membar_volatile(rFlagsReg cr) %{ // MemBarVolatile that emits a real StoreLoad barrier
  1.6140 +  match(MemBarVolatile);
  1.6141 +  effect(KILL cr); // flags register is declared clobbered
  1.6142 +  ins_cost(400); // unnecessary_membar_volatile matches the same node at cost 0
  1.6143 +
  1.6144 +  format %{
  1.6145 +    $$template
  1.6146 +    if (os::is_MP()) {
  1.6147 +      $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
  1.6148 +    } else {
  1.6149 +      $$emit$$"MEMBAR-volatile ! (empty encoding)"
  1.6150 +    }
  1.6151 +  %}
  1.6152 +  ins_encode %{
  1.6153 +    // NOTE(review): the format above implies membar() emits nothing when !os::is_MP(); confirm in MacroAssembler.
  1.6154 +    __ membar(Assembler::StoreLoad);
  1.6155 +  %}
  1.6156 +  ins_pipe(pipe_slow);
  1.6157 +%}
  1.6157 +
  1.6158 +instruct unnecessary_membar_volatile() // MemBarVolatile that can be elided; produces no machine code
  1.6159 +%{
  1.6160 +  match(MemBarVolatile);
  1.6161 +  predicate(Matcher::post_store_load_barrier(n)); // applies only when this Matcher query returns true for the node
  1.6162 +  ins_cost(0); // presumably lets the matcher prefer this over membar_volatile (cost 400) — confirm
  1.6163 +
  1.6164 +  size(0);
  1.6165 +  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  1.6166 +  ins_encode(); // nothing is emitted, consistent with size(0)
  1.6167 +  ins_pipe(empty);
  1.6168 +%}
  1.6169 +
  1.6170 +instruct membar_storestore() %{ // Matches MemBarStoreStore; produces no machine code
  1.6171 +  match(MemBarStoreStore);
  1.6172 +  ins_cost(0); // zero cost
  1.6173 +
  1.6174 +  size(0);
  1.6175 +  format %{ "MEMBAR-storestore (empty encoding)" %}
  1.6176 +  ins_encode( ); // nothing is emitted, consistent with size(0)
  1.6177 +  ins_pipe(empty);
  1.6178 +%}
  1.6179 +
  1.6180 +//----------Move Instructions--------------------------------------------------
  1.6181 +
  1.6182 +instruct castX2P(rRegP dst, rRegL src) // CastX2P: reinterpret a long register as a pointer register
  1.6183 +%{
  1.6184 +  match(Set dst (CastX2P src));
  1.6185 +
  1.6186 +  format %{ "movq    $dst, $src\t# long->ptr" %}
  1.6187 +  ins_encode %{
  1.6188 +    if ($dst$$reg != $src$$reg) { // same register on both sides: nothing to emit
  1.6189 +      __ movptr($dst$$Register, $src$$Register);
  1.6190 +    }
  1.6191 +  %}
  1.6192 +  ins_pipe(ialu_reg_reg); // XXX
  1.6193 +%}
  1.6194 +
  1.6195 +instruct castP2X(rRegL dst, rRegP src) // CastP2X: reinterpret a pointer register as a long register
  1.6196 +%{
  1.6197 +  match(Set dst (CastP2X src));
  1.6198 +
  1.6199 +  format %{ "movq    $dst, $src\t# ptr -> long" %}
  1.6200 +  ins_encode %{
  1.6201 +    if ($dst$$reg != $src$$reg) { // same register on both sides: nothing to emit
  1.6202 +      __ movptr($dst$$Register, $src$$Register);
  1.6203 +    }
  1.6204 +  %}
  1.6205 +  ins_pipe(ialu_reg_reg); // XXX
  1.6206 +%}
  1.6207 +
  1.6208 +// Convert oop into int for vectors alignment masking
  1.6209 +instruct convP2I(rRegI dst, rRegP src) // Folds ConvL2I(CastP2X ptr) into a single movl
  1.6210 +%{
  1.6211 +  match(Set dst (ConvL2I (CastP2X src)));
  1.6212 +
  1.6213 +  format %{ "movl    $dst, $src\t# ptr -> int" %}
  1.6214 +  ins_encode %{
  1.6215 +    __ movl($dst$$Register, $src$$Register); // emitted unconditionally, even if dst and src are the same register
  1.6216 +  %}
  1.6217 +  ins_pipe(ialu_reg_reg); // XXX
  1.6218 +%}
  1.6219 +
  1.6220 +// Convert compressed oop into int for vectors alignment masking
  1.6221 +// in case of 32bit oops (heap < 4Gb).
  1.6222 +instruct convN2I(rRegI dst, rRegN src) // Folds ConvL2I(CastP2X(DecodeN narrow)) into a single movl of the narrow value
  1.6223 +%{
  1.6224 +  predicate(Universe::narrow_oop_shift() == 0); // only when compressed oops are unshifted
  1.6225 +  match(Set dst (ConvL2I (CastP2X (DecodeN src))));
  1.6226 +
  1.6227 +  format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
  1.6228 +  ins_encode %{
  1.6229 +    __ movl($dst$$Register, $src$$Register); // no decode step is emitted
  1.6230 +  %}
  1.6231 +  ins_pipe(ialu_reg_reg); // XXX
  1.6232 +%}
  1.6233 +
  1.6234 +// Convert oop pointer into compressed form
  1.6235 +instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{ // EncodeP for oops not typed NotNull
  1.6236 +  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull); // encodeHeapOop_not_null takes the NotNull case
  1.6237 +  match(Set dst (EncodeP src));
  1.6238 +  effect(KILL cr); // flags register is declared clobbered
  1.6239 +  format %{ "encode_heap_oop $dst,$src" %}
  1.6240 +  ins_encode %{
  1.6241 +    const Register narrow = $dst$$Register;
  1.6242 +    const Register wide   = $src$$Register;
  1.6243 +    if (narrow != wide) {
  1.6244 +      __ movq(narrow, wide);   // the one-register encoder below works on dst only
  1.6245 +    }
  1.6246 +    __ encode_heap_oop(narrow);
  1.6247 +  %}
  1.6248 +  ins_pipe(ialu_reg_long);
  1.6249 +%}
  1.6250 +
  1.6251 +instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{ // EncodeP for oops typed NotNull
  1.6252 +  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull); // exact complement of encodeHeapOop's predicate
  1.6253 +  match(Set dst (EncodeP src));
  1.6254 +  effect(KILL cr); // flags register is declared clobbered
  1.6255 +  format %{ "encode_heap_oop_not_null $dst,$src" %}
  1.6256 +  ins_encode %{
  1.6257 +    __ encode_heap_oop_not_null($dst$$Register, $src$$Register); // two-register form, so no explicit move is needed here
  1.6258 +  %}
  1.6259 +  ins_pipe(ialu_reg_long);
  1.6260 +%}
  1.6261 +
  1.6262 +instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{ // DecodeN for results typed neither NotNull nor Constant
  1.6263 +  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
  1.6264 +            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
  1.6265 +  match(Set dst (DecodeN src));
  1.6266 +  effect(KILL cr); // flags register is declared clobbered
  1.6267 +  format %{ "decode_heap_oop $dst,$src" %}
  1.6268 +  ins_encode %{
  1.6269 +    const Register wide   = $dst$$Register;
  1.6270 +    const Register narrow = $src$$Register;
  1.6271 +    if (wide != narrow) {
  1.6272 +      __ movq(wide, narrow);   // the one-register decoder below works on dst only
  1.6273 +    }
  1.6274 +    __ decode_heap_oop(wide);
  1.6275 +  %}
  1.6276 +  ins_pipe(ialu_reg_long);
  1.6277 +%}
  1.6278 +
  1.6279 +instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{ // DecodeN for results typed NotNull or Constant
  1.6280 +  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
  1.6281 +            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  1.6282 +  match(Set dst (DecodeN src));
  1.6283 +  effect(KILL cr); // flags register is declared clobbered
  1.6284 +  format %{ "decode_heap_oop_not_null $dst,$src" %}
  1.6285 +  ins_encode %{
  1.6286 +    const Register wide   = $dst$$Register;
  1.6287 +    const Register narrow = $src$$Register;
  1.6288 +    if (wide == narrow) {
  1.6289 +      __ decode_heap_oop_not_null(wide);           // in-place form
  1.6290 +    } else {
  1.6291 +      __ decode_heap_oop_not_null(wide, narrow);   // two-register form
  1.6292 +    }
  1.6293 +  %}
  1.6294 +  ins_pipe(ialu_reg_long);
  1.6295 +%}
  1.6296 +
  1.6297 +instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{ // EncodePKlass; no predicate, so this is the only rule shown for that node
  1.6298 +  match(Set dst (EncodePKlass src));
  1.6299 +  effect(KILL cr); // flags register is declared clobbered
  1.6300 +  format %{ "encode_klass_not_null $dst,$src" %}
  1.6301 +  ins_encode %{
  1.6302 +    __ encode_klass_not_null($dst$$Register, $src$$Register); // two-register form
  1.6303 +  %}
  1.6304 +  ins_pipe(ialu_reg_long);
  1.6305 +%}
  1.6306 +
  1.6307 +instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{ // DecodeNKlass; no predicate on this rule
  1.6308 +  match(Set dst (DecodeNKlass src));
  1.6309 +  effect(KILL cr); // flags register is declared clobbered
  1.6310 +  format %{ "decode_klass_not_null $dst,$src" %}
  1.6311 +  ins_encode %{
  1.6312 +    const Register wide   = $dst$$Register;
  1.6313 +    const Register narrow = $src$$Register;
  1.6314 +    if (wide == narrow) {
  1.6315 +      __ decode_klass_not_null(wide);           // in-place form
  1.6316 +    } else {
  1.6317 +      __ decode_klass_not_null(wide, narrow);   // two-register form
  1.6318 +    }
  1.6319 +  %}
  1.6320 +  ins_pipe(ialu_reg_long);
  1.6321 +%}
  1.6322 +
  1.6323 +
  1.6324 +//----------Conditional Move---------------------------------------------------
  1.6325 +// Jump
  1.6326 +// dummy instruction for generating temp registers
  1.6327 +instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{ // Jump on a shifted index; dest is a scratch register
  1.6328 +  match(Jump (LShiftL switch_val shift));
  1.6329 +  ins_cost(350);
  1.6330 +  predicate(false); // rule is switched off: the predicate can never hold
  1.6331 +  effect(TEMP dest);
  1.6332 +
  1.6333 +  format %{ "leaq    $dest, [$constantaddress]\n\t"
  1.6334 +            "jmp     [$dest + $switch_val << $shift]\n\t" %}
  1.6335 +  ins_encode %{
  1.6336 +    // MacroAssembler's jump(ArrayAddress) would need r10 as scratch, but the
  1.6337 +    // compiler may have allocated r10, so the dispatch is assembled by hand:
  1.6338 +    // load $constantaddress into the temp, then jump indirectly through
  1.6339 +    // [temp + switch_val scaled by shift].
  1.6340 +    const Register table = $dest$$Register;
  1.6341 +    const Address::ScaleFactor scale = (Address::ScaleFactor) $shift$$constant;
  1.6342 +    __ lea(table, $constantaddress);
  1.6343 +    __ jmp(Address(table, $switch_val$$Register, scale));
  1.6344 +  %}
  1.6345 +  ins_pipe(pipe_jmp);
  1.6346 +%}
  1.6347 +
  1.6348 +instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{ // Jump on shifted index plus constant offset
  1.6349 +  match(Jump (AddL (LShiftL switch_val shift) offset));
  1.6350 +  ins_cost(350);
  1.6351 +  effect(TEMP dest); // dest is a scratch register
  1.6352 +
  1.6353 +  format %{ "leaq    $dest, [$constantaddress]\n\t"
  1.6354 +            "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
  1.6355 +  ins_encode %{
  1.6356 +    // MacroAssembler's jump(ArrayAddress) would need r10 as scratch, but the
  1.6357 +    // compiler may have allocated r10, so the dispatch is assembled by hand:
  1.6358 +    // load $constantaddress into the temp, then jump indirectly through
  1.6359 +    // [temp + switch_val scaled by shift + offset].
  1.6360 +    const Register table = $dest$$Register;
  1.6361 +    const int disp = (int) $offset$$constant;
  1.6362 +    __ lea(table, $constantaddress);
  1.6363 +    __ jmp(Address(table, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, disp));
  1.6364 +  %}
  1.6365 +  ins_pipe(pipe_jmp);
  1.6366 +%}
  1.6367 +
  1.6368 +instruct jumpXtnd(rRegL switch_val, rRegI dest) %{ // Jump on an unscaled index; dest is a scratch register
  1.6369 +  match(Jump switch_val);
  1.6370 +  ins_cost(350);
  1.6371 +  effect(TEMP dest);
  1.6372 +
  1.6373 +  format %{ "leaq    $dest, [$constantaddress]\n\t"
  1.6374 +            "jmp     [$dest + $switch_val]\n\t" %}
  1.6375 +  ins_encode %{
  1.6376 +    // MacroAssembler's jump(ArrayAddress) would need r10 as scratch, but the
  1.6377 +    // compiler may have allocated r10, so the dispatch is assembled by hand:
  1.6378 +    // load $constantaddress into the temp, then jump indirectly through
  1.6379 +    // [temp + switch_val].
  1.6380 +    //
  1.6381 +    const Register table = $dest$$Register;
  1.6382 +    __ lea(table, $constantaddress);
  1.6383 +    __ jmp(Address(table, $switch_val$$Register, Address::times_1));
  1.6384 +  %}
  1.6385 +  ins_pipe(pipe_jmp);
  1.6386 +%}
  1.6387 +
  1.6388 +// Conditional move
  1.6389 +instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop) // CMoveI, register source, signed condition (cmpOp on rFlagsReg)
  1.6390 +%{
  1.6391 +  match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); // dst is both a data input and the result
  1.6392 +
  1.6393 +  ins_cost(200); // XXX
  1.6394 +  format %{ "cmovl$cop $dst, $src\t# signed, int" %}
  1.6395 +  opcode(0x0F, 0x40); // NOTE(review): enc_cmov is defined elsewhere; presumably it merges cop's condition code into 0x40 — confirm
  1.6396 +  ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
  1.6397 +  ins_pipe(pipe_cmov_reg);
  1.6398 +%}
  1.6399 +
  1.6400 +instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{ // CMoveI, register source, unsigned condition (cmpOpU on rFlagsRegU)
  1.6401 +  match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); // dst is both a data input and the result
  1.6402 +
  1.6403 +  ins_cost(200); // XXX
  1.6404 +  format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
  1.6405 +  opcode(0x0F, 0x40); // same opcode pair and encoders as cmovI_reg; only the operand classes differ
  1.6406 +  ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
  1.6407 +  ins_pipe(pipe_cmov_reg);
  1.6408 +%}
  1.6409 +
  1.6410 +instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{ // CMoveI for the cmpOpUCF/rFlagsRegUCF operand classes
  1.6411 +  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  1.6412 +  ins_cost(200); // same cost as cmovI_regU
  1.6413 +  expand %{
  1.6414 +    cmovI_regU(cop, cr, dst, src); // no encoding of its own: delegates to the unsigned rule with the same operands
  1.6415 +  %}
  1.6416 +%}
  1.6417 +
  1.6418 +// Conditional move
  1.6419 +instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{ // CMoveI with the LoadI folded in as a memory source, signed condition
  1.6420 +  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  1.6421 +
  1.6422 +  ins_cost(250); // XXX
  1.6423 +  format %{ "cmovl$cop $dst, $src\t# signed, int" %}
  1.6424 +  opcode(0x0F, 0x40); // same opcode pair as cmovI_reg; the REX/ModRM encoders are the reg_mem variants
  1.6425 +  ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
  1.6426 +  ins_pipe(pipe_cmov_mem);
  1.6427 +%}
  1.6428 +
  1.6429 +// Conditional move
  1.6430 +instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src) // CMoveI with the LoadI folded in as a memory source, unsigned condition
  1.6431 +%{
  1.6432 +  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  1.6433 +
  1.6434 +  ins_cost(250); // XXX
  1.6435 +  format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
  1.6436 +  opcode(0x0F, 0x40); // same opcode pair and encoders as cmovI_mem; only the operand classes differ
  1.6437 +  ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
  1.6438 +  ins_pipe(pipe_cmov_mem);
  1.6439 +%}
  1.6440 +
  1.6441 +instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{ // memory-source CMoveI for the cmpOpUCF/rFlagsRegUCF operand classes
  1.6442 +  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  1.6443 +  ins_cost(250); // same cost as cmovI_memU
  1.6444 +  expand %{
  1.6445 +    cmovI_memU(cop, cr, dst, src); // no encoding of its own: delegates to the unsigned rule with the same operands
  1.6446 +  %}
  1.6447 +%}
  1.6448 +
  1.6449 +// Conditional move
  1.6450 +instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop) // CMoveN (compressed pointer), register source, signed condition
  1.6451 +%{
  1.6452 +  match(Set dst (CMoveN (Binary cop cr) (Binary dst src))); // dst is both a data input and the result
  1.6453 +
  1.6454 +  ins_cost(200); // XXX
  1.6455 +  format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
  1.6456 +  opcode(0x0F, 0x40); // same opcode pair and non-wide encoders as cmovI_reg (format shows cmovl)
  1.6457 +  ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
  1.6458 +  ins_pipe(pipe_cmov_reg);
  1.6459 +%}
  1.6460 +
  1.6461 +// Conditional move
  1.6462 +instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src) // CMoveN (compressed pointer), register source, unsigned condition
  1.6463 +%{
  1.6464 +  match(Set dst (CMoveN (Binary cop cr) (Binary dst src))); // dst is both a data input and the result
  1.6465 +
  1.6466 +  ins_cost(200); // XXX
  1.6467 +  format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
  1.6468 +  opcode(0x0F, 0x40); // same opcode pair and encoders as cmovN_reg; only the operand classes differ
  1.6469 +  ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
  1.6470 +  ins_pipe(pipe_cmov_reg);
  1.6471 +%}
  1.6472 +
  1.6473 +instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{ // CMoveN for the cmpOpUCF/rFlagsRegUCF operand classes
  1.6474 +  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
  1.6475 +  ins_cost(200); // same cost as cmovN_regU
  1.6476 +  expand %{
  1.6477 +    cmovN_regU(cop, cr, dst, src); // no encoding of its own: delegates to the unsigned rule with the same operands
  1.6478 +  %}
  1.6479 +%}
  1.6480 +
  1.6481 +// Conditional move
  1.6482 +instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop) // CMoveP (pointer), register source, signed condition
  1.6483 +%{
  1.6484 +  match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); // dst is both a data input and the result
  1.6485 +
  1.6486 +  ins_cost(200); // XXX
  1.6487 +  format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
  1.6488 +  opcode(0x0F, 0x40); // same opcode pair as the int rules; the REX encoder below is the _wide variant (format shows cmovq)
  1.6489 +  ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
  1.6490 +  ins_pipe(pipe_cmov_reg);  // XXX
  1.6491 +%}
  1.6492 +
  1.6493 +// Conditional move
  1.6494 +instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src) // CMoveP (pointer), register source, unsigned condition
  1.6495 +%{
  1.6496 +  match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); // dst is both a data input and the result
  1.6497 +
  1.6498 +  ins_cost(200); // XXX
  1.6499 +  format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
  1.6500 +  opcode(0x0F, 0x40); // same opcode pair and encoders as cmovP_reg; only the operand classes differ
  1.6501 +  ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
  1.6502 +  ins_pipe(pipe_cmov_reg); // XXX
  1.6503 +%}
  1.6504 +
  1.6505 +instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{ // CMoveP for the cmpOpUCF/rFlagsRegUCF operand classes
  1.6506 +  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  1.6507 +  ins_cost(200); // same cost as cmovP_regU
  1.6508 +  expand %{
  1.6509 +    cmovP_regU(cop, cr, dst, src); // no encoding of its own: delegates to the unsigned rule with the same operands
  1.6510 +  %}
  1.6511 +%}
  1.6512 +
  1.6513 +// DISABLED: Requires the ADLC to emit a bottom_type call that
  1.6514 +// correctly meets the two pointer arguments; one is an incoming
  1.6515 +// register but the other is a memory operand.  ALSO appears to
  1.6516 +// be buggy with implicit null checks.
  1.6517 +//
  1.6518 +//// Conditional move
  1.6519 +//instruct cmovP_mem(cmpOp cop, rFlagsReg cr, rRegP dst, memory src)
  1.6520 +//%{
  1.6521 +//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
  1.6522 +//  ins_cost(250);
  1.6523 +//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  1.6524 +//  opcode(0x0F,0x40);
  1.6525 +//  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
  1.6526 +//  ins_pipe( pipe_cmov_mem );
  1.6527 +//%}
  1.6528 +//
  1.6529 +//// Conditional move
  1.6530 +//instruct cmovP_memU(cmpOpU cop, rFlagsRegU cr, rRegP dst, memory src)
  1.6531 +//%{
  1.6532 +//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
  1.6533 +//  ins_cost(250);
  1.6534 +//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  1.6535 +//  opcode(0x0F,0x40);
  1.6536 +//  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
  1.6537 +//  ins_pipe( pipe_cmov_mem );
  1.6538 +//%}
  1.6539 +
  1.6540 +instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src) // CMoveL, register source, signed condition
  1.6541 +%{
  1.6542 +  match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); // dst is both a data input and the result
  1.6543 +
  1.6544 +  ins_cost(200); // XXX
  1.6545 +  format %{ "cmovq$cop $dst, $src\t# signed, long" %}
  1.6546 +  opcode(0x0F, 0x40); // same opcode pair as the int rules; the REX encoder below is the _wide variant (format shows cmovq)
  1.6547 +  ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
  1.6548 +  ins_pipe(pipe_cmov_reg);  // XXX
  1.6549 +%}
  1.6550 +
  1.6551 +instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src) // CMoveL with the LoadL folded in as a memory source, signed condition
  1.6552 +%{
  1.6553 +  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
  1.6554 +
  1.6555 +  ins_cost(200); // XXX
  1.6556 +  format %{ "cmovq$cop $dst, $src\t# signed, long" %}
  1.6557 +  opcode(0x0F, 0x40); // NOTE(review): cost above is 200 here but 250 on cmovI_mem; intent of the difference is not stated
  1.6558 +  ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
  1.6559 +  ins_pipe(pipe_cmov_mem);  // XXX
  1.6560 +%}
  1.6561 +
  1.6562 +instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src) // CMoveL, register source, unsigned condition
  1.6563 +%{
  1.6564 +  match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); // dst is both a data input and the result
  1.6565 +
  1.6566 +  ins_cost(200); // XXX
  1.6567 +  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
  1.6568 +  opcode(0x0F, 0x40); // same opcode pair and encoders as cmovL_reg; only the operand classes differ
  1.6569 +  ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
  1.6570 +  ins_pipe(pipe_cmov_reg); // XXX
  1.6571 +%}
  1.6572 +
  1.6573 +instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{ // CMoveL for the cmpOpUCF/rFlagsRegUCF operand classes
  1.6574 +  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  1.6575 +  ins_cost(200); // same cost as cmovL_regU
  1.6576 +  expand %{
  1.6577 +    cmovL_regU(cop, cr, dst, src); // no encoding of its own: delegates to the unsigned rule with the same operands
  1.6578 +  %}
  1.6579 +%}
  1.6580 +
  1.6581 +instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src) // CMoveL with the LoadL folded in as a memory source, unsigned condition
  1.6582 +%{
  1.6583 +  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
  1.6584 +
  1.6585 +  ins_cost(200); // XXX
  1.6586 +  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
  1.6587 +  opcode(0x0F, 0x40); // same opcode pair and encoders as cmovL_mem; only the operand classes differ
  1.6588 +  ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
  1.6589 +  ins_pipe(pipe_cmov_mem); // XXX
  1.6590 +%}
  1.6591 +
  1.6592 +instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{ // memory-source CMoveL for the cmpOpUCF/rFlagsRegUCF operand classes
  1.6593 +  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
  1.6594 +  ins_cost(200); // same cost as cmovL_memU
  1.6595 +  expand %{
  1.6596 +    cmovL_memU(cop, cr, dst, src); // no encoding of its own: delegates to the unsigned rule with the same operands
  1.6597 +  %}
  1.6598 +%}
  1.6599 +
  1.6600 +instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src) // CMoveF, signed condition: a short branch around a movss
  1.6601 +%{
  1.6602 +  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  1.6603 +
  1.6604 +  ins_cost(200); // XXX
  1.6605 +  format %{ "jn$cop    skip\t# signed cmove float\n\t"
  1.6606 +            "movss     $dst, $src\n"
  1.6607 +    "skip:" %}
  1.6608 +  ins_encode %{
  1.6609 +    Label keep_dst;
  1.6610 +    const Assembler::Condition opposite = (Assembler::Condition)($cop$$cmpcode ^ 1);
  1.6611 +    __ jccb(opposite, keep_dst);   // branch sense is the inverse of the cmove condition
  1.6612 +    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  1.6613 +    __ bind(keep_dst);
  1.6614 +  %}
  1.6615 +  ins_pipe(pipe_slow);
  1.6616 +%}
  1.6617 +
  1.6618 +// instruct cmovF_mem(cmpOp cop, rFlagsReg cr, regF dst, memory src)
  1.6619 +// %{
  1.6620 +//   match(Set dst (CMoveF (Binary cop cr) (Binary dst (LoadL src))));
  1.6621 +
  1.6622 +//   ins_cost(200); // XXX
  1.6623 +//   format %{ "jn$cop    skip\t# signed cmove float\n\t"
  1.6624 +//             "movss     $dst, $src\n"
  1.6625 +//     "skip:" %}
  1.6626 +//   ins_encode(enc_cmovf_mem_branch(cop, dst, src));
  1.6627 +//   ins_pipe(pipe_slow);
  1.6628 +// %}
  1.6629 +
  1.6630 +instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src) // CMoveF, unsigned condition: a short branch around a movss
  1.6631 +%{
  1.6632 +  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  1.6633 +
  1.6634 +  ins_cost(200); // XXX
  1.6635 +  format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
  1.6636 +            "movss     $dst, $src\n"
  1.6637 +    "skip:" %}
  1.6638 +  ins_encode %{
  1.6639 +    Label keep_dst;
  1.6640 +    const Assembler::Condition opposite = (Assembler::Condition)($cop$$cmpcode ^ 1);
  1.6641 +    __ jccb(opposite, keep_dst);   // branch sense is the inverse of the cmove condition
  1.6642 +    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  1.6643 +    __ bind(keep_dst);
  1.6644 +  %}
  1.6645 +  ins_pipe(pipe_slow);
  1.6646 +%}
  1.6647 +
  1.6648 +instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{ // CMoveF for the cmpOpUCF/rFlagsRegUCF operand classes
  1.6649 +  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  1.6650 +  ins_cost(200); // same cost as cmovF_regU
  1.6651 +  expand %{
  1.6652 +    cmovF_regU(cop, cr, dst, src); // no encoding of its own: delegates to the unsigned rule with the same operands
  1.6653 +  %}
  1.6654 +%}
  1.6655 +
  1.6656 +instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src) // CMoveD, signed condition: a short branch around a movsd
  1.6657 +%{
  1.6658 +  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  1.6659 +
  1.6660 +  ins_cost(200); // XXX
  1.6661 +  format %{ "jn$cop    skip\t# signed cmove double\n\t"
  1.6662 +            "movsd     $dst, $src\n"
  1.6663 +    "skip:" %}
  1.6664 +  ins_encode %{
  1.6665 +    Label keep_dst;
  1.6666 +    const Assembler::Condition opposite = (Assembler::Condition)($cop$$cmpcode ^ 1);
  1.6667 +    __ jccb(opposite, keep_dst);   // branch sense is the inverse of the cmove condition
  1.6668 +    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  1.6669 +    __ bind(keep_dst);
  1.6670 +  %}
  1.6671 +  ins_pipe(pipe_slow);
  1.6672 +%}
  1.6673 +
  1.6674 +instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src) // CMoveD, unsigned condition: a short branch around a movsd
  1.6675 +%{
  1.6676 +  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  1.6677 +
  1.6678 +  ins_cost(200); // XXX
  1.6679 +  format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
  1.6680 +            "movsd     $dst, $src\n"
  1.6681 +    "skip:" %}
  1.6682 +  ins_encode %{
  1.6683 +    Label keep_dst;
  1.6684 +    const Assembler::Condition opposite = (Assembler::Condition)($cop$$cmpcode ^ 1);
  1.6685 +    __ jccb(opposite, keep_dst);   // branch sense is the inverse of the cmove condition
  1.6686 +    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  1.6687 +    __ bind(keep_dst);
  1.6688 +  %}
  1.6689 +  ins_pipe(pipe_slow);
  1.6690 +%}
  1.6691 +
  1.6692 +instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{ // CMoveD for the cmpOpUCF/rFlagsRegUCF operand classes
  1.6693 +  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  1.6694 +  ins_cost(200); // same cost as cmovD_regU
  1.6695 +  expand %{
  1.6696 +    cmovD_regU(cop, cr, dst, src); // no encoding of its own: delegates to the unsigned rule with the same operands
  1.6697 +  %}
  1.6698 +%}
  1.6699 +
  1.6700 +//----------Arithmetic Instructions--------------------------------------------
  1.6701 +//----------Addition Instructions----------------------------------------------
  1.6702 +
  1.6703 +instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr) // AddI, register += register (dst is both input and result)
  1.6704 +%{
  1.6705 +  match(Set dst (AddI dst src));
  1.6706 +  effect(KILL cr); // flags register is declared clobbered
  1.6707 +
  1.6708 +  format %{ "addl    $dst, $src\t# int" %}
  1.6709 +  opcode(0x03); // primary opcode byte, emitted via OpcP below
  1.6710 +  ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
  1.6711 +  ins_pipe(ialu_reg_reg);
  1.6712 +%}
  1.6713 +
  1.6714 +instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) // AddI, register += int immediate
  1.6715 +%{
  1.6716 +  match(Set dst (AddI dst src));
  1.6717 +  effect(KILL cr); // flags register is declared clobbered
  1.6718 +
  1.6719 +  format %{ "addl    $dst, $src\t# int" %}
  1.6720 +  opcode(0x81, 0x00); /* /0 id */
  1.6721 +  ins_encode(OpcSErm(dst, src), Con8or32(src)); // NOTE(review): encoders defined elsewhere; names suggest an 8-bit or 32-bit immediate form is chosen by value — confirm
  1.6722 +  ins_pipe( ialu_reg );
  1.6723 +%}
  1.6724 +
  1.6725 +instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) // AddI, register += memory (the LoadI is folded into the add)
  1.6726 +%{
  1.6727 +  match(Set dst (AddI dst (LoadI src)));
  1.6728 +  effect(KILL cr); // flags register is declared clobbered
  1.6729 +
  1.6730 +  ins_cost(125); // XXX
  1.6731 +  format %{ "addl    $dst, $src\t# int" %}
  1.6732 +  opcode(0x03); // same primary opcode as addI_rReg; the REX/ModRM encoders are the reg_mem variants
  1.6733 +  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
  1.6734 +  ins_pipe(ialu_reg_mem);
  1.6735 +%}
  1.6736 +
  1.6737 +instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) // Load/AddI/StoreI on one address fused into a single memory += register
  1.6738 +%{
  1.6739 +  match(Set dst (StoreI dst (AddI (LoadI dst) src))); // the load and the store must name the same memory operand
  1.6740 +  effect(KILL cr); // flags register is declared clobbered
  1.6741 +
  1.6742 +  ins_cost(150); // XXX
  1.6743 +  format %{ "addl    $dst, $src\t# int" %}
  1.6744 +  opcode(0x01); /* Opcode 01 /r */
  1.6745 +  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst)); // the register operand (src) is passed first, the memory operand second
  1.6746 +  ins_pipe(ialu_mem_reg);
  1.6747 +%}
  1.6748 +
  1.6749 +instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr) // Load/AddI/StoreI on one address fused into a single memory += immediate
  1.6750 +%{
  1.6751 +  match(Set dst (StoreI dst (AddI (LoadI dst) src))); // the load and the store must name the same memory operand
  1.6752 +  effect(KILL cr); // flags register is declared clobbered
  1.6753 +
  1.6754 +  ins_cost(125); // XXX
  1.6755 +  format %{ "addl    $dst, $src\t# int" %}
  1.6756 +  opcode(0x81); /* Opcode 81 /0 id */
  1.6757 +  ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src)); // 0x00 is the /0 opcode extension noted above
  1.6758 +  ins_pipe(ialu_mem_imm);
  1.6759 +%}
  1.6760 +
  1.6761 +instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr) // AddI with an immI1 operand, emitted as incl on a register
  1.6762 +%{
  1.6763 +  predicate(UseIncDec); // only when inc/dec forms are enabled
  1.6764 +  match(Set dst (AddI dst src)); // immI1 is defined elsewhere; presumably the constant 1 — confirm
  1.6765 +  effect(KILL cr); // flags register is declared clobbered
  1.6766 +
  1.6767 +  format %{ "incl    $dst\t# int" %}
  1.6768 +  opcode(0xFF, 0x00); // FF /0
  1.6769 +  ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // src is not encoded: the increment is implied by the opcode
  1.6770 +  ins_pipe(ialu_reg);
  1.6771 +%}
  1.6772 +
  1.6773 +instruct incI_mem(memory dst, immI1 src, rFlagsReg cr) // Load/AddI/StoreI with an immI1 operand, emitted as incl on memory
  1.6774 +%{
  1.6775 +  predicate(UseIncDec); // only when inc/dec forms are enabled
  1.6776 +  match(Set dst (StoreI dst (AddI (LoadI dst) src))); // the load and the store must name the same memory operand
  1.6777 +  effect(KILL cr); // flags register is declared clobbered
  1.6778 +
  1.6779 +  ins_cost(125); // XXX
  1.6780 +  format %{ "incl    $dst\t# int" %}
  1.6781 +  opcode(0xFF); /* Opcode FF /0 */
  1.6782 +  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x00, dst)); // 0x00 is the /0 opcode extension; src is not encoded
  1.6783 +  ins_pipe(ialu_mem_imm);
  1.6784 +%}
  1.6785 +
  1.6786 +// XXX why does that use AddI
  1.6787 +instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr) // AddI with an immI_M1 operand, emitted as decl on a register
  1.6788 +%{
  1.6789 +  predicate(UseIncDec); // only when inc/dec forms are enabled
  1.6790 +  match(Set dst (AddI dst src)); // immI_M1 is defined elsewhere; presumably the constant -1, i.e. dst + (-1) — confirm
  1.6791 +  effect(KILL cr); // flags register is declared clobbered
  1.6792 +
  1.6793 +  format %{ "decl    $dst\t# int" %}
  1.6794 +  opcode(0xFF, 0x01); // FF /1
  1.6795 +  ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // src is not encoded: the decrement is implied by the opcode
  1.6796 +  ins_pipe(ialu_reg);
  1.6797 +%}
  1.6798 +
  1.6799 +// XXX why does that use AddI
  1.6800 +instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr) // Load/AddI/StoreI with an immI_M1 operand, emitted as decl on memory
  1.6801 +%{
  1.6802 +  predicate(UseIncDec); // only when inc/dec forms are enabled
  1.6803 +  match(Set dst (StoreI dst (AddI (LoadI dst) src))); // the load and the store must name the same memory operand
  1.6804 +  effect(KILL cr); // flags register is declared clobbered
  1.6805 +
  1.6806 +  ins_cost(125); // XXX
  1.6807 +  format %{ "decl    $dst\t# int" %}
  1.6808 +  opcode(0xFF); /* Opcode FF /1 */
  1.6809 +  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x01, dst)); // 0x01 is the /1 opcode extension; src is not encoded
  1.6810 +  ins_pipe(ialu_mem_imm);
  1.6811 +%}
  1.6812 +
  1.6813 +instruct leaI_rReg_immI(rRegI dst, rRegI src0, immI src1) // AddI of register and immediate into a separate dst, via leal
  1.6814 +%{
  1.6815 +  match(Set dst (AddI src0 src1)); // unlike the addI_* rules, dst is not an input and no flags kill is declared
  1.6816 +
  1.6817 +  ins_cost(110);
  1.6818 +  format %{ "addr32 leal $dst, [$src0 + $src1]\t# int" %}
  1.6819 +  opcode(0x8D); /* 0x8D /r */
  1.6820 +  ins_encode(Opcode(0x67), REX_reg_reg(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX 0x67 is emitted first; it corresponds to "addr32" in the format
  1.6821 +  ins_pipe(ialu_reg_reg);
  1.6822 +%}
  1.6823 +
  1.6824 +instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
  1.6825 +%{ // Long register-register add, emitted as addq; dst is both an input and the result.
  1.6826 +  match(Set dst (AddL dst src));
  1.6827 +  effect(KILL cr); // flags register is clobbered
  1.6828 +
  1.6829 +  format %{ "addq    $dst, $src\t# long" %}
  1.6830 +  opcode(0x03);
  1.6831 +  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
  1.6832 +  ins_pipe(ialu_reg_reg);
  1.6833 +%}
  1.6834 +
  1.6835 +instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
  1.6836 +%{ // Long add of an immL32 constant to a register, emitted as addq (81 /0).
  1.6837 +  match(Set dst (AddL dst src));
  1.6838 +  effect(KILL cr); // flags register is clobbered
  1.6839 +
  1.6840 +  format %{ "addq    $dst, $src\t# long" %}
  1.6841 +  opcode(0x81, 0x00); /* /0 id */
  1.6842 +  ins_encode(OpcSErm_wide(dst, src), Con8or32(src)); // presumably picks an imm8 or imm32 form by value; encoders are defined elsewhere
  1.6843 +  ins_pipe( ialu_reg );
  1.6844 +%}
  1.6845 +
  1.6846 +instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
  1.6847 +%{ // Long add of a memory operand into a register, emitted as addq with the load folded in.
  1.6848 +  match(Set dst (AddL dst (LoadL src))); // the LoadL is consumed by this rule
  1.6849 +  effect(KILL cr); // flags register is clobbered
  1.6850 +
  1.6851 +  ins_cost(125); // XXX
  1.6852 +  format %{ "addq    $dst, $src\t# long" %}
  1.6853 +  opcode(0x03);
  1.6854 +  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
  1.6855 +  ins_pipe(ialu_reg_mem);
  1.6856 +%}
  1.6857 +
  1.6858 +instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
  1.6859 +%{ // In-place long add of a register into memory, emitted as addq (01 /r).
  1.6860 +  match(Set dst (StoreL dst (AddL (LoadL dst) src))); // load, add and store back on the same dst
  1.6861 +  effect(KILL cr); // flags register is clobbered
  1.6862 +
  1.6863 +  ins_cost(150); // XXX
  1.6864 +  format %{ "addq    $dst, $src\t# long" %}
  1.6865 +  opcode(0x01); /* Opcode 01 /r */
  1.6866 +  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst)); // register src is passed first, memory dst second
  1.6867 +  ins_pipe(ialu_mem_reg);
  1.6868 +%}
  1.6869 +
  1.6870 +instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
  1.6871 +%{ // In-place long add of an immL32 constant into memory, emitted as addq (81 /0).
  1.6872 +  match(Set dst (StoreL dst (AddL (LoadL dst) src))); // load, add and store back on the same dst
  1.6873 +  effect(KILL cr); // flags register is clobbered
  1.6874 +
  1.6875 +  ins_cost(125); // XXX
  1.6876 +  format %{ "addq    $dst, $src\t# long" %}
  1.6877 +  opcode(0x81); /* Opcode 81 /0 id */
  1.6878 +  ins_encode(REX_mem_wide(dst),
  1.6879 +             OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
  1.6880 +  ins_pipe(ialu_mem_imm);
  1.6881 +%}
  1.6882 +
  1.6883 +instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
  1.6884 +%{ // Long increment of a register, emitted as incq (REX.W FF /0). dst is a long register, matching the AddL rule and the sibling decL_rReg.
  1.6885 +  predicate(UseIncDec); // rule is only eligible when UseIncDec is set
  1.6886 +  match(Set dst (AddL dst src)); // src is never encoded; immL1 is presumably the constant 1 (operand defined elsewhere)
  1.6887 +  effect(KILL cr); // flags register is clobbered
  1.6888 +
  1.6889 +  format %{ "incq    $dst\t# long" %}
  1.6890 +  opcode(0xFF, 0x00); // FF /0
  1.6891 +  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
  1.6892 +  ins_pipe(ialu_reg);
  1.6893 +%}
  1.6894 +
  1.6895 +instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
  1.6896 +%{ // In-place long increment of a memory operand, emitted as incq (FF /0).
  1.6897 +  predicate(UseIncDec); // rule is only eligible when UseIncDec is set
  1.6898 +  match(Set dst (StoreL dst (AddL (LoadL dst) src))); // load, add and store back on the same dst; src is never encoded
  1.6899 +  effect(KILL cr); // flags register is clobbered
  1.6900 +
  1.6901 +  ins_cost(125); // XXX
  1.6902 +  format %{ "incq    $dst\t# long" %}
  1.6903 +  opcode(0xFF); /* Opcode FF /0 */
  1.6904 +  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x00, dst));
  1.6905 +  ins_pipe(ialu_mem_imm);
  1.6906 +%}
  1.6907 +
  1.6908 +// XXX why does that use AddL
  1.6909 +instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
  1.6910 +%{ // Long decrement of a register, emitted as decq (FF /1).
  1.6911 +  predicate(UseIncDec); // rule is only eligible when UseIncDec is set
  1.6912 +  match(Set dst (AddL dst src)); // an add of the immL_M1 operand (presumably the constant -1; operand defined elsewhere)
  1.6913 +  effect(KILL cr); // flags register is clobbered
  1.6914 +
  1.6915 +  format %{ "decq    $dst\t# long" %}
  1.6916 +  opcode(0xFF, 0x01); // FF /1
  1.6917 +  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
  1.6918 +  ins_pipe(ialu_reg);
  1.6919 +%}
  1.6920 +
  1.6921 +// XXX why does that use AddL
  1.6922 +instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
  1.6923 +%{ // In-place long decrement of a memory operand, emitted as decq (FF /1).
  1.6924 +  predicate(UseIncDec); // rule is only eligible when UseIncDec is set
  1.6925 +  match(Set dst (StoreL dst (AddL (LoadL dst) src))); // load, add and store back on the same dst; src is never encoded
  1.6926 +  effect(KILL cr); // flags register is clobbered
  1.6927 +
  1.6928 +  ins_cost(125); // XXX
  1.6929 +  format %{ "decq    $dst\t# long" %}
  1.6930 +  opcode(0xFF); /* Opcode FF /1 */
  1.6931 +  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x01, dst));
  1.6932 +  ins_pipe(ialu_mem_imm);
  1.6933 +%}
  1.6934 +
  1.6935 +instruct leaL_rReg_immL(rRegL dst, rRegL src0, immL32 src1)
  1.6936 +%{ // Long add of register src0 and immediate src1 into a separate dst, emitted as leaq.
  1.6937 +  match(Set dst (AddL src0 src1)); // no flags operand and no KILL effect are declared for this rule
  1.6938 +
  1.6939 +  ins_cost(110);
  1.6940 +  format %{ "leaq    $dst, [$src0 + $src1]\t# long" %}
  1.6941 +  opcode(0x8D); /* 0x8D /r */
  1.6942 +  ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
  1.6943 +  ins_pipe(ialu_reg_reg);
  1.6944 +%}
  1.6945 +
  1.6946 +instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
  1.6947 +%{ // Pointer plus long register offset, emitted as addq; dst is both the base and the result.
  1.6948 +  match(Set dst (AddP dst src));
  1.6949 +  effect(KILL cr); // flags register is clobbered
  1.6950 +
  1.6951 +  format %{ "addq    $dst, $src\t# ptr" %}
  1.6952 +  opcode(0x03);
  1.6953 +  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
  1.6954 +  ins_pipe(ialu_reg_reg);
  1.6955 +%}
  1.6956 +
  1.6957 +instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
  1.6958 +%{ // Pointer plus immL32 constant offset, emitted as addq (81 /0).
  1.6959 +  match(Set dst (AddP dst src));
  1.6960 +  effect(KILL cr); // flags register is clobbered
  1.6961 +
  1.6962 +  format %{ "addq    $dst, $src\t# ptr" %}
  1.6963 +  opcode(0x81, 0x00); /* /0 id */
  1.6964 +  ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
  1.6965 +  ins_pipe( ialu_reg );
  1.6966 +%}
  1.6967 +
  1.6968 +// XXX addP mem ops ????
  1.6969 +
  1.6970 +instruct leaP_rReg_imm(rRegP dst, rRegP src0, immL32 src1)
  1.6971 +%{ // Pointer src0 plus immediate src1 into a separate dst, emitted as leaq.
  1.6972 +  match(Set dst (AddP src0 src1)); // no flags operand and no KILL effect are declared for this rule
  1.6973 +
  1.6974 +  ins_cost(110);
  1.6975 +  format %{ "leaq    $dst, [$src0 + $src1]\t# ptr" %}
  1.6976 +  opcode(0x8D); /* 0x8D /r */
  1.6977 +  ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1));// XXX
  1.6978 +  ins_pipe(ialu_reg_reg);
  1.6979 +%}
  1.6980 +
  1.6981 +instruct checkCastPP(rRegP dst)
  1.6982 +%{ // CheckCastPP on a pointer register: matched in place and emits no machine code.
  1.6983 +  match(Set dst (CheckCastPP dst));
  1.6984 +
  1.6985 +  size(0); // zero bytes of code
  1.6986 +  format %{ "# checkcastPP of $dst" %}
  1.6987 +  ins_encode(/* empty encoding */);
  1.6988 +  ins_pipe(empty);
  1.6989 +%}
  1.6990 +
  1.6991 +instruct castPP(rRegP dst)
  1.6992 +%{ // CastPP on a pointer register: matched in place and emits no machine code.
  1.6993 +  match(Set dst (CastPP dst));
  1.6994 +
  1.6995 +  size(0); // zero bytes of code
  1.6996 +  format %{ "# castPP of $dst" %}
  1.6997 +  ins_encode(/* empty encoding */);
  1.6998 +  ins_pipe(empty);
  1.6999 +%}
  1.7000 +
  1.7001 +instruct castII(rRegI dst)
  1.7002 +%{ // CastII on an int register: matched in place and emits no machine code.
  1.7003 +  match(Set dst (CastII dst));
  1.7004 +
  1.7005 +  size(0); // zero bytes of code
  1.7006 +  format %{ "# castII of $dst" %}
  1.7007 +  ins_encode(/* empty encoding */);
  1.7008 +  ins_cost(0); // explicitly free, unlike the pointer casts which declare no cost
  1.7009 +  ins_pipe(empty);
  1.7010 +%}
  1.7011 +
  1.7012 +// LoadP-locked same as a regular LoadP when used with compare-swap
  1.7013 +instruct loadPLocked(rRegP dst, memory mem)
  1.7014 +%{ // Emitted as a plain 64-bit movq load (opcode 8B); no lock prefix is encoded.
  1.7015 +  match(Set dst (LoadPLocked mem));
  1.7016 +
  1.7017 +  ins_cost(125); // XXX
  1.7018 +  format %{ "movq    $dst, $mem\t# ptr locked" %}
  1.7019 +  opcode(0x8B);
  1.7020 +  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
  1.7021 +  ins_pipe(ialu_reg_mem); // XXX
  1.7022 +%}
  1.7023 +
  1.7024 +// Conditional-store of the updated heap-top.
  1.7025 +// Used during allocation of the shared heap.
  1.7026 +// Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
  1.7027 +
  1.7028 +instruct storePConditional(memory heap_top_ptr,
  1.7029 +                           rax_RegP oldval, rRegP newval,
  1.7030 +                           rFlagsReg cr)
  1.7031 +%{ // The result of this rule is the flags register itself; oldval is pinned to rax by its operand class.
  1.7032 +  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); // NOTE(review): unlike storeIConditional/storeLConditional, no KILL oldval is declared here - confirm intended
  1.7033 +
  1.7034 +  format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) "
  1.7035 +            "If rax == $heap_top_ptr then store $newval into $heap_top_ptr" %}
  1.7036 +  opcode(0x0F, 0xB1);
  1.7037 +  ins_encode(lock_prefix,
  1.7038 +             REX_reg_mem_wide(newval, heap_top_ptr),
  1.7039 +             OpcP, OpcS,
  1.7040 +             reg_mem(newval, heap_top_ptr));
  1.7041 +  ins_pipe(pipe_cmpxchg);
  1.7042 +%}
  1.7043 +
  1.7044 +// Conditional-store of an int value.
  1.7045 +// ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
  1.7046 +instruct storeIConditional(memory mem, rax_RegI oldval, rRegI newval, rFlagsReg cr)
  1.7047 +%{ // The result of this rule is the flags register; the expected value oldval is pinned to rax.
  1.7048 +  match(Set cr (StoreIConditional mem (Binary oldval newval)));
  1.7049 +  effect(KILL oldval); // rax is declared clobbered by this instruction
  1.7050 +
  1.7051 +  format %{ "cmpxchgl $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
  1.7052 +  opcode(0x0F, 0xB1);
  1.7053 +  ins_encode(lock_prefix,
  1.7054 +             REX_reg_mem(newval, mem),
  1.7055 +             OpcP, OpcS,
  1.7056 +             reg_mem(newval, mem));
  1.7057 +  ins_pipe(pipe_cmpxchg);
  1.7058 +%}
  1.7059 +
  1.7060 +// Conditional-store of a long value.
  1.7061 +// ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
  1.7062 +instruct storeLConditional(memory mem, rax_RegL oldval, rRegL newval, rFlagsReg cr)
  1.7063 +%{ // The result of this rule is the flags register; the expected value oldval is pinned to rax.
  1.7064 +  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  1.7065 +  effect(KILL oldval); // rax is declared clobbered by this instruction
  1.7066 +
  1.7067 +  format %{ "cmpxchgq $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
  1.7068 +  opcode(0x0F, 0xB1);
  1.7069 +  ins_encode(lock_prefix,
  1.7070 +             REX_reg_mem_wide(newval, mem),
  1.7071 +             OpcP, OpcS,
  1.7072 +             reg_mem(newval, mem));
  1.7073 +  ins_pipe(pipe_cmpxchg);
  1.7074 +%}
  1.7075 +
  1.7076 +
  1.7077 +// XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
  1.7078 +instruct compareAndSwapP(rRegI res,
  1.7079 +                         memory mem_ptr,
  1.7080 +                         rax_RegP oldval, rRegP newval,
  1.7081 +                         rFlagsReg cr)
  1.7082 +%{ // Pointer compare-and-swap: locked cmpxchgq, then sete + movzbl turn the outcome into an int in res.
  1.7083 +  predicate(VM_Version::supports_cx8()); // rule is only eligible when supports_cx8() is true
  1.7084 +  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  1.7085 +  effect(KILL cr, KILL oldval); // flags and rax (oldval) are both clobbered
  1.7086 +
  1.7087 +  format %{ "cmpxchgq $mem_ptr,$newval\t# "
  1.7088 +            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
  1.7089 +            "sete    $res\n\t"
  1.7090 +            "movzbl  $res, $res" %}
  1.7091 +  opcode(0x0F, 0xB1);
  1.7092 +  ins_encode(lock_prefix,
  1.7093 +             REX_reg_mem_wide(newval, mem_ptr),
  1.7094 +             OpcP, OpcS,
  1.7095 +             reg_mem(newval, mem_ptr),
  1.7096 +             REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
  1.7097 +             REX_reg_breg(res, res), // movzbl
  1.7098 +             Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
  1.7099 +  ins_pipe( pipe_cmpxchg );
  1.7100 +%}
  1.7101 +
  1.7102 +instruct compareAndSwapL(rRegI res,
  1.7103 +                         memory mem_ptr,
  1.7104 +                         rax_RegL oldval, rRegL newval,
  1.7105 +                         rFlagsReg cr)
  1.7106 +%{ // Long compare-and-swap: locked cmpxchgq, then sete + movzbl turn the outcome into an int in res.
  1.7107 +  predicate(VM_Version::supports_cx8()); // rule is only eligible when supports_cx8() is true
  1.7108 +  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  1.7109 +  effect(KILL cr, KILL oldval); // flags and rax (oldval) are both clobbered
  1.7110 +
  1.7111 +  format %{ "cmpxchgq $mem_ptr,$newval\t# "
  1.7112 +            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
  1.7113 +            "sete    $res\n\t"
  1.7114 +            "movzbl  $res, $res" %}
  1.7115 +  opcode(0x0F, 0xB1);
  1.7116 +  ins_encode(lock_prefix,
  1.7117 +             REX_reg_mem_wide(newval, mem_ptr),
  1.7118 +             OpcP, OpcS,
  1.7119 +             reg_mem(newval, mem_ptr),
  1.7120 +             REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
  1.7121 +             REX_reg_breg(res, res), // movzbl
  1.7122 +             Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
  1.7123 +  ins_pipe( pipe_cmpxchg );
  1.7124 +%}
  1.7125 +
  1.7126 +instruct compareAndSwapI(rRegI res,
  1.7127 +                         memory mem_ptr,
  1.7128 +                         rax_RegI oldval, rRegI newval,
  1.7129 +                         rFlagsReg cr)
  1.7130 +%{ // Int compare-and-swap: locked cmpxchgl (no wide REX), then sete + movzbl turn the outcome into an int in res.
  1.7131 +  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  1.7132 +  effect(KILL cr, KILL oldval); // flags and rax (oldval) are both clobbered
  1.7133 +
  1.7134 +  format %{ "cmpxchgl $mem_ptr,$newval\t# "
  1.7135 +            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
  1.7136 +            "sete    $res\n\t"
  1.7137 +            "movzbl  $res, $res" %}
  1.7138 +  opcode(0x0F, 0xB1);
  1.7139 +  ins_encode(lock_prefix,
  1.7140 +             REX_reg_mem(newval, mem_ptr),
  1.7141 +             OpcP, OpcS,
  1.7142 +             reg_mem(newval, mem_ptr),
  1.7143 +             REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
  1.7144 +             REX_reg_breg(res, res), // movzbl
  1.7145 +             Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
  1.7146 +  ins_pipe( pipe_cmpxchg );
  1.7147 +%}
  1.7148 +
  1.7149 +
  1.7150 +instruct compareAndSwapN(rRegI res,
  1.7151 +                          memory mem_ptr,
  1.7152 +                          rax_RegN oldval, rRegN newval,
  1.7153 +                          rFlagsReg cr) %{ // Compare-and-swap on rRegN operands: locked 32-bit cmpxchgl, then sete + movzbl into res.
  1.7154 +  match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
  1.7155 +  effect(KILL cr, KILL oldval); // flags and rax (oldval) are both clobbered
  1.7156 +
  1.7157 +  format %{ "cmpxchgl $mem_ptr,$newval\t# "
  1.7158 +            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
  1.7159 +            "sete    $res\n\t"
  1.7160 +            "movzbl  $res, $res" %}
  1.7161 +  opcode(0x0F, 0xB1);
  1.7162 +  ins_encode(lock_prefix,
  1.7163 +             REX_reg_mem(newval, mem_ptr),
  1.7164 +             OpcP, OpcS,
  1.7165 +             reg_mem(newval, mem_ptr),
  1.7166 +             REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
  1.7167 +             REX_reg_breg(res, res), // movzbl
  1.7168 +             Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
  1.7169 +  ins_pipe( pipe_cmpxchg );
  1.7170 +%}
  1.7171 +
  1.7172 +instruct xaddI_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{ // GetAndAddI whose result is unused: a plain addl of an immediate to memory.
  1.7173 +  predicate(n->as_LoadStore()->result_not_used()); // only eligible when the node's result has no users
  1.7174 +  match(Set dummy (GetAndAddI mem add)); // dummy stands in for the unused result
  1.7175 +  effect(KILL cr); // flags register is clobbered
  1.7176 +  format %{ "ADDL  [$mem],$add" %}
  1.7177 +  ins_encode %{
  1.7178 +    if (os::is_MP()) { __ lock(); } // lock prefix is emitted only when os::is_MP() is true
  1.7179 +    __ addl($mem$$Address, $add$$constant);
  1.7180 +  %}
  1.7181 +  ins_pipe( pipe_cmpxchg );
  1.7182 +%}
  1.7183 +
  1.7184 +instruct xaddI( memory mem, rRegI newval, rFlagsReg cr) %{ // GetAndAddI via xaddl: newval is both the addend and the result register.
  1.7185 +  match(Set newval (GetAndAddI mem newval));
  1.7186 +  effect(KILL cr); // flags register is clobbered
  1.7187 +  format %{ "XADDL  [$mem],$newval" %}
  1.7188 +  ins_encode %{
  1.7189 +    if (os::is_MP()) { __ lock(); } // lock prefix is emitted only when os::is_MP() is true
  1.7190 +    __ xaddl($mem$$Address, $newval$$Register);
  1.7191 +  %}
  1.7192 +  ins_pipe( pipe_cmpxchg );
  1.7193 +%}
  1.7194 +
  1.7195 +instruct xaddL_no_res( memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{ // GetAndAddL whose result is unused: a plain addq of an immediate to memory.
  1.7196 +  predicate(n->as_LoadStore()->result_not_used()); // only eligible when the node's result has no users
  1.7197 +  match(Set dummy (GetAndAddL mem add)); // dummy stands in for the unused result
  1.7198 +  effect(KILL cr); // flags register is clobbered
  1.7199 +  format %{ "ADDQ  [$mem],$add" %}
  1.7200 +  ins_encode %{
  1.7201 +    if (os::is_MP()) { __ lock(); } // lock prefix is emitted only when os::is_MP() is true
  1.7202 +    __ addq($mem$$Address, $add$$constant);
  1.7203 +  %}
  1.7204 +  ins_pipe( pipe_cmpxchg );
  1.7205 +%}
  1.7206 +
  1.7207 +instruct xaddL( memory mem, rRegL newval, rFlagsReg cr) %{ // GetAndAddL via xaddq: newval is both the addend and the result register.
  1.7208 +  match(Set newval (GetAndAddL mem newval));
  1.7209 +  effect(KILL cr); // flags register is clobbered
  1.7210 +  format %{ "XADDQ  [$mem],$newval" %}
  1.7211 +  ins_encode %{
  1.7212 +    if (os::is_MP()) { __ lock(); } // lock prefix is emitted only when os::is_MP() is true
  1.7213 +    __ xaddq($mem$$Address, $newval$$Register);
  1.7214 +  %}
  1.7215 +  ins_pipe( pipe_cmpxchg );
  1.7216 +%}
  1.7217 +
  1.7218 +instruct xchgI( memory mem, rRegI newval) %{ // GetAndSetI via xchgl: newval is both the value stored and the result register.
  1.7219 +  match(Set newval (GetAndSetI mem newval)); // no flags operand, no KILL effect and no explicit lock prefix in this rule
  1.7220 +  format %{ "XCHGL  $newval,[$mem]" %}
  1.7221 +  ins_encode %{
  1.7222 +    __ xchgl($newval$$Register, $mem$$Address);
  1.7223 +  %}
  1.7224 +  ins_pipe( pipe_cmpxchg );
  1.7225 +%}
  1.7226 +
  1.7227 +instruct xchgL( memory mem, rRegL newval) %{ // GetAndSetL via xchgq: newval is both the value stored and the result register.
  1.7228 +  match(Set newval (GetAndSetL mem newval)); // no flags operand, no KILL effect and no explicit lock prefix in this rule
  1.7229 +  format %{ "XCHGQ  $newval,[$mem]" %}
  1.7230 +  ins_encode %{
  1.7231 +    __ xchgq($newval$$Register, $mem$$Address); // 64-bit exchange, so the listing mnemonic is XCHGQ
  1.7232 +  %}
  1.7233 +  ins_pipe( pipe_cmpxchg );
  1.7234 +%}
  1.7235 +
  1.7236 +instruct xchgP( memory mem, rRegP newval) %{ // GetAndSetP via xchgq: newval is both the value stored and the result register.
  1.7237 +  match(Set newval (GetAndSetP mem newval)); // no flags operand, no KILL effect and no explicit lock prefix in this rule
  1.7238 +  format %{ "XCHGQ  $newval,[$mem]" %}
  1.7239 +  ins_encode %{
  1.7240 +    __ xchgq($newval$$Register, $mem$$Address);
  1.7241 +  %}
  1.7242 +  ins_pipe( pipe_cmpxchg );
  1.7243 +%}
  1.7244 +
  1.7245 +instruct xchgN( memory mem, rRegN newval) %{ // GetAndSetN via 32-bit xchgl: newval is both the value stored and the result register.
  1.7246 +  match(Set newval (GetAndSetN mem newval)); // no flags operand, no KILL effect and no explicit lock prefix in this rule
  1.7247 +  format %{ "XCHGL  $newval,[$mem]" %}
  1.7248 +  ins_encode %{
  1.7249 +    __ xchgl($newval$$Register, $mem$$Address);
  1.7250 +  %}
  1.7251 +  ins_pipe( pipe_cmpxchg );
  1.7252 +%}
  1.7253 +
  1.7254 +//----------Subtraction Instructions-------------------------------------------
  1.7255 +
  1.7256 +// Integer Subtraction Instructions
  1.7257 +instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
  1.7258 +%{ // Int register-register subtract, emitted as subl; dst is both an input and the result.
  1.7259 +  match(Set dst (SubI dst src));
  1.7260 +  effect(KILL cr); // flags register is clobbered
  1.7261 +
  1.7262 +  format %{ "subl    $dst, $src\t# int" %}
  1.7263 +  opcode(0x2B);
  1.7264 +  ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
  1.7265 +  ins_pipe(ialu_reg_reg);
  1.7266 +%}
  1.7267 +
  1.7268 +instruct subI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
  1.7269 +%{ // Int subtract of an immediate from a register, emitted as subl (81 /5).
  1.7270 +  match(Set dst (SubI dst src));
  1.7271 +  effect(KILL cr); // flags register is clobbered
  1.7272 +
  1.7273 +  format %{ "subl    $dst, $src\t# int" %}
  1.7274 +  opcode(0x81, 0x05);  /* Opcode 81 /5 */
  1.7275 +  ins_encode(OpcSErm(dst, src), Con8or32(src));
  1.7276 +  ins_pipe(ialu_reg);
  1.7277 +%}
  1.7278 +
  1.7279 +instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
  1.7280 +%{ // Int subtract of a memory operand from a register, emitted as subl with the load folded in.
  1.7281 +  match(Set dst (SubI dst (LoadI src))); // the LoadI is consumed by this rule
  1.7282 +  effect(KILL cr); // flags register is clobbered
  1.7283 +
  1.7284 +  ins_cost(125);
  1.7285 +  format %{ "subl    $dst, $src\t# int" %}
  1.7286 +  opcode(0x2B);
  1.7287 +  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
  1.7288 +  ins_pipe(ialu_reg_mem);
  1.7289 +%}
  1.7290 +
  1.7291 +instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
  1.7292 +%{ // In-place int subtract of a register from memory, emitted as subl (29 /r).
  1.7293 +  match(Set dst (StoreI dst (SubI (LoadI dst) src))); // load, subtract and store back on the same dst
  1.7294 +  effect(KILL cr); // flags register is clobbered
  1.7295 +
  1.7296 +  ins_cost(150);
  1.7297 +  format %{ "subl    $dst, $src\t# int" %}
  1.7298 +  opcode(0x29); /* Opcode 29 /r */
  1.7299 +  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst)); // register src is passed first, memory dst second
  1.7300 +  ins_pipe(ialu_mem_reg);
  1.7301 +%}
  1.7302 +
  1.7303 +instruct subI_mem_imm(memory dst, immI src, rFlagsReg cr)
  1.7304 +%{ // In-place int subtract of an immediate from memory, emitted as subl (81 /5).
  1.7305 +  match(Set dst (StoreI dst (SubI (LoadI dst) src))); // load, subtract and store back on the same dst
  1.7306 +  effect(KILL cr); // flags register is clobbered
  1.7307 +
  1.7308 +  ins_cost(125); // XXX
  1.7309 +  format %{ "subl    $dst, $src\t# int" %}
  1.7310 +  opcode(0x81); /* Opcode 81 /5 id */
  1.7311 +  ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
  1.7312 +  ins_pipe(ialu_mem_imm);
  1.7313 +%}
  1.7314 +
  1.7315 +instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
  1.7316 +%{ // Long register-register subtract, emitted as subq; dst is both an input and the result.
  1.7317 +  match(Set dst (SubL dst src));
  1.7318 +  effect(KILL cr); // flags register is clobbered
  1.7319 +
  1.7320 +  format %{ "subq    $dst, $src\t# long" %}
  1.7321 +  opcode(0x2B);
  1.7322 +  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
  1.7323 +  ins_pipe(ialu_reg_reg);
  1.7324 +%}
  1.7325 +
  1.7326 +instruct subL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
  1.7327 +%{ // Long subtract of an immL32 constant from a register, emitted as subq (81 /5). dst is a long register, matching the SubL rule and the wide encoding.
  1.7328 +  match(Set dst (SubL dst src));
  1.7329 +  effect(KILL cr); // flags register is clobbered
  1.7330 +
  1.7331 +  format %{ "subq    $dst, $src\t# long" %}
  1.7332 +  opcode(0x81, 0x05);  /* Opcode 81 /5 */
  1.7333 +  ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
  1.7334 +  ins_pipe(ialu_reg);
  1.7335 +%}
  1.7336 +
  1.7337 +instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
  1.7338 +%{ // Long subtract of a memory operand from a register, emitted as subq with the load folded in.
  1.7339 +  match(Set dst (SubL dst (LoadL src))); // the LoadL is consumed by this rule
  1.7340 +  effect(KILL cr); // flags register is clobbered
  1.7341 +
  1.7342 +  ins_cost(125);
  1.7343 +  format %{ "subq    $dst, $src\t# long" %}
  1.7344 +  opcode(0x2B);
  1.7345 +  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
  1.7346 +  ins_pipe(ialu_reg_mem);
  1.7347 +%}
  1.7348 +
  1.7349 +instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
  1.7350 +%{ // In-place long subtract of a register from memory, emitted as subq (29 /r).
  1.7351 +  match(Set dst (StoreL dst (SubL (LoadL dst) src))); // load, subtract and store back on the same dst
  1.7352 +  effect(KILL cr); // flags register is clobbered
  1.7353 +
  1.7354 +  ins_cost(150);
  1.7355 +  format %{ "subq    $dst, $src\t# long" %}
  1.7356 +  opcode(0x29); /* Opcode 29 /r */
  1.7357 +  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst)); // register src is passed first, memory dst second
  1.7358 +  ins_pipe(ialu_mem_reg);
  1.7359 +%}
  1.7360 +
  1.7361 +instruct subL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
  1.7362 +%{ // In-place long subtract of an immL32 constant from memory, emitted as subq (81 /5).
  1.7363 +  match(Set dst (StoreL dst (SubL (LoadL dst) src))); // load, subtract and store back on the same dst
  1.7364 +  effect(KILL cr); // flags register is clobbered
  1.7365 +
  1.7366 +  ins_cost(125); // XXX
  1.7367 +  format %{ "subq    $dst, $src\t# long" %}
  1.7368 +  opcode(0x81); /* Opcode 81 /5 id */
  1.7369 +  ins_encode(REX_mem_wide(dst),
  1.7370 +             OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
  1.7371 +  ins_pipe(ialu_mem_imm);
  1.7372 +%}
  1.7373 +
  1.7374 +// Subtract from a pointer
  1.7375 +// XXX hmpf???
  1.7376 +instruct subP_rReg(rRegP dst, rRegI src, immI0 zero, rFlagsReg cr)
  1.7377 +%{ // Pointer plus a negated int, folded into a single subq.
  1.7378 +  match(Set dst (AddP dst (SubI zero src))); // the (0 - src) negation is absorbed by the subtract
  1.7379 +  effect(KILL cr); // flags register is clobbered
  1.7380 +
  1.7381 +  format %{ "subq    $dst, $src\t# ptr - int" %}
  1.7382 +  opcode(0x2B);
  1.7383 +  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src)); // NOTE(review): 64-bit subtract with an rRegI src - confirm the upper half of src is well defined here
  1.7384 +  ins_pipe(ialu_reg_reg);
  1.7385 +%}
  1.7386 +
  1.7387 +instruct negI_rReg(rRegI dst, immI0 zero, rFlagsReg cr)
  1.7388 +%{ // Int negation of a register, emitted as negl (F7 /3).
  1.7389 +  match(Set dst (SubI zero dst)); // negation is matched as a subtract from the immI0 operand
  1.7390 +  effect(KILL cr); // flags register is clobbered
  1.7391 +
  1.7392 +  format %{ "negl    $dst\t# int" %}
  1.7393 +  opcode(0xF7, 0x03);  // Opcode F7 /3
  1.7394 +  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
  1.7395 +  ins_pipe(ialu_reg);
  1.7396 +%}
  1.7397 +
  1.7398 +instruct negI_mem(memory dst, immI0 zero, rFlagsReg cr)
  1.7399 +%{ // In-place int negation of a memory operand, emitted as negl (F7 /3).
  1.7400 +  match(Set dst (StoreI dst (SubI zero (LoadI dst)))); // load, negate and store back on the same dst
  1.7401 +  effect(KILL cr); // flags register is clobbered
  1.7402 +
  1.7403 +  format %{ "negl    $dst\t# int" %}
  1.7404 +  opcode(0xF7, 0x03);  // Opcode F7 /3
  1.7405 +  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst)); // secondary is the 0x03 given to opcode() above
  1.7406 +  ins_pipe(ialu_reg); // NOTE(review): a register pipe class on a memory read-modify-write rule, and no ins_cost - confirm intended
  1.7407 +%}
  1.7408 +
  1.7409 +instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
  1.7410 +%{ // Long negation of a register, emitted as negq (F7 /3).
  1.7411 +  match(Set dst (SubL zero dst)); // negation is matched as a subtract from the immL0 operand
  1.7412 +  effect(KILL cr); // flags register is clobbered
  1.7413 +
  1.7414 +  format %{ "negq    $dst\t# long" %}
  1.7415 +  opcode(0xF7, 0x03);  // Opcode F7 /3
  1.7416 +  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
  1.7417 +  ins_pipe(ialu_reg);
  1.7418 +%}
  1.7419 +
  1.7420 +instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
  1.7421 +%{ // In-place long negation of a memory operand, emitted as negq (F7 /3).
  1.7422 +  match(Set dst (StoreL dst (SubL zero (LoadL dst)))); // load, negate and store back on the same dst
  1.7423 +  effect(KILL cr); // flags register is clobbered
  1.7424 +
  1.7425 +  format %{ "negq    $dst\t# long" %}
  1.7426 +  opcode(0xF7, 0x03);  // Opcode F7 /3
  1.7427 +  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst)); // secondary is the 0x03 given to opcode() above
  1.7428 +  ins_pipe(ialu_reg); // NOTE(review): a register pipe class on a memory read-modify-write rule, and no ins_cost - confirm intended
  1.7429 +%}
  1.7430 +
  1.7431 +//----------Multiplication/Division Instructions-------------------------------
  1.7432 +// Integer Multiplication Instructions
  1.7433 +// Multiply Register
  1.7434 +
  1.7435 +instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
  1.7436 +%{ // Int register-register multiply, emitted as two-operand imull (0F AF).
  1.7437 +  match(Set dst (MulI dst src));
  1.7438 +  effect(KILL cr); // flags register is clobbered
  1.7439 +
  1.7440 +  ins_cost(300);
  1.7441 +  format %{ "imull   $dst, $src\t# int" %}
  1.7442 +  opcode(0x0F, 0xAF);
  1.7443 +  ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
  1.7444 +  ins_pipe(ialu_reg_reg_alu0);
  1.7445 +%}
  1.7446 +
  1.7447 +instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
  1.7448 +%{ // Int multiply of a register by an immediate into a separate dst, emitted as three-operand imull.
  1.7449 +  match(Set dst (MulI src imm)); // dst is not an input of this rule
  1.7450 +  effect(KILL cr); // flags register is clobbered
  1.7451 +
  1.7452 +  ins_cost(300);
  1.7453 +  format %{ "imull   $dst, $src, $imm\t# int" %}
  1.7454 +  opcode(0x69); /* 69 /r id */
  1.7455 +  ins_encode(REX_reg_reg(dst, src),
  1.7456 +             OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
  1.7457 +  ins_pipe(ialu_reg_reg_alu0);
  1.7458 +%}
  1.7459 +
  1.7460 +instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
  1.7461 +%{ // Int multiply of a register by a memory operand, emitted as imull with the load folded in.
  1.7462 +  match(Set dst (MulI dst (LoadI src))); // the LoadI is consumed by this rule
  1.7463 +  effect(KILL cr); // flags register is clobbered
  1.7464 +
  1.7465 +  ins_cost(350);
  1.7466 +  format %{ "imull   $dst, $src\t# int" %}
  1.7467 +  opcode(0x0F, 0xAF);
  1.7468 +  ins_encode(REX_reg_mem(dst, src), OpcP, OpcS, reg_mem(dst, src));
  1.7469 +  ins_pipe(ialu_reg_mem_alu0);
  1.7470 +%}
  1.7471 +
  1.7472 +instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
  1.7473 +%{ // Int multiply of a memory operand by an immediate into dst, emitted as three-operand imull.
  1.7474 +  match(Set dst (MulI (LoadI src) imm)); // the LoadI is consumed by this rule; dst is not an input
  1.7475 +  effect(KILL cr); // flags register is clobbered
  1.7476 +
  1.7477 +  ins_cost(300);
  1.7478 +  format %{ "imull   $dst, $src, $imm\t# int" %}
  1.7479 +  opcode(0x69); /* 69 /r id */
  1.7480 +  ins_encode(REX_reg_mem(dst, src),
  1.7481 +             OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
  1.7482 +  ins_pipe(ialu_reg_mem_alu0);
  1.7483 +%}
  1.7484 +
  1.7485 +instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
  1.7486 +%{ // Long register-register multiply, emitted as two-operand imulq (REX.W 0F AF).
  1.7487 +  match(Set dst (MulL dst src));
  1.7488 +  effect(KILL cr); // flags register is clobbered
  1.7489 +
  1.7490 +  ins_cost(300);
  1.7491 +  format %{ "imulq   $dst, $src\t# long" %}
  1.7492 +  opcode(0x0F, 0xAF);
  1.7493 +  ins_encode(REX_reg_reg_wide(dst, src), OpcP, OpcS, reg_reg(dst, src));
  1.7494 +  ins_pipe(ialu_reg_reg_alu0);
  1.7495 +%}
  1.7496 +
  1.7497 +instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
  1.7498 +%{ // Long multiply of a register by an immL32 constant into a separate dst, emitted as three-operand imulq.
  1.7499 +  match(Set dst (MulL src imm)); // dst is not an input of this rule
  1.7500 +  effect(KILL cr); // flags register is clobbered
  1.7501 +
  1.7502 +  ins_cost(300);
  1.7503 +  format %{ "imulq   $dst, $src, $imm\t# long" %}
  1.7504 +  opcode(0x69); /* 69 /r id */
  1.7505 +  ins_encode(REX_reg_reg_wide(dst, src),
  1.7506 +             OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
  1.7507 +  ins_pipe(ialu_reg_reg_alu0);
  1.7508 +%}
  1.7509 +
  1.7510 +instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
  1.7511 +%{ // Long multiply of a register by a memory operand, emitted as imulq with the load folded in.
  1.7512 +  match(Set dst (MulL dst (LoadL src))); // the LoadL is consumed by this rule
  1.7513 +  effect(KILL cr); // flags register is clobbered
  1.7514 +
  1.7515 +  ins_cost(350);
  1.7516 +  format %{ "imulq   $dst, $src\t# long" %}
  1.7517 +  opcode(0x0F, 0xAF);
  1.7518 +  ins_encode(REX_reg_mem_wide(dst, src), OpcP, OpcS, reg_mem(dst, src));
  1.7519 +  ins_pipe(ialu_reg_mem_alu0);
  1.7520 +%}
  1.7521 +
  1.7522 +instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
  1.7523 +%{ // Long multiply of a memory operand by an immL32 constant into dst, emitted as three-operand imulq.
  1.7524 +  match(Set dst (MulL (LoadL src) imm)); // the LoadL is consumed by this rule; dst is not an input
  1.7525 +  effect(KILL cr); // flags register is clobbered
  1.7526 +
  1.7527 +  ins_cost(300);
  1.7528 +  format %{ "imulq   $dst, $src, $imm\t# long" %}
  1.7529 +  opcode(0x69); /* 69 /r id */
  1.7530 +  ins_encode(REX_reg_mem_wide(dst, src),
  1.7531 +             OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
  1.7532 +  ins_pipe(ialu_reg_mem_alu0);
  1.7533 +%}
  1.7534 +
  1.7535 +instruct mulHiL_rReg(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
  1.7536 +%{ // High half of a long multiply via one-operand imulq (F7 /5); the result operand is pinned to rdx.
  1.7537 +  match(Set dst (MulHiL src rax)); // one input must be in rax, the other in any register except rax
  1.7538 +  effect(USE_KILL rax, KILL cr); // rax is read and then clobbered; flags are clobbered
  1.7539 +
  1.7540 +  ins_cost(300);
  1.7541 +  format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
  1.7542 +  opcode(0xF7, 0x5); /* Opcode F7 /5 */
  1.7543 +  ins_encode(REX_reg_wide(src), OpcP, reg_opc(src)); // only src is encoded explicitly
  1.7544 +  ins_pipe(ialu_reg_reg_alu0);
  1.7545 +%}
  1.7546 +
  1.7547 +instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
  1.7548 +                   rFlagsReg cr)
  1.7549 +%{ // Int division: dividend and quotient in rax, divisor in any register other than rax/rdx.
  1.7550 +  match(Set rax (DivI rax div));
  1.7551 +  effect(KILL rdx, KILL cr); // rdx and the flags are clobbered
  1.7552 +
  1.7553 +  ins_cost(30*100+10*100); // XXX
  1.7554 +  format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
  1.7555 +            "jne,s   normal\n\t"
  1.7556 +            "xorl    rdx, rdx\n\t"
  1.7557 +            "cmpl    $div, -1\n\t"
  1.7558 +            "je,s    done\n"
  1.7559 +    "normal: cdql\n\t"
  1.7560 +            "idivl   $div\n"
  1.7561 +    "done:"        %}
  1.7562 +  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  1.7563 +  ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div)); // cdql_enc presumably emits the 0x80000000 / -1 guard shown in the format; the idivl follows
  1.7564 +  ins_pipe(ialu_reg_reg_alu0);
  1.7565 +%}
  1.7566 +
  1.7567 +instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
  1.7568 +                   rFlagsReg cr)
  1.7569 +%{ // Long division: dividend and quotient in rax, divisor in any register other than rax/rdx.
  1.7570 +  match(Set rax (DivL rax div));
  1.7571 +  effect(KILL rdx, KILL cr); // rdx and the flags are clobbered
  1.7572 +
  1.7573 +  ins_cost(30*100+10*100); // XXX
  1.7574 +  format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
  1.7575 +            "cmpq    rax, rdx\n\t"
  1.7576 +            "jne,s   normal\n\t"
  1.7577 +            "xorl    rdx, rdx\n\t"
  1.7578 +            "cmpq    $div, -1\n\t"
  1.7579 +            "je,s    done\n"
  1.7580 +    "normal: cdqq\n\t"
  1.7581 +            "idivq   $div\n"
  1.7582 +    "done:"        %}
  1.7583 +  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  1.7584 +  ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div)); // cdqq_enc presumably emits the min-long / -1 guard shown in the format; the idivq follows
  1.7585 +  ins_pipe(ialu_reg_reg_alu0);
  1.7586 +%}
  1.7587 +
  1.7588 +// Integer DIVMOD with Register, both quotient and mod results
  1.7589 +instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
  1.7590 +                             rFlagsReg cr)
  1.7591 +%{ // Same guarded idivl sequence as the plain int divide, but matched on DivModI with no Set.
  1.7592 +  match(DivModI rax div);
  1.7593 +  effect(KILL cr); // only the flags are killed; rdx is not, presumably because it carries the mod result
  1.7594 +
  1.7595 +  ins_cost(30*100+10*100); // XXX
  1.7596 +  format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
  1.7597 +            "jne,s   normal\n\t"
  1.7598 +            "xorl    rdx, rdx\n\t"
  1.7599 +            "cmpl    $div, -1\n\t"
  1.7600 +            "je,s    done\n"
  1.7601 +    "normal: cdql\n\t"
  1.7602 +            "idivl   $div\n"
  1.7603 +    "done:"        %}
  1.7604 +  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  1.7605 +  ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
  1.7606 +  ins_pipe(pipe_slow);
  1.7607 +%}
  1.7608 +
  1.7609 +// Long DIVMOD with Register, both quotient and mod results
  1.7610 +instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
  1.7611 +                             rFlagsReg cr)
  1.7612 +%{ // Same guarded idivq sequence as the plain long divide, but matched on DivModL with no Set.
  1.7613 +  match(DivModL rax div);
  1.7614 +  effect(KILL cr); // only the flags are killed; rdx is not, presumably because it carries the mod result
  1.7615 +
  1.7616 +  ins_cost(30*100+10*100); // XXX
  1.7617 +  format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
  1.7618 +            "cmpq    rax, rdx\n\t"
  1.7619 +            "jne,s   normal\n\t"
  1.7620 +            "xorl    rdx, rdx\n\t"
  1.7621 +            "cmpq    $div, -1\n\t"
  1.7622 +            "je,s    done\n"
  1.7623 +    "normal: cdqq\n\t"
  1.7624 +            "idivq   $div\n"
  1.7625 +    "done:"        %}
  1.7626 +  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  1.7627 +  ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
  1.7628 +  ins_pipe(pipe_slow);
  1.7629 +%}
  1.7630 +
  1.7631 +//----------- DivL-By-Constant-Expansions--------------------------------------
  1.7632 +// DivI cases are handled by the compiler
  1.7633 +
  1.7634 +// Magic constant, reciprocal of 10
  1.7635 +instruct loadConL_0x6666666666666667(rRegL dst)
  1.7636 +%{ // Helper with no match rule: loads the fixed 64-bit constant into dst for the divL_10 expansion.
  1.7637 +  effect(DEF dst); // dst is written only
  1.7638 +
  1.7639 +  format %{ "movq    $dst, #0x6666666666666667\t# Used in div-by-10" %}
  1.7640 +  ins_encode(load_immL(dst, 0x6666666666666667));
  1.7641 +  ins_pipe(ialu_reg);
  1.7642 +%}
  1.7643 +
  1.7644 +instruct mul_hi(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
  1.7645 +%{ // Helper with no match rule (used by the divL_10 expansion): one-operand imulq leaving the high half in rdx.
  1.7646 +  effect(DEF dst, USE src, USE_KILL rax, KILL cr); // rdx written, src read, rax read then clobbered, flags clobbered
  1.7647 +
  1.7648 +  format %{ "imulq   rdx:rax, rax, $src\t# Used in div-by-10" %}
  1.7649 +  opcode(0xF7, 0x5); /* Opcode F7 /5 */
  1.7650 +  ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
  1.7651 +  ins_pipe(ialu_reg_reg_alu0);
  1.7652 +%}
  1.7653 +
  1.7654 +instruct sarL_rReg_63(rRegL dst, rFlagsReg cr)
  1.7655 +%{ // Helper with no match rule (used by the divL_10 expansion): arithmetic right shift of dst by 63, in place.
  1.7656 +  effect(USE_DEF dst, KILL cr); // dst is read and written; flags are clobbered
  1.7657 +
  1.7658 +  format %{ "sarq    $dst, #63\t# Used in div-by-10" %}
  1.7659 +  opcode(0xC1, 0x7); /* C1 /7 ib */
  1.7660 +  ins_encode(reg_opc_imm_wide(dst, 0x3F)); // 0x3F is the shift count 63
  1.7661 +  ins_pipe(ialu_reg);
  1.7662 +%}
  1.7663 +
  1.7664 +instruct sarL_rReg_2(rRegL dst, rFlagsReg cr)
  1.7665 +%{
  1.7666 +  effect(USE_DEF dst, KILL cr); // expand-only helper: dst is shifted in place
  1.7667 +
  1.7668 +  format %{ "sarq    $dst, #2\t# Used in div-by-10" %}
  1.7669 +  opcode(0xC1, 0x7); /* C1 /7 ib: SAR r/m64, imm8 with a fixed count of 2 */
  1.7670 +  ins_encode(reg_opc_imm_wide(dst, 0x2));
  1.7671 +  ins_pipe(ialu_reg);
  1.7672 +%}
  1.7673 +
  1.7674 +instruct divL_10(rdx_RegL dst, no_rax_RegL src, immL10 div)
  1.7675 +%{
  1.7676 +  match(Set dst (DivL src div)); // long divide by constant, done as multiply-high plus shifts
  1.7677 +
  1.7678 +  ins_cost((5+8)*100);
  1.7679 +  expand %{
  1.7680 +    rax_RegL rax;                     // Killed temp
  1.7681 +    rFlagsReg cr;                     // Killed
  1.7682 +    loadConL_0x6666666666666667(rax); // movq  rax, 0x6666666666666667
  1.7683 +    mul_hi(dst, src, rax, cr);        // imulq rdx:rax <= rax * $src
  1.7684 +    sarL_rReg_63(src, cr);            // sarq  src, 63 (src is overwritten with its sign: 0 or -1)
  1.7685 +    sarL_rReg_2(dst, cr);             // sarq  rdx, 2
  1.7686 +    subL_rReg(dst, src, cr);          // subq  rdx, src (adds 1 to the quotient when src was negative)
  1.7687 +  %}
  1.7688 +%}
  1.7689 +
  1.7690 +//-----------------------------------------------------------------------------
  1.7691 +
  1.7692 +instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
  1.7693 +                   rFlagsReg cr)
  1.7694 +%{
  1.7695 +  match(Set rdx (ModI rax div)); // int remainder: result is produced in rdx
  1.7696 +  effect(KILL rax, KILL cr);
  1.7697 +
  1.7698 +  ins_cost(300); // XXX
  1.7699 +  format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
  1.7700 +            "jne,s   normal\n\t"
  1.7701 +            "xorl    rdx, rdx\n\t"
  1.7702 +            "cmpl    $div, -1\n\t"
  1.7703 +            "je,s    done\n"
  1.7704 +    "normal: cdql\n\t"
  1.7705 +            "idivl   $div\n"
  1.7706 +    "done:"        %}
  1.7707 +  opcode(0xF7, 0x7);  /* Opcode F7 /7: IDIV r/m32; the format above skips it for min_int % -1 (rdx = 0) */
  1.7708 +  ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
  1.7709 +  ins_pipe(ialu_reg_reg_alu0);
  1.7710 +%}
  1.7711 +
  1.7712 +instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
  1.7713 +                   rFlagsReg cr)
  1.7714 +%{
  1.7715 +  match(Set rdx (ModL rax div)); // long remainder: result is produced in rdx
  1.7716 +  effect(KILL rax, KILL cr);
  1.7717 +
  1.7718 +  ins_cost(300); // XXX
  1.7719 +  format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
  1.7720 +            "cmpq    rax, rdx\n\t"
  1.7721 +            "jne,s   normal\n\t"
  1.7722 +            "xorl    rdx, rdx\n\t"
  1.7723 +            "cmpq    $div, -1\n\t"
  1.7724 +            "je,s    done\n"
  1.7725 +    "normal: cdqq\n\t"
  1.7726 +            "idivq   $div\n"
  1.7727 +    "done:"        %}
  1.7728 +  opcode(0xF7, 0x7);  /* Opcode F7 /7: IDIV r/m64; the format above skips it for min_long % -1 (rdx = 0) */
  1.7729 +  ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
  1.7730 +  ins_pipe(ialu_reg_reg_alu0);
  1.7731 +%}
  1.7732 +
  1.7733 +// Integer Shift Instructions
  1.7734 +// Shift Left by one: 32-bit register, shifted in place
  1.7735 +instruct salI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
  1.7736 +%{
  1.7737 +  match(Set dst (LShiftI dst shift));
  1.7738 +  effect(KILL cr);
  1.7739 +
  1.7740 +  format %{ "sall    $dst, $shift" %}
  1.7741 +  opcode(0xD1, 0x4); /* D1 /4: SAL r/m32, 1 (count is implicit in the opcode) */
  1.7742 +  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
  1.7743 +  ins_pipe(ialu_reg);
  1.7744 +%}
  1.7745 +
  1.7746 +// Shift Left by one: 32-bit memory operand, load/shift/store of the same address fused into one instruction
  1.7747 +instruct salI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
  1.7748 +%{
  1.7749 +  match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
  1.7750 +  effect(KILL cr);
  1.7751 +
  1.7752 +  format %{ "sall    $dst, $shift\t" %}
  1.7753 +  opcode(0xD1, 0x4); /* D1 /4: SAL r/m32, 1 */
  1.7754 +  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
  1.7755 +  ins_pipe(ialu_mem_imm);
  1.7756 +%}
  1.7757 +
  1.7758 +// Shift Left by 8-bit immediate: 32-bit register, shifted in place
  1.7759 +instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
  1.7760 +%{
  1.7761 +  match(Set dst (LShiftI dst shift));
  1.7762 +  effect(KILL cr);
  1.7763 +
  1.7764 +  format %{ "sall    $dst, $shift" %}
  1.7765 +  opcode(0xC1, 0x4); /* C1 /4 ib: SAL r/m32, imm8 */
  1.7766 +  ins_encode(reg_opc_imm(dst, shift));
  1.7767 +  ins_pipe(ialu_reg);
  1.7768 +%}
  1.7769 +
  1.7770 +// Shift Left by 8-bit immediate: 32-bit memory operand, fused load/shift/store of the same address
  1.7771 +instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
  1.7772 +%{
  1.7773 +  match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
  1.7774 +  effect(KILL cr);
  1.7775 +
  1.7776 +  format %{ "sall    $dst, $shift" %}
  1.7777 +  opcode(0xC1, 0x4); /* C1 /4 ib: SAL r/m32, imm8 */
  1.7778 +  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
  1.7779 +  ins_pipe(ialu_mem_imm);
  1.7780 +%}
  1.7781 +
  1.7782 +// Shift Left by variable: 32-bit register, count taken from the rcx-class operand
  1.7783 +instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
  1.7784 +%{
  1.7785 +  match(Set dst (LShiftI dst shift));
  1.7786 +  effect(KILL cr);
  1.7787 +
  1.7788 +  format %{ "sall    $dst, $shift" %}
  1.7789 +  opcode(0xD3, 0x4); /* D3 /4: SAL r/m32, CL */
  1.7790 +  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
  1.7791 +  ins_pipe(ialu_reg_reg);
  1.7792 +%}
  1.7793 +
  1.7794 +// Shift Left by variable: 32-bit memory operand, fused load/shift/store, count from the rcx-class operand
  1.7795 +instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
  1.7796 +%{
  1.7797 +  match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
  1.7798 +  effect(KILL cr);
  1.7799 +
  1.7800 +  format %{ "sall    $dst, $shift" %}
  1.7801 +  opcode(0xD3, 0x4); /* D3 /4: SAL r/m32, CL */
  1.7802 +  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
  1.7803 +  ins_pipe(ialu_mem_reg);
  1.7804 +%}
  1.7805 +
  1.7806 +// Arithmetic shift right by one: 32-bit register, shifted in place
  1.7807 +instruct sarI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
  1.7808 +%{
  1.7809 +  match(Set dst (RShiftI dst shift));
  1.7810 +  effect(KILL cr);
  1.7811 +
  1.7812 +  format %{ "sarl    $dst, $shift" %}
  1.7813 +  opcode(0xD1, 0x7); /* D1 /7: SAR r/m32, 1 (count is implicit in the opcode) */
  1.7814 +  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
  1.7815 +  ins_pipe(ialu_reg);
  1.7816 +%}
  1.7817 +
  1.7818 +// Arithmetic shift right by one: 32-bit memory operand, fused load/shift/store of the same address
  1.7819 +instruct sarI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
  1.7820 +%{
  1.7821 +  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  1.7822 +  effect(KILL cr);
  1.7823 +
  1.7824 +  format %{ "sarl    $dst, $shift" %}
  1.7825 +  opcode(0xD1, 0x7); /* D1 /7: SAR r/m32, 1 */
  1.7826 +  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
  1.7827 +  ins_pipe(ialu_mem_imm);
  1.7828 +%}
  1.7829 +
  1.7830 +// Arithmetic Shift Right by 8-bit immediate: 32-bit register, shifted in place
  1.7831 +instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
  1.7832 +%{
  1.7833 +  match(Set dst (RShiftI dst shift));
  1.7834 +  effect(KILL cr);
  1.7835 +
  1.7836 +  format %{ "sarl    $dst, $shift" %}
  1.7837 +  opcode(0xC1, 0x7); /* C1 /7 ib: SAR r/m32, imm8 */
  1.7838 +  ins_encode(reg_opc_imm(dst, shift));
  1.7839 +  ins_pipe(ialu_reg); // register form: same pipe class as salI_rReg_imm and shrI_rReg_imm
  1.7840 +%}
  1.7841 +
  1.7842 +// Arithmetic Shift Right by 8-bit immediate: 32-bit memory operand, fused load/shift/store
  1.7843 +instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
  1.7844 +%{
  1.7845 +  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  1.7846 +  effect(KILL cr);
  1.7847 +
  1.7848 +  format %{ "sarl    $dst, $shift" %}
  1.7849 +  opcode(0xC1, 0x7); /* C1 /7 ib: SAR r/m32, imm8 */
  1.7850 +  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
  1.7851 +  ins_pipe(ialu_mem_imm);
  1.7852 +%}
  1.7853 +
  1.7854 +// Arithmetic Shift Right by variable: 32-bit register, count taken from the rcx-class operand
  1.7855 +instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
  1.7856 +%{
  1.7857 +  match(Set dst (RShiftI dst shift));
  1.7858 +  effect(KILL cr);
  1.7859 +
  1.7860 +  format %{ "sarl    $dst, $shift" %}
  1.7861 +  opcode(0xD3, 0x7); /* D3 /7: SAR r/m32, CL */
  1.7862 +  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
  1.7863 +  ins_pipe(ialu_reg_reg);
  1.7864 +%}
  1.7865 +
  1.7866 +// Arithmetic Shift Right by variable: 32-bit memory operand, fused load/shift/store, count from rcx-class operand
  1.7867 +instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
  1.7868 +%{
  1.7869 +  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  1.7870 +  effect(KILL cr);
  1.7871 +
  1.7872 +  format %{ "sarl    $dst, $shift" %}
  1.7873 +  opcode(0xD3, 0x7); /* D3 /7: SAR r/m32, CL */
  1.7874 +  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
  1.7875 +  ins_pipe(ialu_mem_reg);
  1.7876 +%}
  1.7877 +
  1.7878 +// Logical shift right by one: 32-bit register, shifted in place
  1.7879 +instruct shrI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
  1.7880 +%{
  1.7881 +  match(Set dst (URShiftI dst shift));
  1.7882 +  effect(KILL cr);
  1.7883 +
  1.7884 +  format %{ "shrl    $dst, $shift" %}
  1.7885 +  opcode(0xD1, 0x5); /* D1 /5: SHR r/m32, 1 (count is implicit in the opcode) */
  1.7886 +  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
  1.7887 +  ins_pipe(ialu_reg);
  1.7888 +%}
  1.7889 +
  1.7890 +// Logical shift right by one: 32-bit memory operand, fused load/shift/store of the same address
  1.7891 +instruct shrI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
  1.7892 +%{
  1.7893 +  match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
  1.7894 +  effect(KILL cr);
  1.7895 +
  1.7896 +  format %{ "shrl    $dst, $shift" %}
  1.7897 +  opcode(0xD1, 0x5); /* D1 /5: SHR r/m32, 1 */
  1.7898 +  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
  1.7899 +  ins_pipe(ialu_mem_imm);
  1.7900 +%}
  1.7901 +
  1.7902 +// Logical Shift Right by 8-bit immediate: 32-bit register, shifted in place
  1.7903 +instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
  1.7904 +%{
  1.7905 +  match(Set dst (URShiftI dst shift));
  1.7906 +  effect(KILL cr);
  1.7907 +
  1.7908 +  format %{ "shrl    $dst, $shift" %}
  1.7909 +  opcode(0xC1, 0x5); /* C1 /5 ib: SHR r/m32, imm8 */
  1.7910 +  ins_encode(reg_opc_imm(dst, shift));
  1.7911 +  ins_pipe(ialu_reg);
  1.7912 +%}
  1.7913 +
  1.7914 +// Logical Shift Right by 8-bit immediate: 32-bit memory operand, fused load/shift/store
  1.7915 +instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
  1.7916 +%{
  1.7917 +  match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
  1.7918 +  effect(KILL cr);
  1.7919 +
  1.7920 +  format %{ "shrl    $dst, $shift" %}
  1.7921 +  opcode(0xC1, 0x5); /* C1 /5 ib: SHR r/m32, imm8 */
  1.7922 +  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
  1.7923 +  ins_pipe(ialu_mem_imm);
  1.7924 +%}
  1.7925 +
  1.7926 +// Logical Shift Right by variable: 32-bit register, count taken from the rcx-class operand
  1.7927 +instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
  1.7928 +%{
  1.7929 +  match(Set dst (URShiftI dst shift));
  1.7930 +  effect(KILL cr);
  1.7931 +
  1.7932 +  format %{ "shrl    $dst, $shift" %}
  1.7933 +  opcode(0xD3, 0x5); /* D3 /5: SHR r/m32, CL */
  1.7934 +  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
  1.7935 +  ins_pipe(ialu_reg_reg);
  1.7936 +%}
  1.7937 +
  1.7938 +// Logical Shift Right by variable: 32-bit memory operand, fused load/shift/store, count from rcx-class operand
  1.7939 +instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
  1.7940 +%{
  1.7941 +  match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
  1.7942 +  effect(KILL cr);
  1.7943 +
  1.7944 +  format %{ "shrl    $dst, $shift" %}
  1.7945 +  opcode(0xD3, 0x5); /* D3 /5: SHR r/m32, CL */
  1.7946 +  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
  1.7947 +  ins_pipe(ialu_mem_reg);
  1.7948 +%}
  1.7949 +
  1.7950 +// Long Shift Instructions
  1.7951 +// Shift Left by one: 64-bit register, shifted in place
  1.7952 +instruct salL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
  1.7953 +%{
  1.7954 +  match(Set dst (LShiftL dst shift));
  1.7955 +  effect(KILL cr);
  1.7956 +
  1.7957 +  format %{ "salq    $dst, $shift" %}
  1.7958 +  opcode(0xD1, 0x4); /* D1 /4: SAL r/m64, 1 (64-bit via the wide REX prefix) */
  1.7959 +  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
  1.7960 +  ins_pipe(ialu_reg);
  1.7961 +%}
  1.7962 +
  1.7963 +// Shift Left by one: 64-bit memory operand, fused load/shift/store of the same address
  1.7964 +instruct salL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
  1.7965 +%{
  1.7966 +  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
  1.7967 +  effect(KILL cr);
  1.7968 +
  1.7969 +  format %{ "salq    $dst, $shift" %}
  1.7970 +  opcode(0xD1, 0x4); /* D1 /4: SAL r/m64, 1 (64-bit via the wide REX prefix) */
  1.7971 +  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
  1.7972 +  ins_pipe(ialu_mem_imm);
  1.7973 +%}
  1.7974 +
  1.7975 +// Shift Left by 8-bit immediate: 64-bit register, shifted in place
  1.7976 +instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
  1.7977 +%{
  1.7978 +  match(Set dst (LShiftL dst shift));
  1.7979 +  effect(KILL cr);
  1.7980 +
  1.7981 +  format %{ "salq    $dst, $shift" %}
  1.7982 +  opcode(0xC1, 0x4); /* C1 /4 ib: SAL r/m64, imm8 */
  1.7983 +  ins_encode(reg_opc_imm_wide(dst, shift));
  1.7984 +  ins_pipe(ialu_reg);
  1.7985 +%}
  1.7986 +
  1.7987 +// Shift Left by 8-bit immediate: 64-bit memory operand, fused load/shift/store of the same address
  1.7988 +instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
  1.7989 +%{
  1.7990 +  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
  1.7991 +  effect(KILL cr);
  1.7992 +
  1.7993 +  format %{ "salq    $dst, $shift" %}
  1.7994 +  opcode(0xC1, 0x4); /* C1 /4 ib: SAL r/m64, imm8 */
  1.7995 +  ins_encode(REX_mem_wide(dst), OpcP,
  1.7996 +             RM_opc_mem(secondary, dst), Con8or32(shift));
  1.7997 +  ins_pipe(ialu_mem_imm);
  1.7998 +%}
  1.7999 +
  1.8000 +// Shift Left by variable: 64-bit register, int count taken from the rcx-class operand
  1.8001 +instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
  1.8002 +%{
  1.8003 +  match(Set dst (LShiftL dst shift));
  1.8004 +  effect(KILL cr);
  1.8005 +
  1.8006 +  format %{ "salq    $dst, $shift" %}
  1.8007 +  opcode(0xD3, 0x4); /* D3 /4: SAL r/m64, CL */
  1.8008 +  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
  1.8009 +  ins_pipe(ialu_reg_reg);
  1.8010 +%}
  1.8011 +
  1.8012 +// Shift Left by variable: 64-bit memory operand, fused load/shift/store, count from the rcx-class operand
  1.8013 +instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
  1.8014 +%{
  1.8015 +  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
  1.8016 +  effect(KILL cr);
  1.8017 +
  1.8018 +  format %{ "salq    $dst, $shift" %}
  1.8019 +  opcode(0xD3, 0x4); /* D3 /4: SAL r/m64, CL */
  1.8020 +  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
  1.8021 +  ins_pipe(ialu_mem_reg);
  1.8022 +%}
  1.8023 +
  1.8024 +// Arithmetic shift right by one: 64-bit register, shifted in place
  1.8025 +instruct sarL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
  1.8026 +%{
  1.8027 +  match(Set dst (RShiftL dst shift));
  1.8028 +  effect(KILL cr);
  1.8029 +
  1.8030 +  format %{ "sarq    $dst, $shift" %}
  1.8031 +  opcode(0xD1, 0x7); /* D1 /7: SAR r/m64, 1 (64-bit via the wide REX prefix) */
  1.8032 +  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
  1.8033 +  ins_pipe(ialu_reg);
  1.8034 +%}
  1.8035 +
  1.8036 +// Arithmetic shift right by one: 64-bit memory operand, fused load/shift/store of the same address
  1.8037 +instruct sarL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
  1.8038 +%{
  1.8039 +  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
  1.8040 +  effect(KILL cr);
  1.8041 +
  1.8042 +  format %{ "sarq    $dst, $shift" %}
  1.8043 +  opcode(0xD1, 0x7); /* D1 /7: SAR r/m64, 1 (64-bit via the wide REX prefix) */
  1.8044 +  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
  1.8045 +  ins_pipe(ialu_mem_imm);
  1.8046 +%}
  1.8047 +
  1.8048 +// Arithmetic Shift Right by 8-bit immediate: 64-bit register, shifted in place
  1.8049 +instruct sarL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
  1.8050 +%{
  1.8051 +  match(Set dst (RShiftL dst shift));
  1.8052 +  effect(KILL cr);
  1.8053 +
  1.8054 +  format %{ "sarq    $dst, $shift" %}
  1.8055 +  opcode(0xC1, 0x7); /* C1 /7 ib: SAR r/m64, imm8 */
  1.8056 +  ins_encode(reg_opc_imm_wide(dst, shift));
  1.8057 +  ins_pipe(ialu_reg); // register form: same pipe class as salL_rReg_imm and shrL_rReg_imm
  1.8058 +%}
  1.8059 +
  1.8060 +// Arithmetic Shift Right by 8-bit immediate: 64-bit memory operand, fused load/shift/store
  1.8061 +instruct sarL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
  1.8062 +%{
  1.8063 +  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
  1.8064 +  effect(KILL cr);
  1.8065 +
  1.8066 +  format %{ "sarq    $dst, $shift" %}
  1.8067 +  opcode(0xC1, 0x7); /* C1 /7 ib: SAR r/m64, imm8 */
  1.8068 +  ins_encode(REX_mem_wide(dst), OpcP,
  1.8069 +             RM_opc_mem(secondary, dst), Con8or32(shift));
  1.8070 +  ins_pipe(ialu_mem_imm);
  1.8071 +%}
  1.8072 +
  1.8073 +// Arithmetic Shift Right by variable: 64-bit register, int count taken from the rcx-class operand
  1.8074 +instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
  1.8075 +%{
  1.8076 +  match(Set dst (RShiftL dst shift));
  1.8077 +  effect(KILL cr);
  1.8078 +
  1.8079 +  format %{ "sarq    $dst, $shift" %}
  1.8080 +  opcode(0xD3, 0x7); /* D3 /7: SAR r/m64, CL */
  1.8081 +  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
  1.8082 +  ins_pipe(ialu_reg_reg);
  1.8083 +%}
  1.8084 +
  1.8085 +// Arithmetic Shift Right by variable: 64-bit memory operand, fused load/shift/store, count from rcx-class operand
  1.8086 +instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
  1.8087 +%{
  1.8088 +  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
  1.8089 +  effect(KILL cr);
  1.8090 +
  1.8091 +  format %{ "sarq    $dst, $shift" %}
  1.8092 +  opcode(0xD3, 0x7); /* D3 /7: SAR r/m64, CL */
  1.8093 +  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
  1.8094 +  ins_pipe(ialu_mem_reg);
  1.8095 +%}
  1.8096 +
  1.8097 +// Logical shift right by one: 64-bit register, shifted in place
  1.8098 +instruct shrL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
  1.8099 +%{
  1.8100 +  match(Set dst (URShiftL dst shift));
  1.8101 +  effect(KILL cr);
  1.8102 +
  1.8103 +  format %{ "shrq    $dst, $shift" %}
  1.8104 +  opcode(0xD1, 0x5); /* D1 /5: SHR r/m64, 1 (64-bit via the wide REX prefix) */
  1.8105 +  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst ));
  1.8106 +  ins_pipe(ialu_reg);
  1.8107 +%}
  1.8108 +
  1.8109 +// Logical shift right by one: 64-bit memory operand, fused load/shift/store of the same address
  1.8110 +instruct shrL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
  1.8111 +%{
  1.8112 +  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
  1.8113 +  effect(KILL cr);
  1.8114 +
  1.8115 +  format %{ "shrq    $dst, $shift" %}
  1.8116 +  opcode(0xD1, 0x5); /* D1 /5: SHR r/m64, 1 (64-bit via the wide REX prefix) */
  1.8117 +  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
  1.8118 +  ins_pipe(ialu_mem_imm);
  1.8119 +%}
  1.8120 +
  1.8121 +// Logical Shift Right by 8-bit immediate: 64-bit register, shifted in place
  1.8122 +instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
  1.8123 +%{
  1.8124 +  match(Set dst (URShiftL dst shift));
  1.8125 +  effect(KILL cr);
  1.8126 +
  1.8127 +  format %{ "shrq    $dst, $shift" %}
  1.8128 +  opcode(0xC1, 0x5); /* C1 /5 ib: SHR r/m64, imm8 */
  1.8129 +  ins_encode(reg_opc_imm_wide(dst, shift));
  1.8130 +  ins_pipe(ialu_reg);
  1.8131 +%}
  1.8132 +
  1.8133 +
  1.8134 +// Logical Shift Right by 8-bit immediate: 64-bit memory operand, fused load/shift/store
  1.8135 +instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
  1.8136 +%{
  1.8137 +  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
  1.8138 +  effect(KILL cr);
  1.8139 +
  1.8140 +  format %{ "shrq    $dst, $shift" %}
  1.8141 +  opcode(0xC1, 0x5); /* C1 /5 ib: SHR r/m64, imm8 */
  1.8142 +  ins_encode(REX_mem_wide(dst), OpcP,
  1.8143 +             RM_opc_mem(secondary, dst), Con8or32(shift));
  1.8144 +  ins_pipe(ialu_mem_imm);
  1.8145 +%}
  1.8146 +
  1.8147 +// Logical Shift Right by variable: 64-bit register, int count taken from the rcx-class operand
  1.8148 +instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
  1.8149 +%{
  1.8150 +  match(Set dst (URShiftL dst shift));
  1.8151 +  effect(KILL cr);
  1.8152 +
  1.8153 +  format %{ "shrq    $dst, $shift" %}
  1.8154 +  opcode(0xD3, 0x5); /* D3 /5: SHR r/m64, CL */
  1.8155 +  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
  1.8156 +  ins_pipe(ialu_reg_reg);
  1.8157 +%}
  1.8158 +
  1.8159 +// Logical Shift Right by variable: 64-bit memory operand, fused load/shift/store, count from rcx-class operand
  1.8160 +instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
  1.8161 +%{
  1.8162 +  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
  1.8163 +  effect(KILL cr);
  1.8164 +
  1.8165 +  format %{ "shrq    $dst, $shift" %}
  1.8166 +  opcode(0xD3, 0x5); /* D3 /5: SHR r/m64, CL */
  1.8167 +  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
  1.8168 +  ins_pipe(ialu_mem_reg);
  1.8169 +%}
  1.8170 +
  1.8171 +// Shift Left by 24, followed by Arithmetic Shift Right by 24 (sign-extends the low byte).
  1.8172 +// This idiom is used by the compiler for the i2b bytecode.
  1.8173 +instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
  1.8174 +%{
  1.8175 +  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
  1.8176 +
  1.8177 +  format %{ "movsbl  $dst, $src\t# i2b" %}
  1.8178 +  opcode(0x0F, 0xBE); /* 0F BE: MOVSX r32, r/m8 */
  1.8179 +  ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
  1.8180 +  ins_pipe(ialu_reg_reg);
  1.8181 +%}
  1.8182 +
  1.8183 +// Shift Left by 16, followed by Arithmetic Shift Right by 16 (sign-extends the low 16 bits).
  1.8184 +// This idiom is used by the compiler for the i2s bytecode.
  1.8185 +instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
  1.8186 +%{
  1.8187 +  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
  1.8188 +
  1.8189 +  format %{ "movswl  $dst, $src\t# i2s" %}
  1.8190 +  opcode(0x0F, 0xBF); /* 0F BF: MOVSX r32, r/m16 */
  1.8191 +  ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
  1.8192 +  ins_pipe(ialu_reg_reg);
  1.8193 +%}
  1.8194 +
  1.8195 +// ROL/ROR instructions
  1.8196 +
  1.8197 +// ROL expand: encoding-only helpers (no match rule) that the rotate-left patterns below expand into
  1.8198 +instruct rolI_rReg_imm1(rRegI dst, rFlagsReg cr) %{
  1.8199 +  effect(KILL cr, USE_DEF dst);
  1.8200 +
  1.8201 +  format %{ "roll    $dst" %}
  1.8202 +  opcode(0xD1, 0x0); /* Opcode  D1 /0: ROL r/m32, 1 */
  1.8203 +  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
  1.8204 +  ins_pipe(ialu_reg);
  1.8205 +%}
  1.8206 +
  1.8207 +instruct rolI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr) %{
  1.8208 +  effect(USE_DEF dst, USE shift, KILL cr); // expand-only helper: dst is rotated in place
  1.8209 +
  1.8210 +  format %{ "roll    $dst, $shift" %}
  1.8211 +  opcode(0xC1, 0x0); /* Opcode C1 /0 ib: ROL r/m32, imm8 */
  1.8212 +  ins_encode( reg_opc_imm(dst, shift) );
  1.8213 +  ins_pipe(ialu_reg);
  1.8214 +%}
  1.8215 +
  1.8216 +instruct rolI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
  1.8217 +%{
  1.8218 +  effect(USE_DEF dst, USE shift, KILL cr); // expand-only helper: dst is rotated in place
  1.8219 +
  1.8220 +  format %{ "roll    $dst, $shift" %}
  1.8221 +  opcode(0xD3, 0x0); /* Opcode D3 /0: ROL r/m32, CL */
  1.8222 +  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
  1.8223 +  ins_pipe(ialu_reg_reg);
  1.8224 +%}
  1.8225 +// end of ROL expand
  1.8226 +
  1.8227 +// Rotate Left by one: matches (dst << lshift) | (dst >>> rshift); immI_M1 is presumably the constant -1
  1.8228 +instruct rolI_rReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
  1.8229 +%{
  1.8230 +  match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
  1.8231 +
  1.8232 +  expand %{
  1.8233 +    rolI_rReg_imm1(dst, cr);          // roll  dst (by one)
  1.8234 +  %}
  1.8235 +%}
  1.8236 +
  1.8237 +// Rotate Left by 8-bit immediate: only when the two constant counts sum to a multiple of 32 (see predicate)
  1.8238 +instruct rolI_rReg_i8(rRegI dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
  1.8239 +%{
  1.8240 +  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  1.8241 +  match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
  1.8242 +
  1.8243 +  expand %{
  1.8244 +    rolI_rReg_imm8(dst, lshift, cr);  // roll  dst, lshift
  1.8245 +  %}
  1.8246 +%}
  1.8247 +
  1.8248 +// Rotate Left by variable: matches (dst << shift) | (dst >>> (zero - shift))
  1.8249 +instruct rolI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
  1.8250 +%{
  1.8251 +  match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
  1.8252 +
  1.8253 +  expand %{
  1.8254 +    rolI_rReg_CL(dst, shift, cr);     // roll  dst, shift
  1.8255 +  %}
  1.8256 +%}
  1.8257 +
  1.8258 +// Rotate Left by variable: matches (dst << shift) | (dst >>> (c32 - shift))
  1.8259 +instruct rolI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
  1.8260 +%{
  1.8261 +  match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
  1.8262 +
  1.8263 +  expand %{
  1.8264 +    rolI_rReg_CL(dst, shift, cr);     // roll  dst, shift
  1.8265 +  %}
  1.8266 +%}
  1.8267 +
  1.8268 +// ROR expand: encoding-only helpers (no match rule) that the rotate-right patterns below expand into
  1.8269 +instruct rorI_rReg_imm1(rRegI dst, rFlagsReg cr)
  1.8270 +%{
  1.8271 +  effect(USE_DEF dst, KILL cr);
  1.8272 +
  1.8273 +  format %{ "rorl    $dst" %}
  1.8274 +  opcode(0xD1, 0x1); /* D1 /1: ROR r/m32, 1 */
  1.8275 +  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
  1.8276 +  ins_pipe(ialu_reg);
  1.8277 +%}
  1.8278 +
  1.8279 +instruct rorI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr)
  1.8280 +%{
  1.8281 +  effect(USE_DEF dst, USE shift, KILL cr); // expand-only helper: dst is rotated in place
  1.8282 +
  1.8283 +  format %{ "rorl    $dst, $shift" %}
  1.8284 +  opcode(0xC1, 0x1); /* C1 /1 ib: ROR r/m32, imm8 */
  1.8285 +  ins_encode(reg_opc_imm(dst, shift));
  1.8286 +  ins_pipe(ialu_reg);
  1.8287 +%}
  1.8288 +
  1.8289 +instruct rorI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
  1.8290 +%{
  1.8291 +  effect(USE_DEF dst, USE shift, KILL cr); // expand-only helper: dst is rotated in place
  1.8292 +
  1.8293 +  format %{ "rorl    $dst, $shift" %}
  1.8294 +  opcode(0xD3, 0x1); /* D3 /1: ROR r/m32, CL */
  1.8295 +  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
  1.8296 +  ins_pipe(ialu_reg_reg);
  1.8297 +%}
  1.8298 +// end of ROR expand
  1.8299 +
  1.8300 +// Rotate Right by one: matches (dst >>> rshift) | (dst << lshift); immI_M1 is presumably the constant -1
  1.8301 +instruct rorI_rReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
  1.8302 +%{
  1.8303 +  match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
  1.8304 +
  1.8305 +  expand %{
  1.8306 +    rorI_rReg_imm1(dst, cr);          // rorl  dst (by one)
  1.8307 +  %}
  1.8308 +%}
  1.8309 +
  1.8310 +// Rotate Right by 8-bit immediate: only when the two constant counts sum to a multiple of 32 (see predicate)
  1.8311 +instruct rorI_rReg_i8(rRegI dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
  1.8312 +%{
  1.8313 +  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  1.8314 +  match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
  1.8315 +
  1.8316 +  expand %{
  1.8317 +    rorI_rReg_imm8(dst, rshift, cr);  // rorl  dst, rshift
  1.8318 +  %}
  1.8319 +%}
  1.8320 +
  1.8321 +// Rotate Right by variable: matches (dst >>> shift) | (dst << (zero - shift))
  1.8322 +instruct rorI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
  1.8323 +%{
  1.8324 +  match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
  1.8325 +
  1.8326 +  expand %{
  1.8327 +    rorI_rReg_CL(dst, shift, cr);     // rorl  dst, shift
  1.8328 +  %}
  1.8329 +%}
  1.8330 +
  1.8331 +// Rotate Right by variable: matches (dst >>> shift) | (dst << (c32 - shift))
  1.8332 +instruct rorI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
  1.8333 +%{
  1.8334 +  match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
  1.8335 +
  1.8336 +  expand %{
  1.8337 +    rorI_rReg_CL(dst, shift, cr);     // rorl  dst, shift
  1.8338 +  %}
  1.8339 +%}
  1.8340 +
  1.8341 +// Long (64-bit) rotates
  1.8342 +// ROL expand: encoding-only helpers (no match rule) that the long rotate-left patterns below expand into
  1.8343 +instruct rolL_rReg_imm1(rRegL dst, rFlagsReg cr) %{
  1.8344 +  effect(USE_DEF dst, KILL cr);
  1.8345 +
  1.8346 +  format %{ "rolq    $dst" %}
  1.8347 +  opcode(0xD1, 0x0); /* Opcode  D1 /0: ROL r/m64, 1 (64-bit via the wide REX prefix) */
  1.8348 +  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
  1.8349 +  ins_pipe(ialu_reg);
  1.8350 +%}
  1.8351 +
  1.8352 +instruct rolL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr) %{
  1.8353 +  effect(USE_DEF dst, USE shift, KILL cr); // expand-only helper: dst is rotated in place
  1.8354 +
  1.8355 +  format %{ "rolq    $dst, $shift" %}
  1.8356 +  opcode(0xC1, 0x0); /* Opcode C1 /0 ib: ROL r/m64, imm8 */
  1.8357 +  ins_encode( reg_opc_imm_wide(dst, shift) );
  1.8358 +  ins_pipe(ialu_reg);
  1.8359 +%}
  1.8360 +
  1.8361 +instruct rolL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
  1.8362 +%{
  1.8363 +  effect(USE_DEF dst, USE shift, KILL cr); // expand-only helper: dst is rotated in place
  1.8364 +
  1.8365 +  format %{ "rolq    $dst, $shift" %}
  1.8366 +  opcode(0xD3, 0x0); /* Opcode D3 /0: ROL r/m64, CL */
  1.8367 +  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
  1.8368 +  ins_pipe(ialu_reg_reg);
  1.8369 +%}
  1.8370 +// end of ROL expand
  1.8371 +
  1.8372 +// Rotate Left by one (long): matches (dst << lshift) | (dst >>> rshift); immI_M1 is presumably the constant -1
  1.8373 +instruct rolL_rReg_i1(rRegL dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
  1.8374 +%{
  1.8375 +  match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
  1.8376 +
  1.8377 +  expand %{
  1.8378 +    rolL_rReg_imm1(dst, cr);          // rolq  dst (by one)
  1.8379 +  %}
  1.8380 +%}
  1.8381 +
  1.8382 +// Rotate Left by 8-bit immediate (long): only when the two constant counts sum to a multiple of 64 (see predicate)
  1.8383 +instruct rolL_rReg_i8(rRegL dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
  1.8384 +%{
  1.8385 +  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
  1.8386 +  match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
  1.8387 +
  1.8388 +  expand %{
  1.8389 +    rolL_rReg_imm8(dst, lshift, cr);  // rolq  dst, lshift
  1.8390 +  %}
  1.8391 +%}
  1.8392 +
  1.8393 +// Rotate Left by variable (long): matches (dst << shift) | (dst >>> (zero - shift))
  1.8394 +instruct rolL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
  1.8395 +%{
  1.8396 +  match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI zero shift))));
  1.8397 +
  1.8398 +  expand %{
  1.8399 +    rolL_rReg_CL(dst, shift, cr);     // rolq  dst, shift
  1.8400 +  %}
  1.8401 +%}
  1.8402 +
  1.8403 +// Rotate Left by variable (long): matches (dst << shift) | (dst >>> (c64 - shift))
  1.8404 +instruct rolL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
  1.8405 +%{
  1.8406 +  match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI c64 shift))));
  1.8407 +
  1.8408 +  expand %{
  1.8409 +    rolL_rReg_CL(dst, shift, cr);     // rolq  dst, shift
  1.8410 +  %}
  1.8411 +%}
  1.8412 +
  1.8413 +// ROR expand: encoding-only helpers (no match rule) that the long rotate-right patterns below expand into
  1.8414 +instruct rorL_rReg_imm1(rRegL dst, rFlagsReg cr)
  1.8415 +%{
  1.8416 +  effect(USE_DEF dst, KILL cr);
  1.8417 +
  1.8418 +  format %{ "rorq    $dst" %}
  1.8419 +  opcode(0xD1, 0x1); /* D1 /1: ROR r/m64, 1 (64-bit via the wide REX prefix) */
  1.8420 +  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
  1.8421 +  ins_pipe(ialu_reg);
  1.8422 +%}
  1.8423 +
  1.8424 +instruct rorL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr)
  1.8425 +%{
  1.8426 +  effect(USE_DEF dst, USE shift, KILL cr); // expand-only helper: dst is rotated in place
  1.8427 +
  1.8428 +  format %{ "rorq    $dst, $shift" %}
  1.8429 +  opcode(0xC1, 0x1); /* C1 /1 ib: ROR r/m64, imm8 */
  1.8430 +  ins_encode(reg_opc_imm_wide(dst, shift));
  1.8431 +  ins_pipe(ialu_reg);
  1.8432 +%}
  1.8433 +
  1.8434 +instruct rorL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
  1.8435 +%{
  1.8436 +  effect(USE_DEF dst, USE shift, KILL cr); // expand-only helper: dst is rotated in place
  1.8437 +
  1.8438 +  format %{ "rorq    $dst, $shift" %}
  1.8439 +  opcode(0xD3, 0x1); /* D3 /1: ROR r/m64, CL */
  1.8440 +  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
  1.8441 +  ins_pipe(ialu_reg_reg);
  1.8442 +%}
  1.8443 +// end of ROR expand
  1.8444 +
  1.8445 +// Rotate Right by one (long): matches (dst >>> rshift) | (dst << lshift); immI_M1 is presumably the constant -1
  1.8446 +instruct rorL_rReg_i1(rRegL dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
  1.8447 +%{
  1.8448 +  match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
  1.8449 +
  1.8450 +  expand %{
  1.8451 +    rorL_rReg_imm1(dst, cr);          // rorq  dst (by one)
  1.8452 +  %}
  1.8453 +%}
  1.8454 +
  1.8455 +// Rotate Right by 8-bit immediate (long): only when the two constant counts sum to a multiple of 64 (see predicate)
  1.8456 +instruct rorL_rReg_i8(rRegL dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
  1.8457 +%{
  1.8458 +  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
  1.8459 +  match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
  1.8460 +
  1.8461 +  expand %{
  1.8462 +    rorL_rReg_imm8(dst, rshift, cr);  // rorq  dst, rshift
  1.8463 +  %}
  1.8464 +%}
  1.8465 +
  1.8466 +// Rotate Right by variable (long): matches (dst >>> shift) | (dst << (zero - shift))
  1.8467 +instruct rorL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
  1.8468 +%{
  1.8469 +  match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI zero shift))));
  1.8470 +
  1.8471 +  expand %{
  1.8472 +    rorL_rReg_CL(dst, shift, cr);     // rorq  dst, shift
  1.8473 +  %}
  1.8474 +%}
  1.8475 +
  1.8476 +// Rotate Right by variable (long): matches (dst >>> shift) | (dst << (c64 - shift))
  1.8477 +instruct rorL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
  1.8478 +%{
  1.8479 +  match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI c64 shift))));
  1.8480 +
  1.8481 +  expand %{
  1.8482 +    rorL_rReg_CL(dst, shift, cr);     // rorq  dst, shift
  1.8483 +  %}
  1.8484 +%}
  1.8485 +
  1.8486 +// Logical Instructions
  1.8487 +
  1.8488 +// Integer Logical Instructions
  1.8489 +
  1.8490 +// And Instructions
  1.8491 +// And Register with Register: 32-bit, result replaces dst
  1.8492 +instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
  1.8493 +%{
  1.8494 +  match(Set dst (AndI dst src));
  1.8495 +  effect(KILL cr);
  1.8496 +
  1.8497 +  format %{ "andl    $dst, $src\t# int" %}
  1.8498 +  opcode(0x23); /* 23 /r: AND r32, r/m32 */
  1.8499 +  ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
  1.8500 +  ins_pipe(ialu_reg_reg);
  1.8501 +%}
  1.8502 +
  1.8503 +// And Register with Immediate 255: emitted as a zero-extending byte move of dst onto itself (no cr operand)
  1.8504 +instruct andI_rReg_imm255(rRegI dst, immI_255 src)
  1.8505 +%{
  1.8506 +  match(Set dst (AndI dst src));
  1.8507 +
  1.8508 +  format %{ "movzbl  $dst, $dst\t# int & 0xFF" %}
  1.8509 +  opcode(0x0F, 0xB6); /* 0F B6: MOVZX r32, r/m8 */
  1.8510 +  ins_encode(REX_reg_breg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
  1.8511 +  ins_pipe(ialu_reg);
  1.8512 +%}
  1.8513 +
  1.8514 +// And Register with Immediate 255 and promote to long: mask and ConvI2L folded into one zero-extending byte move
  1.8515 +instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
  1.8516 +%{
  1.8517 +  match(Set dst (ConvI2L (AndI src mask)));
  1.8518 +
  1.8519 +  format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
  1.8520 +  opcode(0x0F, 0xB6); /* 0F B6: MOVZX r32, r/m8 */
  1.8521 +  ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
  1.8522 +  ins_pipe(ialu_reg);
  1.8523 +%}
  1.8524 +
  1.8525 +// And Register with Immediate 65535: emitted as a zero-extending 16-bit move of dst onto itself (no cr operand)
  1.8526 +instruct andI_rReg_imm65535(rRegI dst, immI_65535 src)
  1.8527 +%{
  1.8528 +  match(Set dst (AndI dst src));
  1.8529 +
  1.8530 +  format %{ "movzwl  $dst, $dst\t# int & 0xFFFF" %}
  1.8531 +  opcode(0x0F, 0xB7); /* 0F B7: MOVZX r32, r/m16 */
  1.8532 +  ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
  1.8533 +  ins_pipe(ialu_reg);
  1.8534 +%}
  1.8535 +
  1.8536 +// And Register with Immediate 65535 and promote to long: ConvI2L(AndI src mask) folded into one movzwl
  1.8537 +instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
  1.8538 +%{
  1.8539 +  match(Set dst (ConvI2L (AndI src mask))); // dst is a separate long register; src is not overwritten
  1.8540 +
  1.8541 +  format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
  1.8542 +  opcode(0x0F, 0xB7); // two opcode bytes: 0F B7
  1.8543 +  ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
  1.8544 +  ins_pipe(ialu_reg);
  1.8545 +%}
  1.8546 +
  1.8547 +// And Register with Immediate: dst = dst & constant on ints; the flags operand cr is declared killed
  1.8548 +instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
  1.8549 +%{
  1.8550 +  match(Set dst (AndI dst src));
  1.8551 +  effect(KILL cr);
  1.8552 +
  1.8553 +  format %{ "andl    $dst, $src\t# int" %}
  1.8554 +  opcode(0x81, 0x04); /* Opcode 81 /4 */
  1.8555 +  ins_encode(OpcSErm(dst, src), Con8or32(src)); // NOTE(review): the immediate width is presumably chosen by these encode classes - confirm
  1.8556 +  ins_pipe(ialu_reg);
  1.8557 +%}
  1.8558 +
  1.8559 +// And Register with Memory: dst = dst & (int loaded from src); the flags operand cr is declared killed
  1.8560 +instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
  1.8561 +%{
  1.8562 +  match(Set dst (AndI dst (LoadI src))); // the LoadI is folded into this rule
  1.8563 +  effect(KILL cr);
  1.8564 +
  1.8565 +  ins_cost(125); // explicit cost; the register-only andI rules declare none
  1.8566 +  format %{ "andl    $dst, $src\t# int" %}
  1.8567 +  opcode(0x23);
  1.8568 +  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
  1.8569 +  ins_pipe(ialu_reg_mem);
  1.8570 +%}
  1.8571 +
  1.8572 +// And Memory with Register: load, AndI and StoreI on the same address dst folded into one rule
  1.8573 +instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
  1.8574 +%{
  1.8575 +  match(Set dst (StoreI dst (AndI (LoadI dst) src))); // dst appears as both the load and the store address
  1.8576 +  effect(KILL cr);
  1.8577 +
  1.8578 +  ins_cost(150);
  1.8579 +  format %{ "andl    $dst, $src\t# int" %}
  1.8580 +  opcode(0x21); /* Opcode 21 /r */
  1.8581 +  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst)); // register operand is src, memory operand is dst
  1.8582 +  ins_pipe(ialu_mem_reg);
  1.8583 +%}
  1.8584 +
  1.8585 +// And Memory with Immediate: load, AndI with a constant and StoreI on the same address dst folded into one rule
  1.8586 +instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
  1.8587 +%{
  1.8588 +  match(Set dst (StoreI dst (AndI (LoadI dst) src))); // dst appears as both the load and the store address
  1.8589 +  effect(KILL cr);
  1.8590 +
  1.8591 +  ins_cost(125);
  1.8592 +  format %{ "andl    $dst, $src\t# int" %}
  1.8593 +  opcode(0x81, 0x4); /* Opcode 81 /4 id */
  1.8594 +  ins_encode(REX_mem(dst), OpcSE(src),
  1.8595 +             RM_opc_mem(secondary, dst), Con8or32(src)); // the secondary opcode (0x4) is passed to the memory ModRM encoder
  1.8596 +  ins_pipe(ialu_mem_imm);
  1.8597 +%}
  1.8598 +
  1.8599 +// BMI1 instructions
  1.8600 +instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
  1.8601 +  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2))); // dst = (src1 ^ minus_1) & (int loaded from src2)
  1.8602 +  predicate(UseBMI1Instructions); // rule is gated on this flag
  1.8603 +  effect(KILL cr);
  1.8604 +
  1.8605 +  ins_cost(125);
  1.8606 +  format %{ "andnl  $dst, $src1, $src2" %}
  1.8607 +
  1.8608 +  ins_encode %{
  1.8609 +    __ andnl($dst$$Register, $src1$$Register, $src2$$Address); // three-operand form: dst is distinct from both sources
  1.8610 +  %}
  1.8611 +  ins_pipe(ialu_reg_mem);
  1.8612 +%}
  1.8613 +
  1.8614 +instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
  1.8615 +  match(Set dst (AndI (XorI src1 minus_1) src2)); // dst = (src1 ^ minus_1) & src2
  1.8616 +  predicate(UseBMI1Instructions); // rule is gated on this flag
  1.8617 +  effect(KILL cr);
  1.8618 +
  1.8619 +  format %{ "andnl  $dst, $src1, $src2" %}
  1.8620 +
  1.8621 +  ins_encode %{
  1.8622 +    __ andnl($dst$$Register, $src1$$Register, $src2$$Register); // three-operand form: dst is distinct from both sources
  1.8623 +  %}
  1.8624 +  ins_pipe(ialu_reg);
  1.8625 +%}
  1.8626 +
  1.8627 +instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, rFlagsReg cr) %{
  1.8628 +  match(Set dst (AndI (SubI imm_zero src) src)); // dst = (imm_zero - src) & src
  1.8629 +  predicate(UseBMI1Instructions); // rule is gated on this flag
  1.8630 +  effect(KILL cr);
  1.8631 +
  1.8632 +  format %{ "blsil  $dst, $src" %}
  1.8633 +
  1.8634 +  ins_encode %{
  1.8635 +    __ blsil($dst$$Register, $src$$Register);
  1.8636 +  %}
  1.8637 +  ins_pipe(ialu_reg);
  1.8638 +%}
  1.8639 +
  1.8640 +instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, rFlagsReg cr) %{
  1.8641 +  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) )); // both LoadI nodes name the same memory operand src
  1.8642 +  predicate(UseBMI1Instructions); // rule is gated on this flag
  1.8643 +  effect(KILL cr);
  1.8644 +
  1.8645 +  ins_cost(125);
  1.8646 +  format %{ "blsil  $dst, $src" %}
  1.8647 +
  1.8648 +  ins_encode %{
  1.8649 +    __ blsil($dst$$Register, $src$$Address);
  1.8650 +  %}
  1.8651 +  ins_pipe(ialu_reg_mem);
  1.8652 +%}
  1.8653 +
  1.8654 +instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
  1.8655 +%{
  1.8656 +  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) ); // dst = (load(src) + minus_1) ^ load(src)
  1.8657 +  predicate(UseBMI1Instructions); // rule is gated on this flag
  1.8658 +  effect(KILL cr);
  1.8659 +
  1.8660 +  ins_cost(125);
  1.8661 +  format %{ "blsmskl $dst, $src" %}
  1.8662 +
  1.8663 +  ins_encode %{
  1.8664 +    __ blsmskl($dst$$Register, $src$$Address);
  1.8665 +  %}
  1.8666 +  ins_pipe(ialu_reg_mem);
  1.8667 +%}
  1.8668 +
  1.8669 +instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
  1.8670 +%{
  1.8671 +  match(Set dst (XorI (AddI src minus_1) src)); // dst = (src + minus_1) ^ src
  1.8672 +  predicate(UseBMI1Instructions); // rule is gated on this flag
  1.8673 +  effect(KILL cr);
  1.8674 +
  1.8675 +  format %{ "blsmskl $dst, $src" %}
  1.8676 +
  1.8677 +  ins_encode %{
  1.8678 +    __ blsmskl($dst$$Register, $src$$Register);
  1.8679 +  %}
  1.8680 +
  1.8681 +  ins_pipe(ialu_reg);
  1.8682 +%}
  1.8683 +
  1.8684 +instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
  1.8685 +%{
  1.8686 +  match(Set dst (AndI (AddI src minus_1) src) ); // dst = (src + minus_1) & src
  1.8687 +  predicate(UseBMI1Instructions); // rule is gated on this flag
  1.8688 +  effect(KILL cr);
  1.8689 +
  1.8690 +  format %{ "blsrl  $dst, $src" %}
  1.8691 +
  1.8692 +  ins_encode %{
  1.8693 +    __ blsrl($dst$$Register, $src$$Register);
  1.8694 +  %}
  1.8695 +
  1.8696 +  ins_pipe(ialu_reg); // register-only form: same pipe class as blsiI_rReg_rReg and blsmskI_rReg_rReg (was ialu_reg_mem)
  1.8697 +%}
  1.8698 +
  1.8699 +instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
  1.8700 +%{
  1.8701 +  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) ); // dst = (load(src) + minus_1) & load(src)
  1.8702 +  predicate(UseBMI1Instructions); // rule is gated on this flag
  1.8703 +  effect(KILL cr);
  1.8704 +
  1.8705 +  ins_cost(125);
  1.8706 +  format %{ "blsrl  $dst, $src" %}
  1.8707 +
  1.8708 +  ins_encode %{
  1.8709 +    __ blsrl($dst$$Register, $src$$Address);
  1.8710 +  %}
  1.8711 +
  1.8712 +  ins_pipe(ialu_reg_mem); // memory-operand form: same pipe class as blsiI_rReg_mem and blsmskI_rReg_mem (was ialu_reg)
  1.8713 +%}
  1.8714 +
  1.8715 +// Or Instructions
  1.8716 +// Or Register with Register: dst = dst | src on ints; the flags operand cr is declared killed
  1.8717 +instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
  1.8718 +%{
  1.8719 +  match(Set dst (OrI dst src));
  1.8720 +  effect(KILL cr);
  1.8721 +
  1.8722 +  format %{ "orl     $dst, $src\t# int" %}
  1.8723 +  opcode(0x0B); // single opcode byte for the reg, reg form
  1.8724 +  ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
  1.8725 +  ins_pipe(ialu_reg_reg);
  1.8726 +%}
  1.8727 +
  1.8728 +// Or Register with Immediate: dst = dst | constant on ints; the flags operand cr is declared killed
  1.8729 +instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
  1.8730 +%{
  1.8731 +  match(Set dst (OrI dst src));
  1.8732 +  effect(KILL cr);
  1.8733 +
  1.8734 +  format %{ "orl     $dst, $src\t# int" %}
  1.8735 +  opcode(0x81, 0x01); /* Opcode 81 /1 id */
  1.8736 +  ins_encode(OpcSErm(dst, src), Con8or32(src)); // NOTE(review): the immediate width is presumably chosen by these encode classes - confirm
  1.8737 +  ins_pipe(ialu_reg);
  1.8738 +%}
  1.8739 +
  1.8740 +// Or Register with Memory: dst = dst | (int loaded from src); the flags operand cr is declared killed
  1.8741 +instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
  1.8742 +%{
  1.8743 +  match(Set dst (OrI dst (LoadI src))); // the LoadI is folded into this rule
  1.8744 +  effect(KILL cr);
  1.8745 +
  1.8746 +  ins_cost(125); // explicit cost; the register-only orI rules declare none
  1.8747 +  format %{ "orl     $dst, $src\t# int" %}
  1.8748 +  opcode(0x0B);
  1.8749 +  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
  1.8750 +  ins_pipe(ialu_reg_mem);
  1.8751 +%}
  1.8752 +
  1.8753 +// Or Memory with Register: load, OrI and StoreI on the same address dst folded into one rule
  1.8754 +instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
  1.8755 +%{
  1.8756 +  match(Set dst (StoreI dst (OrI (LoadI dst) src))); // dst appears as both the load and the store address
  1.8757 +  effect(KILL cr);
  1.8758 +
  1.8759 +  ins_cost(150);
  1.8760 +  format %{ "orl     $dst, $src\t# int" %}
  1.8761 +  opcode(0x09); /* Opcode 09 /r */
  1.8762 +  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst)); // register operand is src, memory operand is dst
  1.8763 +  ins_pipe(ialu_mem_reg);
  1.8764 +%}
  1.8765 +
  1.8766 +// Or Memory with Immediate: load, OrI with a constant and StoreI on the same address dst folded into one rule
  1.8767 +instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
  1.8768 +%{
  1.8769 +  match(Set dst (StoreI dst (OrI (LoadI dst) src))); // dst appears as both the load and the store address
  1.8770 +  effect(KILL cr);
  1.8771 +
  1.8772 +  ins_cost(125);
  1.8773 +  format %{ "orl     $dst, $src\t# int" %}
  1.8774 +  opcode(0x81, 0x1); /* Opcode 81 /1 id */
  1.8775 +  ins_encode(REX_mem(dst), OpcSE(src),
  1.8776 +             RM_opc_mem(secondary, dst), Con8or32(src)); // the secondary opcode (0x1) is passed to the memory ModRM encoder
  1.8777 +  ins_pipe(ialu_mem_imm);
  1.8778 +%}
  1.8779 +
  1.8780 +// Xor Instructions
  1.8781 +// Xor Register with Register: dst = dst ^ src on ints; the flags operand cr is declared killed
  1.8782 +instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
  1.8783 +%{
  1.8784 +  match(Set dst (XorI dst src));
  1.8785 +  effect(KILL cr);
  1.8786 +
  1.8787 +  format %{ "xorl    $dst, $src\t# int" %}
  1.8788 +  opcode(0x33); // single opcode byte for the reg, reg form
  1.8789 +  ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
  1.8790 +  ins_pipe(ialu_reg_reg);
  1.8791 +%}
  1.8792 +
  1.8793 +// Xor Register with Immediate -1: dst ^ imm is emitted as notl dst; no flags operand is declared
  1.8794 +instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
  1.8795 +  match(Set dst (XorI dst imm));
  1.8796 +
  1.8797 +  format %{ "notl   $dst" %}
  1.8798 +  ins_encode %{
  1.8799 +     __ notl($dst$$Register); // the format above now names the same mnemonic that is emitted here
  1.8800 +  %}
  1.8801 +  ins_pipe(ialu_reg);
  1.8802 +%}
  1.8803 +
  1.8804 +// Xor Register with Immediate: dst = dst ^ constant on ints; the flags operand cr is declared killed
  1.8805 +instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
  1.8806 +%{
  1.8807 +  match(Set dst (XorI dst src));
  1.8808 +  effect(KILL cr);
  1.8809 +
  1.8810 +  format %{ "xorl    $dst, $src\t# int" %}
  1.8811 +  opcode(0x81, 0x06); /* Opcode 81 /6 id */
  1.8812 +  ins_encode(OpcSErm(dst, src), Con8or32(src)); // NOTE(review): the immediate width is presumably chosen by these encode classes - confirm
  1.8813 +  ins_pipe(ialu_reg);
  1.8814 +%}
  1.8815 +
  1.8816 +// Xor Register with Memory: dst = dst ^ (int loaded from src); the flags operand cr is declared killed
  1.8817 +instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
  1.8818 +%{
  1.8819 +  match(Set dst (XorI dst (LoadI src))); // the LoadI is folded into this rule
  1.8820 +  effect(KILL cr);
  1.8821 +
  1.8822 +  ins_cost(125); // explicit cost; the register-only xorI rules declare none
  1.8823 +  format %{ "xorl    $dst, $src\t# int" %}
  1.8824 +  opcode(0x33);
  1.8825 +  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
  1.8826 +  ins_pipe(ialu_reg_mem);
  1.8827 +%}
  1.8828 +
  1.8829 +// Xor Memory with Register: load, XorI and StoreI on the same address dst folded into one rule
  1.8830 +instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
  1.8831 +%{
  1.8832 +  match(Set dst (StoreI dst (XorI (LoadI dst) src))); // dst appears as both the load and the store address
  1.8833 +  effect(KILL cr);
  1.8834 +
  1.8835 +  ins_cost(150);
  1.8836 +  format %{ "xorl    $dst, $src\t# int" %}
  1.8837 +  opcode(0x31); /* Opcode 31 /r */
  1.8838 +  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst)); // register operand is src, memory operand is dst
  1.8839 +  ins_pipe(ialu_mem_reg);
  1.8840 +%}
  1.8841 +
  1.8842 +// Xor Memory with Immediate: load, XorI with a constant and StoreI on the same address dst folded into one rule
  1.8843 +instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
  1.8844 +%{
  1.8845 +  match(Set dst (StoreI dst (XorI (LoadI dst) src))); // dst appears as both the load and the store address
  1.8846 +  effect(KILL cr);
  1.8847 +
  1.8848 +  ins_cost(125);
  1.8849 +  format %{ "xorl    $dst, $src\t# int" %}
  1.8850 +  opcode(0x81, 0x6); /* Opcode 81 /6 id */
  1.8851 +  ins_encode(REX_mem(dst), OpcSE(src),
  1.8852 +             RM_opc_mem(secondary, dst), Con8or32(src)); // the secondary opcode (0x6) is passed to the memory ModRM encoder
  1.8853 +  ins_pipe(ialu_mem_imm);
  1.8854 +%}
  1.8855 +
  1.8856 +
  1.8857 +// Long Logical Instructions
  1.8858 +
  1.8859 +// And Instructions
  1.8860 +// And Register with Register: dst = dst & src on longs; the flags operand cr is declared killed
  1.8861 +instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
  1.8862 +%{
  1.8863 +  match(Set dst (AndL dst src));
  1.8864 +  effect(KILL cr);
  1.8865 +
  1.8866 +  format %{ "andq    $dst, $src\t# long" %}
  1.8867 +  opcode(0x23); // same opcode byte as andI_rReg; only the REX encode class differs
  1.8868 +  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
  1.8869 +  ins_pipe(ialu_reg_reg);
  1.8870 +%}
  1.8871 +
  1.8872 +// And Register with Immediate 255: printed as movzbq dst, dst; no flags operand is declared
  1.8873 +instruct andL_rReg_imm255(rRegL dst, immL_255 src)
  1.8874 +%{
  1.8875 +  match(Set dst (AndL dst src));
  1.8876 +
  1.8877 +  format %{ "movzbq  $dst, $dst\t# long & 0xFF" %}
  1.8878 +  opcode(0x0F, 0xB6); // two opcode bytes: 0F B6
  1.8879 +  ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst)); // dst is both source and destination; the constant is not encoded
  1.8880 +  ins_pipe(ialu_reg);
  1.8881 +%}
  1.8882 +
  1.8883 +// And Register with Immediate 65535: printed as movzwq dst, dst; no flags operand is declared
  1.8884 +instruct andL_rReg_imm65535(rRegL dst, immL_65535 src)
  1.8885 +%{
  1.8886 +  match(Set dst (AndL dst src));
  1.8887 +
  1.8888 +  format %{ "movzwq  $dst, $dst\t# long & 0xFFFF" %}
  1.8889 +  opcode(0x0F, 0xB7); // two opcode bytes: 0F B7
  1.8890 +  ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst)); // dst is both source and destination; the constant is not encoded
  1.8891 +  ins_pipe(ialu_reg);
  1.8892 +%}
  1.8893 +
  1.8894 +// And Register with Immediate: dst = dst & constant on longs; the constant operand is typed immL32
  1.8895 +instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
  1.8896 +%{
  1.8897 +  match(Set dst (AndL dst src));
  1.8898 +  effect(KILL cr);
  1.8899 +
  1.8900 +  format %{ "andq    $dst, $src\t# long" %}
  1.8901 +  opcode(0x81, 0x04); /* Opcode 81 /4 */
  1.8902 +  ins_encode(OpcSErm_wide(dst, src), Con8or32(src)); // NOTE(review): the immediate width is presumably chosen by these encode classes - confirm
  1.8903 +  ins_pipe(ialu_reg);
  1.8904 +%}
  1.8905 +
  1.8906 +// And Register with Memory: dst = dst & (long loaded from src); the flags operand cr is declared killed
  1.8907 +instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
  1.8908 +%{
  1.8909 +  match(Set dst (AndL dst (LoadL src))); // the LoadL is folded into this rule
  1.8910 +  effect(KILL cr);
  1.8911 +
  1.8912 +  ins_cost(125); // explicit cost; the register-only andL rules declare none
  1.8913 +  format %{ "andq    $dst, $src\t# long" %}
  1.8914 +  opcode(0x23);
  1.8915 +  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
  1.8916 +  ins_pipe(ialu_reg_mem);
  1.8917 +%}
  1.8918 +
  1.8919 +// And Memory with Register: load, AndL and StoreL on the same address dst folded into one rule
  1.8920 +instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
  1.8921 +%{
  1.8922 +  match(Set dst (StoreL dst (AndL (LoadL dst) src))); // dst appears as both the load and the store address
  1.8923 +  effect(KILL cr);
  1.8924 +
  1.8925 +  ins_cost(150);
  1.8926 +  format %{ "andq    $dst, $src\t# long" %}
  1.8927 +  opcode(0x21); /* Opcode 21 /r */
  1.8928 +  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst)); // register operand is src, memory operand is dst
  1.8929 +  ins_pipe(ialu_mem_reg);
  1.8930 +%}
  1.8931 +
  1.8932 +// And Memory with Immediate: load, AndL with an immL32 constant and StoreL on the same address dst folded into one rule
  1.8933 +instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
  1.8934 +%{
  1.8935 +  match(Set dst (StoreL dst (AndL (LoadL dst) src))); // dst appears as both the load and the store address
  1.8936 +  effect(KILL cr);
  1.8937 +
  1.8938 +  ins_cost(125);
  1.8939 +  format %{ "andq    $dst, $src\t# long" %}
  1.8940 +  opcode(0x81, 0x4); /* Opcode 81 /4 id */
  1.8941 +  ins_encode(REX_mem_wide(dst), OpcSE(src),
  1.8942 +             RM_opc_mem(secondary, dst), Con8or32(src)); // the secondary opcode (0x4) is passed to the memory ModRM encoder
  1.8943 +  ins_pipe(ialu_mem_imm);
  1.8944 +%}
  1.8945 +
  1.8946 +// BMI1 instructions
  1.8947 +instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
  1.8948 +  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2))); // dst = (src1 ^ minus_1) & (long loaded from src2)
  1.8949 +  predicate(UseBMI1Instructions); // rule is gated on this flag
  1.8950 +  effect(KILL cr);
  1.8951 +
  1.8952 +  ins_cost(125);
  1.8953 +  format %{ "andnq  $dst, $src1, $src2" %}
  1.8954 +
  1.8955 +  ins_encode %{
  1.8956 +    __ andnq($dst$$Register, $src1$$Register, $src2$$Address); // three-operand form: dst is distinct from both sources
  1.8957 +  %}
  1.8958 +  ins_pipe(ialu_reg_mem);
  1.8959 +%}
  1.8960 +
  1.8961 +instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
  1.8962 +  match(Set dst (AndL (XorL src1 minus_1) src2)); // dst = (src1 ^ minus_1) & src2
  1.8963 +  predicate(UseBMI1Instructions); // rule is gated on this flag
  1.8964 +  effect(KILL cr);
  1.8965 +
  1.8966 +  format %{ "andnq  $dst, $src1, $src2" %}
  1.8967 +
  1.8968 +  ins_encode %{
  1.8969 +    __ andnq($dst$$Register, $src1$$Register, $src2$$Register); // three-operand form: dst is distinct from both sources
  1.8970 +  %}
  1.8971 +  ins_pipe(ialu_reg); // register-only form: same pipe class as the int twin andnI_rReg_rReg_rReg (was ialu_reg_mem)
  1.8972 +%}
  1.8973 +
  1.8974 +instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
  1.8975 +  match(Set dst (AndL (SubL imm_zero src) src)); // dst = (imm_zero - src) & src
  1.8976 +  predicate(UseBMI1Instructions); // rule is gated on this flag
  1.8977 +  effect(KILL cr);
  1.8978 +
  1.8979 +  format %{ "blsiq  $dst, $src" %}
  1.8980 +
  1.8981 +  ins_encode %{
  1.8982 +    __ blsiq($dst$$Register, $src$$Register);
  1.8983 +  %}
  1.8984 +  ins_pipe(ialu_reg);
  1.8985 +%}
  1.8986 +
  1.8987 +instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
  1.8988 +  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )); // both LoadL nodes name the same memory operand src
  1.8989 +  predicate(UseBMI1Instructions); // rule is gated on this flag
  1.8990 +  effect(KILL cr);
  1.8991 +
  1.8992 +  ins_cost(125);
  1.8993 +  format %{ "blsiq  $dst, $src" %}
  1.8994 +
  1.8995 +  ins_encode %{
  1.8996 +    __ blsiq($dst$$Register, $src$$Address);
  1.8997 +  %}
  1.8998 +  ins_pipe(ialu_reg_mem);
  1.8999 +%}
  1.9000 +
  1.9001 +instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
  1.9002 +%{
  1.9003 +  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) ); // dst = (load(src) + minus_1) ^ load(src)
  1.9004 +  predicate(UseBMI1Instructions); // rule is gated on this flag
  1.9005 +  effect(KILL cr);
  1.9006 +
  1.9007 +  ins_cost(125);
  1.9008 +  format %{ "blsmskq $dst, $src" %}
  1.9009 +
  1.9010 +  ins_encode %{
  1.9011 +    __ blsmskq($dst$$Register, $src$$Address);
  1.9012 +  %}
  1.9013 +  ins_pipe(ialu_reg_mem);
  1.9014 +%}
  1.9015 +
  1.9016 +instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
  1.9017 +%{
  1.9018 +  match(Set dst (XorL (AddL src minus_1) src)); // dst = (src + minus_1) ^ src
  1.9019 +  predicate(UseBMI1Instructions); // rule is gated on this flag
  1.9020 +  effect(KILL cr);
  1.9021 +
  1.9022 +  format %{ "blsmskq $dst, $src" %}
  1.9023 +
  1.9024 +  ins_encode %{
  1.9025 +    __ blsmskq($dst$$Register, $src$$Register);
  1.9026 +  %}
  1.9027 +
  1.9028 +  ins_pipe(ialu_reg);
  1.9029 +%}
  1.9030 +
  1.9031 +instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
  1.9032 +%{
  1.9033 +  match(Set dst (AndL (AddL src minus_1) src) ); // dst = (src + minus_1) & src
  1.9034 +  predicate(UseBMI1Instructions); // rule is gated on this flag
  1.9035 +  effect(KILL cr);
  1.9036 +
  1.9037 +  format %{ "blsrq  $dst, $src" %}
  1.9038 +
  1.9039 +  ins_encode %{
  1.9040 +    __ blsrq($dst$$Register, $src$$Register);
  1.9041 +  %}
  1.9042 +
  1.9043 +  ins_pipe(ialu_reg);
  1.9044 +%}
  1.9045 +
  1.9046 +instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
  1.9047 +%{
  1.9048 +  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) ); // dst = (load(src) + minus_1) & load(src)
  1.9049 +  predicate(UseBMI1Instructions); // rule is gated on this flag
  1.9050 +  effect(KILL cr);
  1.9051 +
  1.9052 +  ins_cost(125);
  1.9053 +  format %{ "blsrq  $dst, $src" %}
  1.9054 +
  1.9055 +  ins_encode %{
  1.9056 +    __ blsrq($dst$$Register, $src$$Address);
  1.9057 +  %}
  1.9058 +
  1.9059 +  ins_pipe(ialu_reg_mem); // memory-operand form: same pipe class as blsiL_rReg_mem and blsmskL_rReg_mem (was ialu_reg)
  1.9060 +%}
  1.9061 +
  1.9062 +// Or Instructions
  1.9063 +// Or Register with Register: dst = dst | src on longs; the flags operand cr is declared killed
  1.9064 +instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
  1.9065 +%{
  1.9066 +  match(Set dst (OrL dst src));
  1.9067 +  effect(KILL cr);
  1.9068 +
  1.9069 +  format %{ "orq     $dst, $src\t# long" %}
  1.9070 +  opcode(0x0B); // same opcode byte as orI_rReg; only the REX encode class differs
  1.9071 +  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
  1.9072 +  ins_pipe(ialu_reg_reg);
  1.9073 +%}
  1.9074 +
  1.9075 +// Use any_RegP to match R15 (TLS register) without spilling; ORs a pointer reinterpreted via CastP2X into dst.
  1.9076 +instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
  1.9077 +  match(Set dst (OrL dst (CastP2X src))); // the CastP2X is folded into this rule
  1.9078 +  effect(KILL cr);
  1.9079 +
  1.9080 +  format %{ "orq     $dst, $src\t# long" %}
  1.9081 +  opcode(0x0B); // same encoding as orL_rReg
  1.9082 +  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
  1.9083 +  ins_pipe(ialu_reg_reg);
  1.9084 +%}
  1.9085 +
  1.9086 +
  1.9087 +// Or Register with Immediate: dst = dst | constant on longs; the constant operand is typed immL32
  1.9088 +instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
  1.9089 +%{
  1.9090 +  match(Set dst (OrL dst src));
  1.9091 +  effect(KILL cr);
  1.9092 +
  1.9093 +  format %{ "orq     $dst, $src\t# long" %}
  1.9094 +  opcode(0x81, 0x01); /* Opcode 81 /1 id */
  1.9095 +  ins_encode(OpcSErm_wide(dst, src), Con8or32(src)); // NOTE(review): the immediate width is presumably chosen by these encode classes - confirm
  1.9096 +  ins_pipe(ialu_reg);
  1.9097 +%}
  1.9098 +
  1.9099 +// Or Register with Memory: dst = dst | (long loaded from src); the flags operand cr is declared killed
  1.9100 +instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
  1.9101 +%{
  1.9102 +  match(Set dst (OrL dst (LoadL src))); // the LoadL is folded into this rule
  1.9103 +  effect(KILL cr);
  1.9104 +
  1.9105 +  ins_cost(125); // explicit cost; the register-only orL rules declare none
  1.9106 +  format %{ "orq     $dst, $src\t# long" %}
  1.9107 +  opcode(0x0B);
  1.9108 +  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
  1.9109 +  ins_pipe(ialu_reg_mem);
  1.9110 +%}
  1.9111 +
  1.9112 +// Or Memory with Register: load, OrL and StoreL on the same address dst folded into one rule
  1.9113 +instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
  1.9114 +%{
  1.9115 +  match(Set dst (StoreL dst (OrL (LoadL dst) src))); // dst appears as both the load and the store address
  1.9116 +  effect(KILL cr);
  1.9117 +
  1.9118 +  ins_cost(150);
  1.9119 +  format %{ "orq     $dst, $src\t# long" %}
  1.9120 +  opcode(0x09); /* Opcode 09 /r */
  1.9121 +  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst)); // register operand is src, memory operand is dst
  1.9122 +  ins_pipe(ialu_mem_reg);
  1.9123 +%}
  1.9124 +
  1.9125 +// Or Memory with Immediate: load, OrL with an immL32 constant and StoreL on the same address dst folded into one rule
  1.9126 +instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
  1.9127 +%{
  1.9128 +  match(Set dst (StoreL dst (OrL (LoadL dst) src))); // dst appears as both the load and the store address
  1.9129 +  effect(KILL cr);
  1.9130 +
  1.9131 +  ins_cost(125);
  1.9132 +  format %{ "orq     $dst, $src\t# long" %}
  1.9133 +  opcode(0x81, 0x1); /* Opcode 81 /1 id */
  1.9134 +  ins_encode(REX_mem_wide(dst), OpcSE(src),
  1.9135 +             RM_opc_mem(secondary, dst), Con8or32(src)); // the secondary opcode (0x1) is passed to the memory ModRM encoder
  1.9136 +  ins_pipe(ialu_mem_imm);
  1.9137 +%}
  1.9138 +
  1.9139 +// Xor Instructions
  1.9140 +// Xor Register with Register: dst = dst ^ src on longs; the flags operand cr is declared killed
  1.9141 +instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
  1.9142 +%{
  1.9143 +  match(Set dst (XorL dst src));
  1.9144 +  effect(KILL cr);
  1.9145 +
  1.9146 +  format %{ "xorq    $dst, $src\t# long" %}
  1.9147 +  opcode(0x33); // same opcode byte as xorI_rReg; only the REX encode class differs
  1.9148 +  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
  1.9149 +  ins_pipe(ialu_reg_reg);
  1.9150 +%}
  1.9151 +
  1.9152 +// Xor Register with Immediate -1: dst ^ imm is emitted as notq dst; no flags operand is declared
  1.9153 +instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
  1.9154 +  match(Set dst (XorL dst imm));
  1.9155 +
  1.9156 +  format %{ "notq   $dst" %}
  1.9157 +  ins_encode %{
  1.9158 +     __ notq($dst$$Register); // the constant operand is not encoded
  1.9159 +  %}
  1.9160 +  ins_pipe(ialu_reg);
  1.9161 +%}
  1.9162 +
  1.9163 +// Xor Register with Immediate: dst = dst ^ constant on longs; the constant operand is typed immL32
  1.9164 +instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
  1.9165 +%{
  1.9166 +  match(Set dst (XorL dst src));
  1.9167 +  effect(KILL cr);
  1.9168 +
  1.9169 +  format %{ "xorq    $dst, $src\t# long" %}
  1.9170 +  opcode(0x81, 0x06); /* Opcode 81 /6 id */
  1.9171 +  ins_encode(OpcSErm_wide(dst, src), Con8or32(src)); // NOTE(review): the immediate width is presumably chosen by these encode classes - confirm
  1.9172 +  ins_pipe(ialu_reg);
  1.9173 +%}
  1.9174 +
  1.9175 +// Xor Register with Memory: dst = dst ^ (long loaded from src); the flags operand cr is declared killed
  1.9176 +instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
  1.9177 +%{
  1.9178 +  match(Set dst (XorL dst (LoadL src))); // the LoadL is folded into this rule
  1.9179 +  effect(KILL cr);
  1.9180 +
  1.9181 +  ins_cost(125); // explicit cost; the register-only xorL rules declare none
  1.9182 +  format %{ "xorq    $dst, $src\t# long" %}
  1.9183 +  opcode(0x33);
  1.9184 +  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
  1.9185 +  ins_pipe(ialu_reg_mem);
  1.9186 +%}
  1.9187 +
  1.9188 +// Xor Memory with Register: load, XorL and StoreL on the same address dst folded into one rule
  1.9189 +instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
  1.9190 +%{
  1.9191 +  match(Set dst (StoreL dst (XorL (LoadL dst) src))); // dst appears as both the load and the store address
  1.9192 +  effect(KILL cr);
  1.9193 +
  1.9194 +  ins_cost(150);
  1.9195 +  format %{ "xorq    $dst, $src\t# long" %}
  1.9196 +  opcode(0x31); /* Opcode 31 /r */
  1.9197 +  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst)); // register operand is src, memory operand is dst
  1.9198 +  ins_pipe(ialu_mem_reg);
  1.9199 +%}
  1.9200 +
  1.9201 +// Xor Memory with Immediate: load, XorL with an immL32 constant and StoreL on the same address dst folded into one rule
  1.9202 +instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
  1.9203 +%{
  1.9204 +  match(Set dst (StoreL dst (XorL (LoadL dst) src))); // dst appears as both the load and the store address
  1.9205 +  effect(KILL cr);
  1.9206 +
  1.9207 +  ins_cost(125);
  1.9208 +  format %{ "xorq    $dst, $src\t# long" %}
  1.9209 +  opcode(0x81, 0x6); /* Opcode 81 /6 id */
  1.9210 +  ins_encode(REX_mem_wide(dst), OpcSE(src),
  1.9211 +             RM_opc_mem(secondary, dst), Con8or32(src)); // the secondary opcode (0x6) is passed to the memory ModRM encoder
  1.9212 +  ins_pipe(ialu_mem_imm);
  1.9213 +%}
  1.9214 +
  1.9215 +// Convert Int to Boolean: three-instruction sequence testl src,src / setnz dst / movzbl dst,dst
  1.9216 +instruct convI2B(rRegI dst, rRegI src, rFlagsReg cr)
  1.9217 +%{
  1.9218 +  match(Set dst (Conv2B src));
  1.9219 +  effect(KILL cr); // the testl below writes the flags
  1.9220 +
  1.9221 +  format %{ "testl   $src, $src\t# ci2b\n\t"
  1.9222 +            "setnz   $dst\n\t"
  1.9223 +            "movzbl  $dst, $dst" %}
  1.9224 +  ins_encode(REX_reg_reg(src, src), opc_reg_reg(0x85, src, src), // testl
  1.9225 +             setNZ_reg(dst), // setnz
  1.9226 +             REX_reg_breg(dst, dst), // movzbl
  1.9227 +             Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
  1.9228 +  ins_pipe(pipe_slow); // XXX
  1.9229 +%}
  1.9230 +
  1.9231 +// Convert Pointer to Boolean: same sequence as convI2B but with a wide (testq) test of the pointer
  1.9232 +instruct convP2B(rRegI dst, rRegP src, rFlagsReg cr)
  1.9233 +%{
  1.9234 +  match(Set dst (Conv2B src));
  1.9235 +  effect(KILL cr); // the testq below writes the flags
  1.9236 +
  1.9237 +  format %{ "testq   $src, $src\t# cp2b\n\t"
  1.9238 +            "setnz   $dst\n\t"
  1.9239 +            "movzbl  $dst, $dst" %}
  1.9240 +  ins_encode(REX_reg_reg_wide(src, src), opc_reg_reg(0x85, src, src), // testq
  1.9241 +             setNZ_reg(dst), // setnz
  1.9242 +             REX_reg_breg(dst, dst), // movzbl
  1.9243 +             Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
  1.9244 +  ins_pipe(pipe_slow); // XXX
  1.9245 +%}
  1.9246 +
  1.9247 +instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
  1.9248 +%{
  1.9249 +  match(Set dst (CmpLTMask p q)); // general form: p and q are arbitrary int registers
  1.9250 +  effect(KILL cr);
  1.9251 +
  1.9252 +  ins_cost(400); // four-instruction sequence; cmpLTMask0 (compare with zero) is costed at 100
  1.9253 +  format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
  1.9254 +            "setlt   $dst\n\t"
  1.9255 +            "movzbl  $dst, $dst\n\t"
  1.9256 +            "negl    $dst" %}
  1.9257 +  ins_encode(REX_reg_reg(p, q), opc_reg_reg(0x3B, p, q), // cmpl
  1.9258 +             setLT_reg(dst), // setlt
  1.9259 +             REX_reg_breg(dst, dst), // movzbl
  1.9260 +             Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst),
  1.9261 +             neg_reg(dst)); // negl: turns the 0/1 byte into 0/-1
  1.9262 +  ins_pipe(pipe_slow);
  1.9263 +%}
  1.9264 +
  1.9265 +instruct cmpLTMask0(rRegI dst, immI0 zero, rFlagsReg cr)
  1.9266 +%{
  1.9267 +  match(Set dst (CmpLTMask dst zero)); // special case: compare dst against the immI0 operand, in place
  1.9268 +  effect(KILL cr);
  1.9269 +
  1.9270 +  ins_cost(100); // single instruction; the general cmpLTMask rule is costed at 400
  1.9271 +  format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
  1.9272 +  ins_encode %{
  1.9273 +  __ sarl($dst$$Register, 31); // arithmetic shift right by 31 replicates the sign bit across dst
  1.9274 +  %}
  1.9275 +  ins_pipe(ialu_reg);
  1.9276 +%}
  1.9277 +
  1.9278 +/* Better to save a register than avoid a branch: p = (p - q) + (CmpLTMask(p, q) & y), done with subl / jge / addl */
  1.9279 +instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
  1.9280 +%{
  1.9281 +  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  1.9282 +  effect(KILL cr);
  1.9283 +  ins_cost(300);
  1.9284 +  format %{ "subl   $p,$q\t# cadd_cmpLTMask\n\t"
  1.9285 +            "jge    done\n\t"
  1.9286 +            "addl   $p,$y\n"
  1.9287 +            "done:  " %}
  1.9288 +  ins_encode %{
  1.9289 +    Register Rp = $p$$Register;
  1.9290 +    Register Rq = $q$$Register;
  1.9291 +    Register Ry = $y$$Register;
  1.9292 +    Label done;
  1.9293 +    __ subl(Rp, Rq); // p -= q; the branch below uses the flags from this subtraction
  1.9294 +    __ jccb(Assembler::greaterEqual, done); // original p >= q: skip the add
  1.9295 +    __ addl(Rp, Ry); // original p < q: add y
  1.9296 +    __ bind(done);
  1.9297 +  %}
  1.9298 +  ins_pipe(pipe_cmplt);
  1.9299 +%}
  1.9300 +
  1.9301 +/* Better to save a register than avoid a branch: y = CmpLTMask(p, q) & y, done with cmpl / jlt / xorl */
  1.9302 +instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
  1.9303 +%{
  1.9304 +  match(Set y (AndI (CmpLTMask p q) y)); // y is both an input and the result
  1.9305 +  effect(KILL cr);
  1.9306 +
  1.9307 +  ins_cost(300);
  1.9308 +
  1.9309 +  format %{ "cmpl     $p, $q\t# and_cmpLTMask\n\t"
  1.9310 +            "jlt      done\n\t"
  1.9311 +            "xorl     $y, $y\n"
  1.9312 +            "done:  " %}
  1.9313 +  ins_encode %{
  1.9314 +    Register Rp = $p$$Register;
  1.9315 +    Register Rq = $q$$Register;
  1.9316 +    Register Ry = $y$$Register;
  1.9317 +    Label done;
  1.9318 +    __ cmpl(Rp, Rq);
  1.9319 +    __ jccb(Assembler::less, done); // p < q: y is left unchanged
  1.9320 +    __ xorl(Ry, Ry); // otherwise y is zeroed
  1.9321 +    __ bind(done);
  1.9322 +  %}
  1.9323 +  ins_pipe(pipe_cmplt);
  1.9324 +%}
  1.9325 +
  1.9326 +
  1.9327 +//---------- FP Instructions------------------------------------------------
  1.9328 +
  1.9329 +instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
  1.9330 +%{
  1.9331 +  match(Set cr (CmpF src1 src2)); // float compare of two registers; the result is the flags operand cr
  1.9332 +
  1.9333 +  ins_cost(145); // the rFlagsRegUCF variant of this rule, which has no fixup, is costed at 100
  1.9334 +  format %{ "ucomiss $src1, $src2\n\t"
  1.9335 +            "jnp,s   exit\n\t"
  1.9336 +            "pushfq\t# saw NaN, set CF\n\t"
  1.9337 +            "andq    [rsp], #0xffffff2b\n\t"
  1.9338 +            "popfq\n"
  1.9339 +    "exit:" %}
  1.9340 +  ins_encode %{
  1.9341 +    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  1.9342 +    emit_cmpfp_fixup(_masm); // NOTE(review): presumably emits the jnp/pushfq/andq/popfq sequence shown in format - confirm
  1.9343 +  %}
  1.9344 +  ins_pipe(pipe_slow);
  1.9345 +%}
  1.9346 +
  1.9347 +instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
  1.9348 +  match(Set cr (CmpF src1 src2)); // same match as cmpF_cc_reg, but producing the rFlagsRegUCF flags operand
  1.9349 +
  1.9350 +  ins_cost(100); // cheaper than cmpF_cc_reg (145): a bare ucomiss with no fixup
  1.9351 +  format %{ "ucomiss $src1, $src2" %}
  1.9352 +  ins_encode %{
  1.9353 +    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  1.9354 +  %}
  1.9355 +  ins_pipe(pipe_slow);
  1.9356 +%}
  1.9357 +
  1.9358 +instruct cmpF_cc_mem(rFlagsRegU cr, regF src1, memory src2)
  1.9359 +%{
  1.9360 +  match(Set cr (CmpF src1 (LoadF src2))); // float compare with the LoadF folded in; the result is the flags operand cr
  1.9361 +
  1.9362 +  ins_cost(145); // the rFlagsRegUCF variant of this rule, which has no fixup, is costed at 100
  1.9363 +  format %{ "ucomiss $src1, $src2\n\t"
  1.9364 +            "jnp,s   exit\n\t"
  1.9365 +            "pushfq\t# saw NaN, set CF\n\t"
  1.9366 +            "andq    [rsp], #0xffffff2b\n\t"
  1.9367 +            "popfq\n"
  1.9368 +    "exit:" %}
  1.9369 +  ins_encode %{
  1.9370 +    __ ucomiss($src1$$XMMRegister, $src2$$Address);
  1.9371 +    emit_cmpfp_fixup(_masm); // NOTE(review): presumably emits the jnp/pushfq/andq/popfq sequence shown in format - confirm
  1.9372 +  %}
  1.9373 +  ins_pipe(pipe_slow);
  1.9374 +%}
  1.9375 +
  1.9376 +instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
  1.9377 +  match(Set cr (CmpF src1 (LoadF src2))); // same match as cmpF_cc_mem, but producing the rFlagsRegUCF flags operand
  1.9378 +
  1.9379 +  ins_cost(100); // cheaper than cmpF_cc_mem (145): a bare ucomiss with no fixup
  1.9380 +  format %{ "ucomiss $src1, $src2" %}
  1.9381 +  ins_encode %{
  1.9382 +    __ ucomiss($src1$$XMMRegister, $src2$$Address);
  1.9383 +  %}
  1.9384 +  ins_pipe(pipe_slow);
  1.9385 +%}
  1.9386 +
  1.9387 +instruct cmpF_cc_imm(rFlagsRegU cr, regF src, immF con) %{
  1.9388 +  match(Set cr (CmpF src con)); // float compare against a constant; the result is the flags operand cr
  1.9389 +
  1.9390 +  ins_cost(145); // the rFlagsRegUCF variant of this rule, which has no fixup, is costed at 100
  1.9391 +  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
  1.9392 +            "jnp,s   exit\n\t"
  1.9393 +            "pushfq\t# saw NaN, set CF\n\t"
  1.9394 +            "andq    [rsp], #0xffffff2b\n\t"
  1.9395 +            "popfq\n"
  1.9396 +    "exit:" %}
  1.9397 +  ins_encode %{
  1.9398 +    __ ucomiss($src$$XMMRegister, $constantaddress($con)); // the constant is addressed in memory rather than encoded inline
  1.9399 +    emit_cmpfp_fixup(_masm); // NOTE(review): presumably emits the jnp/pushfq/andq/popfq sequence shown in format - confirm
  1.9400 +  %}
  1.9401 +  ins_pipe(pipe_slow);
  1.9402 +%}
  1.9403 +
  1.9404 +instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
  1.9405 +  match(Set cr (CmpF src con)); // same match as cmpF_cc_imm, but producing the rFlagsRegUCF flags operand
  1.9406 +  ins_cost(100); // cheaper than cmpF_cc_imm (145): a bare ucomiss with no fixup
  1.9407 +  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
  1.9408 +  ins_encode %{
  1.9409 +    __ ucomiss($src$$XMMRegister, $constantaddress($con)); // the constant is addressed in memory rather than encoded inline
  1.9410 +  %}
  1.9411 +  ins_pipe(pipe_slow);
  1.9412 +%}
  1.9413 +
  1.9414 +instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
  1.9415 +%{
  1.9416 +  match(Set cr (CmpD src1 src2)); // double compare of two registers; the result is the flags operand cr
  1.9417 +
  1.9418 +  ins_cost(145); // the rFlagsRegUCF variant of this rule, which has no fixup, is costed at 100
  1.9419 +  format %{ "ucomisd $src1, $src2\n\t"
  1.9420 +            "jnp,s   exit\n\t"
  1.9421 +            "pushfq\t# saw NaN, set CF\n\t"
  1.9422 +            "andq    [rsp], #0xffffff2b\n\t"
  1.9423 +            "popfq\n"
  1.9424 +    "exit:" %}
  1.9425 +  ins_encode %{
  1.9426 +    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  1.9427 +    emit_cmpfp_fixup(_masm); // NOTE(review): presumably emits the jnp/pushfq/andq/popfq sequence shown in format - confirm
  1.9428 +  %}
  1.9429 +  ins_pipe(pipe_slow);
  1.9430 +%}
  1.9431 +
  1.9432 +instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
  1.9433 +  match(Set cr (CmpD src1 src2)); // double compare, register with register; result goes to an rFlagsRegUCF flags operand
  1.9434 +
  1.9435 +  ins_cost(100); // cheaper than cmpD_cc_reg (145), which adds a flag fixup
  1.9436 +  format %{ "ucomisd $src1, $src2" %}
  1.9437 +  ins_encode %{
  1.9438 +    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); // compare only; no emit_cmpfp_fixup in this variant
  1.9439 +  %}
  1.9440 +  ins_pipe(pipe_slow);
  1.9441 +%}
  1.9442 +
  1.9443 +instruct cmpD_cc_mem(rFlagsRegU cr, regD src1, memory src2)
  1.9444 +%{
  1.9445 +  match(Set cr (CmpD src1 (LoadD src2))); // double compare with the load folded into the instruction; result goes to an rFlagsRegU flags operand
  1.9446 +
  1.9447 +  ins_cost(145); // costlier than cmpD_cc_memCF (100), which skips the fixup
  1.9448 +  format %{ "ucomisd $src1, $src2\n\t"
  1.9449 +            "jnp,s   exit\n\t"
  1.9450 +            "pushfq\t# saw NaN, set CF\n\t"
  1.9451 +            "andq    [rsp], #0xffffff2b\n\t"
  1.9452 +            "popfq\n"
  1.9453 +    "exit:" %}
  1.9454 +  ins_encode %{
  1.9455 +    __ ucomisd($src1$$XMMRegister, $src2$$Address);
  1.9456 +    emit_cmpfp_fixup(_masm); // helper defined outside this chunk; the format text above lists the jnp/pushfq/andq/popfq sequence it is expected to emit
  1.9457 +  %}
  1.9458 +  ins_pipe(pipe_slow);
  1.9459 +%}
  1.9460 +
  1.9461 +instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
  1.9462 +  match(Set cr (CmpD src1 (LoadD src2))); // same match rule as cmpD_cc_mem, but for an rFlagsRegUCF flags operand
  1.9463 +
  1.9464 +  ins_cost(100);
  1.9465 +  format %{ "ucomisd $src1, $src2" %}
  1.9466 +  ins_encode %{
  1.9467 +    __ ucomisd($src1$$XMMRegister, $src2$$Address); // compare only; no emit_cmpfp_fixup in this variant
  1.9468 +  %}
  1.9469 +  ins_pipe(pipe_slow);
  1.9470 +%}
  1.9471 +
  1.9472 +instruct cmpD_cc_imm(rFlagsRegU cr, regD src, immD con) %{
  1.9473 +  match(Set cr (CmpD src con)); // double compare of src with a double constant; result goes to an rFlagsRegU flags operand
  1.9474 +
  1.9475 +  ins_cost(145); // costlier than cmpD_cc_immCF (100), which skips the fixup
  1.9476 +  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
  1.9477 +            "jnp,s   exit\n\t"
  1.9478 +            "pushfq\t# saw NaN, set CF\n\t"
  1.9479 +            "andq    [rsp], #0xffffff2b\n\t"
  1.9480 +            "popfq\n"
  1.9481 +    "exit:" %}
  1.9482 +  ins_encode %{
  1.9483 +    __ ucomisd($src$$XMMRegister, $constantaddress($con)); // second operand is addressed through the constant table
  1.9484 +    emit_cmpfp_fixup(_masm); // helper defined outside this chunk; the format text above lists the jnp/pushfq/andq/popfq sequence it is expected to emit
  1.9485 +  %}
  1.9486 +  ins_pipe(pipe_slow);
  1.9487 +%}
  1.9488 +
  1.9489 +instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
  1.9490 +  match(Set cr (CmpD src con)); // same match rule as cmpD_cc_imm, but for an rFlagsRegUCF flags operand
  1.9491 +  ins_cost(100);
  1.9492 +  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
  1.9493 +  ins_encode %{
  1.9494 +    __ ucomisd($src$$XMMRegister, $constantaddress($con)); // compare only; no emit_cmpfp_fixup in this variant
  1.9495 +  %}
  1.9496 +  ins_pipe(pipe_slow);
  1.9497 +%}
  1.9498 +
  1.9499 +// Compare into -1,0,1
  1.9500 +instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
  1.9501 +%{
  1.9502 +  match(Set dst (CmpF3 src1 src2)); // three-way float compare, register with register; integer result in dst
  1.9503 +  effect(KILL cr); // the compare sequence overwrites the flags
  1.9504 +
  1.9505 +  ins_cost(275);
  1.9506 +  format %{ "ucomiss $src1, $src2\n\t"
  1.9507 +            "movl    $dst, #-1\n\t"
  1.9508 +            "jp,s    done\n\t"
  1.9509 +            "jb,s    done\n\t"
  1.9510 +            "setne   $dst\n\t"
  1.9511 +            "movzbl  $dst, $dst\n"
  1.9512 +    "done:" %}
  1.9513 +  ins_encode %{
  1.9514 +    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  1.9515 +    emit_cmpfp3(_masm, $dst$$Register); // helper defined outside this chunk; per the format text: dst stays -1 on parity or below, otherwise setne/movzbl yields 0 or 1
  1.9516 +  %}
  1.9517 +  ins_pipe(pipe_slow);
  1.9518 +%}
  1.9519 +
  1.9520 +// Compare into -1,0,1
  1.9521 +instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
  1.9522 +%{
  1.9523 +  match(Set dst (CmpF3 src1 (LoadF src2))); // three-way float compare with the load folded in; integer result in dst
  1.9524 +  effect(KILL cr); // the compare sequence overwrites the flags
  1.9525 +
  1.9526 +  ins_cost(275);
  1.9527 +  format %{ "ucomiss $src1, $src2\n\t"
  1.9528 +            "movl    $dst, #-1\n\t"
  1.9529 +            "jp,s    done\n\t"
  1.9530 +            "jb,s    done\n\t"
  1.9531 +            "setne   $dst\n\t"
  1.9532 +            "movzbl  $dst, $dst\n"
  1.9533 +    "done:" %}
  1.9534 +  ins_encode %{
  1.9535 +    __ ucomiss($src1$$XMMRegister, $src2$$Address);
  1.9536 +    emit_cmpfp3(_masm, $dst$$Register); // helper defined outside this chunk; per the format text: dst stays -1 on parity or below, otherwise setne/movzbl yields 0 or 1
  1.9537 +  %}
  1.9538 +  ins_pipe(pipe_slow);
  1.9539 +%}
  1.9540 +
  1.9541 +// Compare into -1,0,1
  1.9542 +instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
  1.9543 +  match(Set dst (CmpF3 src con)); // three-way float compare against a constant; integer result in dst
  1.9544 +  effect(KILL cr); // the compare sequence overwrites the flags
  1.9545 +
  1.9546 +  ins_cost(275);
  1.9547 +  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
  1.9548 +            "movl    $dst, #-1\n\t"
  1.9549 +            "jp,s    done\n\t"
  1.9550 +            "jb,s    done\n\t"
  1.9551 +            "setne   $dst\n\t"
  1.9552 +            "movzbl  $dst, $dst\n"
  1.9553 +    "done:" %}
  1.9554 +  ins_encode %{
  1.9555 +    __ ucomiss($src$$XMMRegister, $constantaddress($con)); // second operand is addressed through the constant table
  1.9556 +    emit_cmpfp3(_masm, $dst$$Register); // helper defined outside this chunk; per the format text: dst stays -1 on parity or below, otherwise setne/movzbl yields 0 or 1
  1.9557 +  %}
  1.9558 +  ins_pipe(pipe_slow);
  1.9559 +%}
  1.9560 +
  1.9561 +// Compare into -1,0,1
  1.9562 +instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
  1.9563 +%{
  1.9564 +  match(Set dst (CmpD3 src1 src2)); // three-way double compare, register with register; integer result in dst
  1.9565 +  effect(KILL cr); // the compare sequence overwrites the flags
  1.9566 +
  1.9567 +  ins_cost(275);
  1.9568 +  format %{ "ucomisd $src1, $src2\n\t"
  1.9569 +            "movl    $dst, #-1\n\t"
  1.9570 +            "jp,s    done\n\t"
  1.9571 +            "jb,s    done\n\t"
  1.9572 +            "setne   $dst\n\t"
  1.9573 +            "movzbl  $dst, $dst\n"
  1.9574 +    "done:" %}
  1.9575 +  ins_encode %{
  1.9576 +    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  1.9577 +    emit_cmpfp3(_masm, $dst$$Register); // helper defined outside this chunk; per the format text: dst stays -1 on parity or below, otherwise setne/movzbl yields 0 or 1
  1.9578 +  %}
  1.9579 +  ins_pipe(pipe_slow);
  1.9580 +%}
  1.9581 +
  1.9582 +// Compare into -1,0,1
  1.9583 +instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
  1.9584 +%{
  1.9585 +  match(Set dst (CmpD3 src1 (LoadD src2))); // three-way double compare with the load folded in; integer result in dst
  1.9586 +  effect(KILL cr); // the compare sequence overwrites the flags
  1.9587 +
  1.9588 +  ins_cost(275);
  1.9589 +  format %{ "ucomisd $src1, $src2\n\t"
  1.9590 +            "movl    $dst, #-1\n\t"
  1.9591 +            "jp,s    done\n\t"
  1.9592 +            "jb,s    done\n\t"
  1.9593 +            "setne   $dst\n\t"
  1.9594 +            "movzbl  $dst, $dst\n"
  1.9595 +    "done:" %}
  1.9596 +  ins_encode %{
  1.9597 +    __ ucomisd($src1$$XMMRegister, $src2$$Address);
  1.9598 +    emit_cmpfp3(_masm, $dst$$Register); // helper defined outside this chunk; per the format text: dst stays -1 on parity or below, otherwise setne/movzbl yields 0 or 1
  1.9599 +  %}
  1.9600 +  ins_pipe(pipe_slow);
  1.9601 +%}
  1.9602 +
  1.9603 +// Compare into -1,0,1
  1.9604 +instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
  1.9605 +  match(Set dst (CmpD3 src con)); // three-way double compare against a constant; integer result in dst
  1.9606 +  effect(KILL cr); // the compare sequence overwrites the flags
  1.9607 +
  1.9608 +  ins_cost(275);
  1.9609 +  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
  1.9610 +            "movl    $dst, #-1\n\t"
  1.9611 +            "jp,s    done\n\t"
  1.9612 +            "jb,s    done\n\t"
  1.9613 +            "setne   $dst\n\t"
  1.9614 +            "movzbl  $dst, $dst\n"
  1.9615 +    "done:" %}
  1.9616 +  ins_encode %{
  1.9617 +    __ ucomisd($src$$XMMRegister, $constantaddress($con)); // second operand is addressed through the constant table
  1.9618 +    emit_cmpfp3(_masm, $dst$$Register); // helper defined outside this chunk; per the format text: dst stays -1 on parity or below, otherwise setne/movzbl yields 0 or 1
  1.9619 +  %}
  1.9620 +  ins_pipe(pipe_slow);
  1.9621 +%}
  1.9622 +
  1.9623 +// -----------Trig and Transcendental Instructions-----------------------------
  1.9624 +instruct cosD_reg(regD dst) %{
  1.9625 +  match(Set dst (CosD dst)); // cosine computed in place: dst is both input and result
  1.9626 +
  1.9627 +  format %{ "dcos   $dst\n\t" %}
  1.9628 +  opcode(0xD9, 0xFF); // primary/secondary opcode bytes; D9 FF is the x87 FCOS encoding
  1.9629 +  ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) ); // Push_SrcXD/Push_ResultXD are defined outside this chunk; presumably they move dst to and from the x87 stack -- TODO confirm
  1.9630 +  ins_pipe( pipe_slow );
  1.9631 +%}
  1.9632 +
  1.9633 +instruct sinD_reg(regD dst) %{
  1.9634 +  match(Set dst (SinD dst)); // sine computed in place: dst is both input and result
  1.9635 +
  1.9636 +  format %{ "dsin   $dst\n\t" %}
  1.9637 +  opcode(0xD9, 0xFE); // primary/secondary opcode bytes; D9 FE is the x87 FSIN encoding
  1.9638 +  ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) ); // Push_SrcXD/Push_ResultXD are defined outside this chunk; presumably they move dst to and from the x87 stack -- TODO confirm
  1.9639 +  ins_pipe( pipe_slow );
  1.9640 +%}
  1.9641 +
  1.9642 +instruct tanD_reg(regD dst) %{
  1.9643 +  match(Set dst (TanD dst)); // tangent computed in place: dst is both input and result
  1.9644 +
  1.9645 +  format %{ "dtan   $dst\n\t" %}
  1.9646 +  ins_encode( Push_SrcXD(dst), // encoding defined outside this chunk; presumably loads dst onto the x87 stack -- TODO confirm
  1.9647 +              Opcode(0xD9), Opcode(0xF2),   //fptan
  1.9648 +              Opcode(0xDD), Opcode(0xD8),   //fstp st -- NOTE(review): presumably discards the extra value fptan pushes; confirm against the FPTAN description
  1.9649 +              Push_ResultXD(dst) ); // encoding defined outside this chunk; presumably stores the x87 result back into dst -- TODO confirm
  1.9650 +  ins_pipe( pipe_slow );
  1.9651 +%}
  1.9652 +
  1.9653 +instruct log10D_reg(regD dst) %{
  1.9654 +  // The source and result Double operands in XMM registers
  1.9655 +  match(Set dst (Log10D dst)); // base-10 logarithm computed in place
  1.9656 +  // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
  1.9657 +  // fyl2x        ; compute log_10(2) * log_2(x)
  1.9658 +  format %{ "fldlg2\t\t\t#Log10\n\t"
  1.9659 +            "fyl2x\t\t\t# Q=Log10*Log_2(x)\n\t"
  1.9660 +         %}
  1.9661 +   ins_encode(Opcode(0xD9), Opcode(0xEC),   // fldlg2
  1.9662 +              Push_SrcXD(dst), // emitted after the constant load, so the constant sits below x for fyl2x; encoding itself is defined outside this chunk
  1.9663 +              Opcode(0xD9), Opcode(0xF1),   // fyl2x
  1.9664 +              Push_ResultXD(dst)); // encoding defined outside this chunk; presumably stores the x87 result back into dst -- TODO confirm
  1.9665 +
  1.9666 +  ins_pipe( pipe_slow );
  1.9667 +%}
  1.9668 +
  1.9669 +instruct logD_reg(regD dst) %{
  1.9670 +  // The source and result Double operands in XMM registers
  1.9671 +  match(Set dst (LogD dst)); // natural logarithm computed in place
  1.9672 +  // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
  1.9673 +  // fyl2x        ; compute log_e(2) * log_2(x)
  1.9674 +  format %{ "fldln2\t\t\t#Log_e\n\t"
  1.9675 +            "fyl2x\t\t\t# Q=Log_e*Log_2(x)\n\t"
  1.9676 +         %}
  1.9677 +  ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
  1.9678 +              Push_SrcXD(dst), // emitted after the constant load, so the constant sits below x for fyl2x; encoding itself is defined outside this chunk
  1.9679 +              Opcode(0xD9), Opcode(0xF1),   // fyl2x
  1.9680 +              Push_ResultXD(dst)); // encoding defined outside this chunk; presumably stores the x87 result back into dst -- TODO confirm
  1.9681 +  ins_pipe( pipe_slow );
  1.9682 +%}
  1.9683 +
  1.9684 +instruct powD_reg(regD dst, regD src0, regD src1, rax_RegI rax, rdx_RegI rdx, rcx_RegI rcx, rFlagsReg cr) %{
  1.9685 +  match(Set dst (PowD src0 src1));  // Raise src0 to the src1'th power
  1.9686 +  effect(KILL rax, KILL rdx, KILL rcx, KILL cr); // rax/rdx/rcx and the flags are declared clobbered; they are not named in the encoding below, so presumably fast_pow uses them
  1.9687 +  format %{ "fast_pow $src0 $src1 -> $dst  // KILL $rax, $rcx, $rdx" %}
  1.9688 +  ins_encode %{
  1.9689 +    __ subptr(rsp, 8); // reserve an 8-byte scratch slot on the stack
  1.9690 +    __ movdbl(Address(rsp, 0), $src1$$XMMRegister); // spill the exponent (src1) ...
  1.9691 +    __ fld_d(Address(rsp, 0)); // ... and load it onto the x87 stack first
  1.9692 +    __ movdbl(Address(rsp, 0), $src0$$XMMRegister); // then the base (src0), reusing the same slot
  1.9693 +    __ fld_d(Address(rsp, 0)); // loaded last, so src0 is on top of the x87 stack
  1.9694 +    __ fast_pow(); // MacroAssembler helper defined outside this file chunk
  1.9695 +    __ fstp_d(Address(rsp, 0)); // store the x87 result to the scratch slot and pop it
  1.9696 +    __ movdbl($dst$$XMMRegister, Address(rsp, 0)); // reload the result into the XMM destination
  1.9697 +    __ addptr(rsp, 8); // release the scratch slot
  1.9698 +  %}
  1.9699 +  ins_pipe( pipe_slow );
  1.9700 +%}
  1.9701 +
  1.9702 +instruct expD_reg(regD dst, regD src, rax_RegI rax, rdx_RegI rdx, rcx_RegI rcx, rFlagsReg cr) %{
  1.9703 +  match(Set dst (ExpD src)); // dst = exp(src); src is the input and dst the result
  1.9704 +  effect(KILL rax, KILL rcx, KILL rdx, KILL cr); // rax/rcx/rdx and the flags are declared clobbered; they are not named in the encoding below, so presumably fast_exp uses them
  1.9705 +  format %{ "fast_exp $src -> $dst  // KILL $rax, $rcx, $rdx" %}
  1.9706 +  ins_encode %{
  1.9707 +    __ subptr(rsp, 8); // reserve an 8-byte scratch slot on the stack
  1.9708 +    __ movdbl(Address(rsp, 0), $src$$XMMRegister); // spill the XMM input ...
  1.9709 +    __ fld_d(Address(rsp, 0)); // ... and load it onto the x87 stack
  1.9710 +    __ fast_exp(); // MacroAssembler helper defined outside this file chunk
  1.9711 +    __ fstp_d(Address(rsp, 0)); // store the x87 result to the scratch slot and pop it
  1.9712 +    __ movdbl($dst$$XMMRegister, Address(rsp, 0)); // reload the result into the XMM destination
  1.9713 +    __ addptr(rsp, 8); // release the scratch slot
  1.9714 +  %}
  1.9715 +  ins_pipe( pipe_slow );
  1.9716 +%}
  1.9717 +
  1.9718 +//----------Arithmetic Conversion Instructions---------------------------------
  1.9719 +
  1.9720 +instruct roundFloat_nop(regF dst)
  1.9721 +%{
  1.9722 +  match(Set dst (RoundFloat dst)); // RoundFloat on a regF value is matched but produces no instruction
  1.9723 +
  1.9724 +  ins_cost(0); // free: nothing is emitted
  1.9725 +  ins_encode(); // intentionally empty encoding
  1.9726 +  ins_pipe(empty);
  1.9727 +%}
  1.9728 +
  1.9729 +instruct roundDouble_nop(regD dst)
  1.9730 +%{
  1.9731 +  match(Set dst (RoundDouble dst)); // RoundDouble on a regD value is matched but produces no instruction
  1.9732 +
  1.9733 +  ins_cost(0); // free: nothing is emitted
  1.9734 +  ins_encode(); // intentionally empty encoding
  1.9735 +  ins_pipe(empty);
  1.9736 +%}
  1.9737 +
  1.9738 +instruct convF2D_reg_reg(regD dst, regF src)
  1.9739 +%{
  1.9740 +  match(Set dst (ConvF2D src)); // float -> double widening, register source
  1.9741 +
  1.9742 +  format %{ "cvtss2sd $dst, $src" %}
  1.9743 +  ins_encode %{
  1.9744 +    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister); // single instruction does the whole conversion
  1.9745 +  %}
  1.9746 +  ins_pipe(pipe_slow); // XXX
  1.9747 +%}
  1.9748 +
  1.9749 +instruct convF2D_reg_mem(regD dst, memory src)
  1.9750 +%{
  1.9751 +  match(Set dst (ConvF2D (LoadF src))); // float -> double widening with the float load folded into the instruction
  1.9752 +
  1.9753 +  format %{ "cvtss2sd $dst, $src" %}
  1.9754 +  ins_encode %{
  1.9755 +    __ cvtss2sd ($dst$$XMMRegister, $src$$Address); // memory-operand form of the conversion
  1.9756 +  %}
  1.9757 +  ins_pipe(pipe_slow); // XXX
  1.9758 +%}
  1.9759 +
  1.9760 +instruct convD2F_reg_reg(regF dst, regD src)
  1.9761 +%{
  1.9762 +  match(Set dst (ConvD2F src)); // double -> float narrowing, register source
  1.9763 +
  1.9764 +  format %{ "cvtsd2ss $dst, $src" %}
  1.9765 +  ins_encode %{
  1.9766 +    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister); // single instruction does the whole conversion
  1.9767 +  %}
  1.9768 +  ins_pipe(pipe_slow); // XXX
  1.9769 +%}
  1.9770 +
  1.9771 +instruct convD2F_reg_mem(regF dst, memory src)
  1.9772 +%{
  1.9773 +  match(Set dst (ConvD2F (LoadD src))); // double -> float narrowing with the double load folded into the instruction
  1.9774 +
  1.9775 +  format %{ "cvtsd2ss $dst, $src" %}
  1.9776 +  ins_encode %{
  1.9777 +    __ cvtsd2ss ($dst$$XMMRegister, $src$$Address); // memory-operand form of the conversion
  1.9778 +  %}
  1.9779 +  ins_pipe(pipe_slow); // XXX
  1.9780 +%}
  1.9781 +
  1.9782 +// XXX do mem variants
  1.9783 +instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
  1.9784 +%{
  1.9785 +  match(Set dst (ConvF2I src)); // float -> int conversion with a slow-path fixup call
  1.9786 +  effect(KILL cr); // the cmpl below overwrites the flags
  1.9787 +
  1.9788 +  format %{ "cvttss2sil $dst, $src\t# f2i\n\t"
  1.9789 +            "cmpl    $dst, #0x80000000\n\t"
  1.9790 +            "jne,s   done\n\t"
  1.9791 +            "subq    rsp, #8\n\t"
  1.9792 +            "movss   [rsp], $src\n\t"
  1.9793 +            "call    f2i_fixup\n\t"
  1.9794 +            "popq    $dst\n"
  1.9795 +    "done:   "%}
  1.9796 +  ins_encode %{
  1.9797 +    Label done;
  1.9798 +    __ cvttss2sil($dst$$Register, $src$$XMMRegister); // truncating hardware conversion (fast path)
  1.9799 +    __ cmpl($dst$$Register, 0x80000000); // 0x80000000 is the value the hardware returns for NaN / out-of-range inputs
  1.9800 +    __ jccb(Assembler::notEqual, done); // any other result is final
  1.9801 +    __ subptr(rsp, 8); // slow path: reserve a stack slot ...
  1.9802 +    __ movflt(Address(rsp, 0), $src$$XMMRegister); // ... and place the original float in it for the stub
  1.9803 +    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2i_fixup())));
  1.9804 +    __ pop($dst$$Register); // pops the slot into dst and restores rsp; presumably the stub left the corrected result there -- TODO confirm
  1.9805 +    __ bind(done);
  1.9806 +  %}
  1.9807 +  ins_pipe(pipe_slow);
  1.9808 +%}
  1.9809 +
  1.9810 +instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
  1.9811 +%{
  1.9812 +  match(Set dst (ConvF2L src)); // float -> long conversion with a slow-path fixup call
  1.9813 +  effect(KILL cr); // the cmp64 below overwrites the flags
  1.9814 +
  1.9815 +  format %{ "cvttss2siq $dst, $src\t# f2l\n\t"
  1.9816 +            "cmpq    $dst, [0x8000000000000000]\n\t"
  1.9817 +            "jne,s   done\n\t"
  1.9818 +            "subq    rsp, #8\n\t"
  1.9819 +            "movss   [rsp], $src\n\t"
  1.9820 +            "call    f2l_fixup\n\t"
  1.9821 +            "popq    $dst\n"
  1.9822 +    "done:   "%}
  1.9823 +  ins_encode %{
  1.9824 +    Label done;
  1.9825 +    __ cvttss2siq($dst$$Register, $src$$XMMRegister); // truncating hardware conversion (fast path)
  1.9826 +    __ cmp64($dst$$Register, // compare with the 64-bit constant stored at double_sign_flip (shown as 0x8000000000000000 in the format text)
  1.9827 +             ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
  1.9828 +    __ jccb(Assembler::notEqual, done); // any other result is final
  1.9829 +    __ subptr(rsp, 8); // slow path: reserve a stack slot ...
  1.9830 +    __ movflt(Address(rsp, 0), $src$$XMMRegister); // ... and place the original float in it for the stub
  1.9831 +    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2l_fixup())));
  1.9832 +    __ pop($dst$$Register); // pops the slot into dst and restores rsp; presumably the stub left the corrected result there -- TODO confirm
  1.9833 +    __ bind(done);
  1.9834 +  %}
  1.9835 +  ins_pipe(pipe_slow);
  1.9836 +%}
  1.9837 +
  1.9838 +instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
  1.9839 +%{
  1.9840 +  match(Set dst (ConvD2I src)); // double -> int conversion with a slow-path fixup call
  1.9841 +  effect(KILL cr); // the cmpl below overwrites the flags
  1.9842 +
  1.9843 +  format %{ "cvttsd2sil $dst, $src\t# d2i\n\t"
  1.9844 +            "cmpl    $dst, #0x80000000\n\t"
  1.9845 +            "jne,s   done\n\t"
  1.9846 +            "subq    rsp, #8\n\t"
  1.9847 +            "movsd   [rsp], $src\n\t"
  1.9848 +            "call    d2i_fixup\n\t"
  1.9849 +            "popq    $dst\n"
  1.9850 +    "done:   "%}
  1.9851 +  ins_encode %{
  1.9852 +    Label done;
  1.9853 +    __ cvttsd2sil($dst$$Register, $src$$XMMRegister); // truncating hardware conversion (fast path)
  1.9854 +    __ cmpl($dst$$Register, 0x80000000); // 0x80000000 is the value the hardware returns for NaN / out-of-range inputs
  1.9855 +    __ jccb(Assembler::notEqual, done); // any other result is final
  1.9856 +    __ subptr(rsp, 8); // slow path: reserve a stack slot ...
  1.9857 +    __ movdbl(Address(rsp, 0), $src$$XMMRegister); // ... and place the original double in it for the stub
  1.9858 +    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_fixup())));
  1.9859 +    __ pop($dst$$Register); // pops the slot into dst and restores rsp; presumably the stub left the corrected result there -- TODO confirm
  1.9860 +    __ bind(done);
  1.9861 +  %}
  1.9862 +  ins_pipe(pipe_slow);
  1.9863 +%}
  1.9864 +
  1.9865 +instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
  1.9866 +%{
  1.9867 +  match(Set dst (ConvD2L src)); // double -> long conversion with a slow-path fixup call
  1.9868 +  effect(KILL cr); // the cmp64 below overwrites the flags
  1.9869 +
  1.9870 +  format %{ "cvttsd2siq $dst, $src\t# d2l\n\t"
  1.9871 +            "cmpq    $dst, [0x8000000000000000]\n\t"
  1.9872 +            "jne,s   done\n\t"
  1.9873 +            "subq    rsp, #8\n\t"
  1.9874 +            "movsd   [rsp], $src\n\t"
  1.9875 +            "call    d2l_fixup\n\t"
  1.9876 +            "popq    $dst\n"
  1.9877 +    "done:   "%}
  1.9878 +  ins_encode %{
  1.9879 +    Label done;
  1.9880 +    __ cvttsd2siq($dst$$Register, $src$$XMMRegister); // truncating hardware conversion (fast path)
  1.9881 +    __ cmp64($dst$$Register, // compare with the 64-bit constant stored at double_sign_flip (shown as 0x8000000000000000 in the format text)
  1.9882 +             ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
  1.9883 +    __ jccb(Assembler::notEqual, done); // any other result is final
  1.9884 +    __ subptr(rsp, 8); // slow path: reserve a stack slot ...
  1.9885 +    __ movdbl(Address(rsp, 0), $src$$XMMRegister); // ... and place the original double in it for the stub
  1.9886 +    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_fixup())));
  1.9887 +    __ pop($dst$$Register); // pops the slot into dst and restores rsp; presumably the stub left the corrected result there -- TODO confirm
  1.9888 +    __ bind(done);
  1.9889 +  %}
  1.9890 +  ins_pipe(pipe_slow);
  1.9891 +%}
  1.9892 +
  1.9893 +instruct convI2F_reg_reg(regF dst, rRegI src)
  1.9894 +%{
  1.9895 +  predicate(!UseXmmI2F); // convXI2F_reg handles the same match rule when UseXmmI2F is set
  1.9896 +  match(Set dst (ConvI2F src)); // int -> float conversion, general-register source
  1.9897 +
  1.9898 +  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  1.9899 +  ins_encode %{
  1.9900 +    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register); // converts directly from the integer register
  1.9901 +  %}
  1.9902 +  ins_pipe(pipe_slow); // XXX
  1.9903 +%}
  1.9904 +
  1.9905 +instruct convI2F_reg_mem(regF dst, memory src)
  1.9906 +%{
  1.9907 +  match(Set dst (ConvI2F (LoadI src))); // int -> float conversion with the int load folded in; no predicate on this variant
  1.9908 +
  1.9909 +  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  1.9910 +  ins_encode %{
  1.9911 +    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address); // memory-operand form of the conversion
  1.9912 +  %}
  1.9913 +  ins_pipe(pipe_slow); // XXX
  1.9914 +%}
  1.9915 +
  1.9916 +instruct convI2D_reg_reg(regD dst, rRegI src)
  1.9917 +%{
  1.9918 +  predicate(!UseXmmI2D); // convXI2D_reg handles the same match rule when UseXmmI2D is set
  1.9919 +  match(Set dst (ConvI2D src)); // int -> double conversion, general-register source
  1.9920 +
  1.9921 +  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  1.9922 +  ins_encode %{
  1.9923 +    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register); // converts directly from the integer register
  1.9924 +  %}
  1.9925 +  ins_pipe(pipe_slow); // XXX
  1.9926 +%}
  1.9927 +
  1.9928 +instruct convI2D_reg_mem(regD dst, memory src)
  1.9929 +%{
  1.9930 +  match(Set dst (ConvI2D (LoadI src))); // int -> double conversion with the int load folded in; no predicate on this variant
  1.9931 +
  1.9932 +  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  1.9933 +  ins_encode %{
  1.9934 +    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address); // memory-operand form of the conversion
  1.9935 +  %}
  1.9936 +  ins_pipe(pipe_slow); // XXX
  1.9937 +%}
  1.9938 +
  1.9939 +instruct convXI2F_reg(regF dst, rRegI src)
  1.9940 +%{
  1.9941 +  predicate(UseXmmI2F); // alternative to convI2F_reg_reg, selected when UseXmmI2F is set
  1.9942 +  match(Set dst (ConvI2F src)); // int -> float conversion done entirely inside the XMM register
  1.9943 +
  1.9944 +  format %{ "movdl $dst, $src\n\t"
  1.9945 +            "cvtdq2psl $dst, $dst\t# i2f" %}
  1.9946 +  ins_encode %{
  1.9947 +    __ movdl($dst$$XMMRegister, $src$$Register); // first move the int bits into the XMM destination
  1.9948 +    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister); // then convert in place with the packed int -> float instruction
  1.9949 +  %}
  1.9950 +  ins_pipe(pipe_slow); // XXX
  1.9951 +%}
  1.9952 +
  1.9953 +instruct convXI2D_reg(regD dst, rRegI src)
  1.9954 +%{
  1.9955 +  predicate(UseXmmI2D); // alternative to convI2D_reg_reg, selected when UseXmmI2D is set
  1.9956 +  match(Set dst (ConvI2D src)); // int -> double conversion done entirely inside the XMM register
  1.9957 +
  1.9958 +  format %{ "movdl $dst, $src\n\t"
  1.9959 +            "cvtdq2pdl $dst, $dst\t# i2d" %}
  1.9960 +  ins_encode %{
  1.9961 +    __ movdl($dst$$XMMRegister, $src$$Register); // first move the int bits into the XMM destination
  1.9962 +    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister); // then convert in place with the packed int -> double instruction
  1.9963 +  %}
  1.9964 +  ins_pipe(pipe_slow); // XXX
  1.9965 +%}
  1.9966 +
  1.9967 +instruct convL2F_reg_reg(regF dst, rRegL src)
  1.9968 +%{
  1.9969 +  match(Set dst (ConvL2F src)); // long -> float conversion, general-register source
  1.9970 +
  1.9971 +  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  1.9972 +  ins_encode %{
  1.9973 +    __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register); // 64-bit source form of cvtsi2ss
  1.9974 +  %}
  1.9975 +  ins_pipe(pipe_slow); // XXX
  1.9976 +%}
  1.9977 +
  1.9978 +instruct convL2F_reg_mem(regF dst, memory src)
  1.9979 +%{
  1.9980 +  match(Set dst (ConvL2F (LoadL src))); // long -> float conversion with the long load folded in
  1.9981 +
  1.9982 +  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  1.9983 +  ins_encode %{
  1.9984 +    __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address); // memory-operand form of the conversion
  1.9985 +  %}
  1.9986 +  ins_pipe(pipe_slow); // XXX
  1.9987 +%}
  1.9988 +
  1.9989 +instruct convL2D_reg_reg(regD dst, rRegL src)
  1.9990 +%{
  1.9991 +  match(Set dst (ConvL2D src)); // long -> double conversion, general-register source
  1.9992 +
  1.9993 +  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  1.9994 +  ins_encode %{
  1.9995 +    __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register); // 64-bit source form of cvtsi2sd
  1.9996 +  %}
  1.9997 +  ins_pipe(pipe_slow); // XXX
  1.9998 +%}
  1.9999 +
 1.10000 +instruct convL2D_reg_mem(regD dst, memory src)
 1.10001 +%{
 1.10002 +  match(Set dst (ConvL2D (LoadL src))); // long -> double conversion with the long load folded in
 1.10003 +
 1.10004 +  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
 1.10005 +  ins_encode %{
 1.10006 +    __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address); // memory-operand form of the conversion
 1.10007 +  %}
 1.10008 +  ins_pipe(pipe_slow); // XXX
 1.10009 +%}
 1.10010 +
 1.10011 +instruct convI2L_reg_reg(rRegL dst, rRegI src)
 1.10012 +%{
 1.10013 +  match(Set dst (ConvI2L src)); // int -> long conversion, register to register
 1.10014 +
 1.10015 +  ins_cost(125);
 1.10016 +  format %{ "movslq  $dst, $src\t# i2l" %}
 1.10017 +  ins_encode %{
 1.10018 +    __ movslq($dst$$Register, $src$$Register); // sign-extending 32 -> 64 bit move
 1.10019 +  %}
 1.10020 +  ins_pipe(ialu_reg_reg);
 1.10021 +%}
 1.10022 +
 1.10023 +// instruct convI2L_reg_reg_foo(rRegL dst, rRegI src)
 1.10024 +// %{
 1.10025 +//   match(Set dst (ConvI2L src));
 1.10026 +// //   predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 &&
 1.10027 +// //             _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0);
 1.10028 +//   predicate(((const TypeNode*) n)->type()->is_long()->_hi ==
 1.10029 +//             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi &&
 1.10030 +//             ((const TypeNode*) n)->type()->is_long()->_lo ==
 1.10031 +//             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo);
 1.10032 +
 1.10033 +//   format %{ "movl    $dst, $src\t# unsigned i2l" %}
 1.10034 +//   ins_encode(enc_copy(dst, src));
 1.10035 +// //   opcode(0x63); // needs REX.W
 1.10036 +// //   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
 1.10037 +//   ins_pipe(ialu_reg_reg);
 1.10038 +// %}
 1.10039 +
 1.10040 +// Zero-extend convert int to long
 1.10041 +instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
 1.10042 +%{
 1.10043 +  match(Set dst (AndL (ConvI2L src) mask)); // folds i2l followed by an AndL with the immL_32bits operand into one move (operand defined outside this chunk)
 1.10044 +
 1.10045 +  format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
 1.10046 +  ins_encode %{
 1.10047 +    if ($dst$$reg != $src$$reg) { // NOTE(review): nothing is emitted when dst and src share a register; this relies on the upper 32 bits already being clear -- confirm
 1.10048 +      __ movl($dst$$Register, $src$$Register); // 32-bit move between distinct registers
 1.10049 +    }
 1.10050 +  %}
 1.10051 +  ins_pipe(ialu_reg_reg);
 1.10052 +%}
 1.10053 +
 1.10054 +// Zero-extend convert int to long
 1.10055 +instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
 1.10056 +%{
 1.10057 +  match(Set dst (AndL (ConvI2L (LoadI src)) mask)); // folds int load, i2l and the AndL with the immL_32bits operand into one load
 1.10058 +
 1.10059 +  format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
 1.10060 +  ins_encode %{
 1.10061 +    __ movl($dst$$Register, $src$$Address); // a single 32-bit load implements the whole pattern
 1.10062 +  %}
 1.10063 +  ins_pipe(ialu_reg_mem);
 1.10064 +%}
 1.10065 +
 1.10066 +instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
 1.10067 +%{
 1.10068 +  match(Set dst (AndL src mask)); // AndL of a long with the immL_32bits operand, implemented as a move instead of an and
 1.10069 +
 1.10070 +  format %{ "movl    $dst, $src\t# zero-extend long" %}
 1.10071 +  ins_encode %{
 1.10072 +    __ movl($dst$$Register, $src$$Register); // emitted unconditionally, unlike convI2L_reg_reg_zex which skips the move when dst == src
 1.10073 +  %}
 1.10074 +  ins_pipe(ialu_reg_reg);
 1.10075 +%}
 1.10076 +
 1.10077 +instruct convL2I_reg_reg(rRegI dst, rRegL src)
 1.10078 +%{
 1.10079 +  match(Set dst (ConvL2I src)); // long -> int narrowing, register to register
 1.10080 +
 1.10081 +  format %{ "movl    $dst, $src\t# l2i" %}
 1.10082 +  ins_encode %{
 1.10083 +    __ movl($dst$$Register, $src$$Register); // 32-bit move keeps only the low half of src
 1.10084 +  %}
 1.10085 +  ins_pipe(ialu_reg_reg);
 1.10086 +%}
 1.10087 +
 1.10088 +
 1.10089 +instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
 1.10090 +  match(Set dst (MoveF2I src)); // MoveF2I where the float lives in a stack slot and the int goes to a general register
 1.10091 +  effect(DEF dst, USE src);
 1.10092 +
 1.10093 +  ins_cost(125);
 1.10094 +  format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
 1.10095 +  ins_encode %{
 1.10096 +    __ movl($dst$$Register, Address(rsp, $src$$disp)); // plain 32-bit load from the rsp-relative slot; no conversion instruction
 1.10097 +  %}
 1.10098 +  ins_pipe(ialu_reg_mem);
 1.10099 +%}
 1.10100 +
 1.10101 +instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
 1.10102 +  match(Set dst (MoveI2F src)); // MoveI2F where the int lives in a stack slot and the float goes to an XMM register
 1.10103 +  effect(DEF dst, USE src);
 1.10104 +
 1.10105 +  ins_cost(125);
 1.10106 +  format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
 1.10107 +  ins_encode %{
 1.10108 +    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp)); // plain float load from the rsp-relative slot; no conversion instruction
 1.10109 +  %}
 1.10110 +  ins_pipe(pipe_slow);
 1.10111 +%}
 1.10112 +
 1.10113 +instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
 1.10114 +  match(Set dst (MoveD2L src)); // MoveD2L where the double lives in a stack slot and the long goes to a general register
 1.10115 +  effect(DEF dst, USE src);
 1.10116 +
 1.10117 +  ins_cost(125);
 1.10118 +  format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
 1.10119 +  ins_encode %{
 1.10120 +    __ movq($dst$$Register, Address(rsp, $src$$disp)); // plain 64-bit load from the rsp-relative slot; no conversion instruction
 1.10121 +  %}
 1.10122 +  ins_pipe(ialu_reg_mem);
 1.10123 +%}
 1.10124 +
 1.10125 +instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
 1.10126 +  predicate(!UseXmmLoadAndClearUpper); // MoveL2D_stack_reg covers the same match rule when the flag is set
 1.10127 +  match(Set dst (MoveL2D src)); // MoveL2D where the long lives in a stack slot and the double goes to an XMM register
 1.10128 +  effect(DEF dst, USE src);
 1.10129 +
 1.10130 +  ins_cost(125);
 1.10131 +  format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
 1.10132 +  ins_encode %{
 1.10133 +    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); // same movdbl call as MoveL2D_stack_reg; presumably movdbl picks movlpd vs movsd from the flag, as the two format strings suggest -- TODO confirm
 1.10134 +  %}
 1.10135 +  ins_pipe(pipe_slow);
 1.10136 +%}
 1.10137 +
 1.10138 +instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
 1.10139 +  predicate(UseXmmLoadAndClearUpper); // MoveL2D_stack_reg_partial covers the same match rule when the flag is clear
 1.10140 +  match(Set dst (MoveL2D src)); // MoveL2D where the long lives in a stack slot and the double goes to an XMM register
 1.10141 +  effect(DEF dst, USE src);
 1.10142 +
 1.10143 +  ins_cost(125);
 1.10144 +  format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
 1.10145 +  ins_encode %{
 1.10146 +    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); // same movdbl call as the _partial variant; presumably movdbl picks movsd vs movlpd from the flag, as the two format strings suggest -- TODO confirm
 1.10147 +  %}
 1.10148 +  ins_pipe(pipe_slow);
 1.10149 +%}
 1.10150 +
 1.10151 +
 1.10152 +instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
 1.10153 +  match(Set dst (MoveF2I src)); // MoveF2I from an XMM register into an int stack slot
 1.10154 +  effect(DEF dst, USE src);
 1.10155 +
 1.10156 +  ins_cost(95); // XXX
 1.10157 +  format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
 1.10158 +  ins_encode %{
 1.10159 +    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister); // plain float store to the rsp-relative slot; no conversion instruction
 1.10160 +  %}
 1.10161 +  ins_pipe(pipe_slow);
 1.10162 +%}
 1.10163 +
 1.10164 +instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
 1.10165 +  match(Set dst (MoveI2F src)); // MoveI2F from a general register into a float stack slot
 1.10166 +  effect(DEF dst, USE src);
 1.10167 +
 1.10168 +  ins_cost(100);
 1.10169 +  format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
 1.10170 +  ins_encode %{
 1.10171 +    __ movl(Address(rsp, $dst$$disp), $src$$Register); // plain 32-bit store to the rsp-relative slot; no conversion instruction
 1.10172 +  %}
 1.10173 +  ins_pipe( ialu_mem_reg );
 1.10174 +%}
 1.10175 +
 1.10176 +instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
 1.10177 +  match(Set dst (MoveD2L src)); // MoveD2L from an XMM register into a long stack slot
 1.10178 +  effect(DEF dst, USE src);
 1.10179 +
 1.10180 +  ins_cost(95); // XXX
 1.10181 +  format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
 1.10182 +  ins_encode %{
 1.10183 +    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister); // plain double store to the rsp-relative slot; no conversion instruction
 1.10184 +  %}
 1.10185 +  ins_pipe(pipe_slow);
 1.10186 +%}
 1.10187 +
 1.10188 +instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
 1.10189 +  match(Set dst (MoveL2D src)); // MoveL2D from a general register into a double stack slot
 1.10190 +  effect(DEF dst, USE src);
 1.10191 +
 1.10192 +  ins_cost(100);
 1.10193 +  format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
 1.10194 +  ins_encode %{
 1.10195 +    __ movq(Address(rsp, $dst$$disp), $src$$Register); // plain 64-bit store to the rsp-relative slot; no conversion instruction
 1.10196 +  %}
 1.10197 +  ins_pipe(ialu_mem_reg);
 1.10198 +%}
 1.10199 +
 1.10200 +instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
 1.10201 +  match(Set dst (MoveF2I src)); // MoveF2I directly from an XMM register to a general register
 1.10202 +  effect(DEF dst, USE src);
 1.10203 +  ins_cost(85); // lower than the stack-slot MoveF2I variants (95 and 125)
 1.10204 +  format %{ "movd    $dst,$src\t# MoveF2I" %}
 1.10205 +  ins_encode %{
 1.10206 +    __ movdl($dst$$Register, $src$$XMMRegister); // 32-bit XMM -> GPR move
 1.10207 +  %}
 1.10208 +  ins_pipe( pipe_slow );
 1.10209 +%}
 1.10210 +
 1.10211 +instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
 1.10212 +  match(Set dst (MoveD2L src)); // MoveD2L directly from an XMM register to a general register
 1.10213 +  effect(DEF dst, USE src);
 1.10214 +  ins_cost(85); // lower than the stack-slot MoveD2L variants (95 and 125)
 1.10215 +  format %{ "movd    $dst,$src\t# MoveD2L" %}
 1.10216 +  ins_encode %{
 1.10217 +    __ movdq($dst$$Register, $src$$XMMRegister); // the assembler call is movdq although the format text prints "movd"
 1.10218 +  %}
 1.10219 +  ins_pipe( pipe_slow );
 1.10220 +%}
 1.10221 +
 1.10222 +instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
 1.10223 +  match(Set dst (MoveI2F src)); // MoveI2F directly from a general register to an XMM register
 1.10224 +  effect(DEF dst, USE src);
 1.10225 +  ins_cost(100);
 1.10226 +  format %{ "movd    $dst,$src\t# MoveI2F" %}
 1.10227 +  ins_encode %{
 1.10228 +    __ movdl($dst$$XMMRegister, $src$$Register); // 32-bit GPR -> XMM move
 1.10229 +  %}
 1.10230 +  ins_pipe( pipe_slow );
 1.10231 +%}
 1.10232 +
 1.10233 +instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
 1.10234 +  match(Set dst (MoveL2D src)); // MoveL2D directly from a general register to an XMM register
 1.10235 +  effect(DEF dst, USE src);
 1.10236 +  ins_cost(100);
 1.10237 +  format %{ "movd    $dst,$src\t# MoveL2D" %}
 1.10238 +  ins_encode %{
 1.10239 +     __ movdq($dst$$XMMRegister, $src$$Register); // the assembler call is movdq although the format text prints "movd"
 1.10240 +  %}
 1.10241 +  ins_pipe( pipe_slow );
 1.10242 +%}
 1.10243 +
 1.10244 +
 1.10245 +// =======================================================================
 1.10246 +// fast clearing of an array
 1.10247 +instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
 1.10248 +                  rFlagsReg cr)
 1.10249 +%{
 1.10250 +  predicate(!UseFastStosb); // rep_fast_stosb covers the same match rule when UseFastStosb is set
 1.10251 +  match(Set dummy (ClearArray cnt base)); // array clearing; cnt, base and zero are pinned to rcx, rdi and rax by their operand classes
 1.10252 +  effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); // count and base are consumed and destroyed; rax and the flags are clobbered
 1.10253 +
 1.10254 +  format %{ "xorq    rax, rax\t# ClearArray:\n\t"
 1.10255 +            "rep     stosq\t# Store rax to *rdi++ while rcx--" %}
 1.10256 +  ins_encode %{ 
 1.10257 +    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); // same helper call as rep_fast_stosb; how clear_mem chooses the store width is not visible in this chunk
 1.10258 +  %}
 1.10259 +  ins_pipe(pipe_slow);
 1.10260 +%}
 1.10261 +
 1.10262 +instruct rep_fast_stosb(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
 1.10263 +                        rFlagsReg cr)
 1.10264 +%{
 1.10265 +  predicate(UseFastStosb); // rep_stos covers the same match rule when UseFastStosb is clear
 1.10266 +  match(Set dummy (ClearArray cnt base)); // array clearing; cnt, base and zero are pinned to rcx, rdi and rax by their operand classes
 1.10267 +  effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); // count and base are consumed and destroyed; rax and the flags are clobbered
 1.10268 +  format %{ "xorq    rax, rax\t# ClearArray:\n\t"
 1.10269 +            "shlq    rcx,3\t# Convert doublewords to bytes\n\t"
 1.10270 +            "rep     stosb\t# Store rax to *rdi++ while rcx--" %}
 1.10271 +  ins_encode %{ 
 1.10272 +    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); // same helper call as rep_stos; presumably clear_mem consults UseFastStosb to emit the stosb form listed above -- TODO confirm
 1.10273 +  %}
 1.10274 +  ins_pipe( pipe_slow );
 1.10275 +%}
 1.10276 +
 1.10277 +instruct string_compare(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
 1.10278 +                        rax_RegI result, regD tmp1, rFlagsReg cr)
 1.10279 +%{
 1.10280 +  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); // string-compare intrinsic node; every operand is pinned to a fixed register by its operand class
 1.10281 +  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); // all four inputs are destroyed; tmp1 is an XMM scratch register
 1.10282 +
 1.10283 +  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
 1.10284 +  ins_encode %{
 1.10285 +    __ string_compare($str1$$Register, $str2$$Register, // note the argument order: both pointers first, then both counts
 1.10286 +                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
 1.10287 +                      $tmp1$$XMMRegister); // MacroAssembler helper defined outside this file chunk
 1.10288 +  %}
 1.10289 +  ins_pipe( pipe_slow );
 1.10290 +%}
 1.10291 +
 1.10292 +// fast search of substring with known size.
 1.10293 +instruct string_indexof_con(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
 1.10294 +                            rbx_RegI result, regD vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
 1.10295 +%{  // StrIndexOf rule for a substring whose length is an immediate; result is delivered in rbx.
 1.10296 +  predicate(UseSSE42Intrinsics);
 1.10297 +  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
 1.10298 +  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);  // cnt2 (rax) is not an input here, only a clobbered register handed to the helper
 1.10299 +
 1.10300 +  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
 1.10301 +  ins_encode %{
 1.10302 +    int icnt2 = (int)$int_cnt2$$constant;  // substring length, known when the code is emitted
 1.10303 +    if (icnt2 >= 8) {
 1.10304 +      // IndexOf for constant substrings with size >= 8 elements
 1.10305 +      // which don't need to be loaded through stack.
 1.10306 +      __ string_indexofC8($str1$$Register, $str2$$Register,
 1.10307 +                          $cnt1$$Register, $cnt2$$Register,
 1.10308 +                          icnt2, $result$$Register,
 1.10309 +                          $vec$$XMMRegister, $tmp$$Register);
 1.10310 +    } else {
 1.10311 +      // Small strings are loaded through stack if they cross page boundary.
 1.10312 +      __ string_indexof($str1$$Register, $str2$$Register,
 1.10313 +                        $cnt1$$Register, $cnt2$$Register,
 1.10314 +                        icnt2, $result$$Register,
 1.10315 +                        $vec$$XMMRegister, $tmp$$Register);
 1.10316 +    }
 1.10317 +  %}
 1.10318 +  ins_pipe( pipe_slow );
 1.10319 +%}
 1.10320 +
 1.10321 +instruct string_indexof(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
 1.10322 +                        rbx_RegI result, regD vec, rcx_RegI tmp, rFlagsReg cr)
 1.10323 +%{  // StrIndexOf rule where the substring length is a register input (rax); result is delivered in rbx.
 1.10324 +  predicate(UseSSE42Intrinsics);
 1.10325 +  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
 1.10326 +  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);  // every input register, tmp and the flags are destroyed
 1.10327 +
 1.10328 +  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
 1.10329 +  ins_encode %{
 1.10330 +    __ string_indexof($str1$$Register, $str2$$Register,
 1.10331 +                      $cnt1$$Register, $cnt2$$Register,
 1.10332 +                      (-1), $result$$Register,  // -1 fills the constant-length slot; presumably means the length is only known at run time -- TODO confirm
 1.10333 +                      $vec$$XMMRegister, $tmp$$Register);
 1.10334 +  %}
 1.10335 +  ins_pipe( pipe_slow );
 1.10336 +%}
 1.10337 +
 1.10338 +// fast string equals
 1.10339 +instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
 1.10340 +                       regD tmp1, regD tmp2, rbx_RegI tmp3, rFlagsReg cr)
 1.10341 +%{  // StrEquals rule: pointers in rdi/rsi, count in rcx, result delivered in rax.
 1.10342 +  match(Set result (StrEquals (Binary str1 str2) cnt));
 1.10343 +  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);  // inputs, rbx and the flags are destroyed; tmp1/tmp2 are scratch
 1.10344 +
 1.10345 +  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
 1.10346 +  ins_encode %{
 1.10347 +    __ char_arrays_equals(false, $str1$$Register, $str2$$Register,  // leading flag is false here and true in array_equals; presumably selects array mode -- TODO confirm
 1.10348 +                          $cnt$$Register, $result$$Register, $tmp3$$Register,
 1.10349 +                          $tmp1$$XMMRegister, $tmp2$$XMMRegister);
 1.10350 +  %}
 1.10351 +  ins_pipe( pipe_slow );
 1.10352 +%}
 1.10353 +
 1.10354 +// fast array equals
 1.10355 +instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
 1.10356 +                      regD tmp1, regD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
 1.10357 +%{  // AryEq rule: array pointers in rdi/rsi, result delivered in rax; no explicit length operand.
 1.10358 +  match(Set result (AryEq ary1 ary2));
 1.10359 +  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);  // rcx and rbx are clobbered scratch registers
 1.10360 +  //ins_cost(300);
 1.10361 +
 1.10362 +  format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
 1.10363 +  ins_encode %{
 1.10364 +    __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,  // leading flag is true here and false in string_equals
 1.10365 +                          $tmp3$$Register, $result$$Register, $tmp4$$Register,  // tmp3 (rcx) fills the slot where string_equals passes its count
 1.10366 +                          $tmp1$$XMMRegister, $tmp2$$XMMRegister);
 1.10367 +  %}
 1.10368 +  ins_pipe( pipe_slow );
 1.10369 +%}
 1.10370 +
 1.10371 +// encode char[] to byte[] in ISO_8859_1
 1.10372 +instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
 1.10373 +                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
 1.10374 +                          rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{  // EncodeISOArray rule: src in rsi, dst in rdi, len in rdx, result in rax.
 1.10375 +  match(Set result (EncodeISOArray src (Binary dst len)));
 1.10376 +  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);  // all inputs, rcx and the flags are destroyed; tmp1..tmp4 are scratch
 1.10377 +
 1.10378 +  format %{ "Encode array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
 1.10379 +  ins_encode %{
 1.10380 +    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
 1.10381 +                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
 1.10382 +                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);  // the four regD temps are passed as XMM registers
 1.10383 +  %}
 1.10384 +  ins_pipe( pipe_slow );
 1.10385 +%}
 1.10386 +
 1.10387 +//----------Overflow Math Instructions-----------------------------------------
 1.10388 +
 1.10389 +instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
 1.10390 +%{  // OverflowAddI (reg, reg): flags come from actually performing the 32-bit add.
 1.10391 +  match(Set cr (OverflowAddI op1 op2));
 1.10392 +  effect(DEF cr, USE_KILL op1, USE op2);  // op1 (pinned to rax) is overwritten by the add
 1.10393 +
 1.10394 +  format %{ "addl    $op1, $op2\t# overflow check int" %}
 1.10395 +
 1.10396 +  ins_encode %{
 1.10397 +    __ addl($op1$$Register, $op2$$Register);
 1.10398 +  %}
 1.10399 +  ins_pipe(ialu_reg_reg);
 1.10400 +%}
 1.10401 +
 1.10402 +instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
 1.10403 +%{  // OverflowAddI (reg, immediate): flags come from actually performing the 32-bit add.
 1.10404 +  match(Set cr (OverflowAddI op1 op2));
 1.10405 +  effect(DEF cr, USE_KILL op1, USE op2);  // op1 (pinned to rax) is overwritten by the add
 1.10406 +
 1.10407 +  format %{ "addl    $op1, $op2\t# overflow check int" %}
 1.10408 +
 1.10409 +  ins_encode %{
 1.10410 +    __ addl($op1$$Register, $op2$$constant);
 1.10411 +  %}
 1.10412 +  ins_pipe(ialu_reg_reg);
 1.10413 +%}
 1.10414 +
 1.10415 +instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
 1.10416 +%{  // OverflowAddL (reg, reg): flags come from actually performing the 64-bit add.
 1.10417 +  match(Set cr (OverflowAddL op1 op2));
 1.10418 +  effect(DEF cr, USE_KILL op1, USE op2);  // op1 (pinned to rax) is overwritten by the add
 1.10419 +
 1.10420 +  format %{ "addq    $op1, $op2\t# overflow check long" %}
 1.10421 +  ins_encode %{
 1.10422 +    __ addq($op1$$Register, $op2$$Register);
 1.10423 +  %}
 1.10424 +  ins_pipe(ialu_reg_reg);
 1.10425 +%}
 1.10426 +
 1.10427 +instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
 1.10428 +%{  // OverflowAddL (reg, immL32 constant): flags come from actually performing the 64-bit add.
 1.10429 +  match(Set cr (OverflowAddL op1 op2));
 1.10430 +  effect(DEF cr, USE_KILL op1, USE op2);  // op1 (pinned to rax) is overwritten by the add
 1.10431 +
 1.10432 +  format %{ "addq    $op1, $op2\t# overflow check long" %}
 1.10433 +  ins_encode %{
 1.10434 +    __ addq($op1$$Register, $op2$$constant);
 1.10435 +  %}
 1.10436 +  ins_pipe(ialu_reg_reg);
 1.10437 +%}
 1.10438 +
 1.10439 +instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
 1.10440 +%{  // OverflowSubI (reg, reg): emitted as a 32-bit compare; no effect clause, so no operand is declared killed.
 1.10441 +  match(Set cr (OverflowSubI op1 op2));
 1.10442 +
 1.10443 +  format %{ "cmpl    $op1, $op2\t# overflow check int" %}
 1.10444 +  ins_encode %{
 1.10445 +    __ cmpl($op1$$Register, $op2$$Register);
 1.10446 +  %}
 1.10447 +  ins_pipe(ialu_reg_reg);
 1.10448 +%}
 1.10449 +
 1.10450 +instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
 1.10451 +%{  // OverflowSubI (reg, immediate): emitted as a 32-bit compare; no operand is declared killed.
 1.10452 +  match(Set cr (OverflowSubI op1 op2));
 1.10453 +
 1.10454 +  format %{ "cmpl    $op1, $op2\t# overflow check int" %}
 1.10455 +  ins_encode %{
 1.10456 +    __ cmpl($op1$$Register, $op2$$constant);
 1.10457 +  %}
 1.10458 +  ins_pipe(ialu_reg_reg);
 1.10459 +%}
 1.10460 +
 1.10461 +instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
 1.10462 +%{  // OverflowSubL (reg, reg): emitted as a 64-bit compare; no operand is declared killed.
 1.10463 +  match(Set cr (OverflowSubL op1 op2));
 1.10464 +
 1.10465 +  format %{ "cmpq    $op1, $op2\t# overflow check long" %}
 1.10466 +  ins_encode %{
 1.10467 +    __ cmpq($op1$$Register, $op2$$Register);
 1.10468 +  %}
 1.10469 +  ins_pipe(ialu_reg_reg);
 1.10470 +%}
 1.10471 +
 1.10472 +instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
 1.10473 +%{  // OverflowSubL (reg, immL32 constant): emitted as a 64-bit compare; no operand is declared killed.
 1.10474 +  match(Set cr (OverflowSubL op1 op2));
 1.10475 +
 1.10476 +  format %{ "cmpq    $op1, $op2\t# overflow check long" %}
 1.10477 +  ins_encode %{
 1.10478 +    __ cmpq($op1$$Register, $op2$$constant);
 1.10479 +  %}
 1.10480 +  ins_pipe(ialu_reg_reg);
 1.10481 +%}
 1.10482 +
 1.10483 +instruct overflowNegI_rReg(rFlagsReg cr, immI0 zero, rax_RegI op2)
 1.10484 +%{  // OverflowSubI with a constant-zero left operand (0 - op2): emitted as a 32-bit negate.
 1.10485 +  match(Set cr (OverflowSubI zero op2));
 1.10486 +  effect(DEF cr, USE_KILL op2);  // op2 (pinned to rax) is overwritten by the negate
 1.10487 +
 1.10488 +  format %{ "negl    $op2\t# overflow check int" %}
 1.10489 +  ins_encode %{
 1.10490 +    __ negl($op2$$Register);
 1.10491 +  %}
 1.10492 +  ins_pipe(ialu_reg_reg);
 1.10493 +%}
 1.10494 +
 1.10495 +instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
 1.10496 +%{  // OverflowSubL with a constant-zero left operand (0 - op2): emitted as a 64-bit negate.
 1.10497 +  match(Set cr (OverflowSubL zero op2));
 1.10498 +  effect(DEF cr, USE_KILL op2);  // op2 (pinned to rax) is overwritten by the negate
 1.10499 +
 1.10500 +  format %{ "negq    $op2\t# overflow check long" %}
 1.10501 +  ins_encode %{
 1.10502 +    __ negq($op2$$Register);
 1.10503 +  %}
 1.10504 +  ins_pipe(ialu_reg_reg);
 1.10505 +%}
 1.10506 +
 1.10507 +instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
 1.10508 +%{  // OverflowMulI (reg, reg): flags come from actually performing the 32-bit multiply.
 1.10509 +  match(Set cr (OverflowMulI op1 op2));
 1.10510 +  effect(DEF cr, USE_KILL op1, USE op2);  // op1 (pinned to rax) is overwritten by the product
 1.10511 +
 1.10512 +  format %{ "imull    $op1, $op2\t# overflow check int" %}
 1.10513 +  ins_encode %{
 1.10514 +    __ imull($op1$$Register, $op2$$Register);
 1.10515 +  %}
 1.10516 +  ins_pipe(ialu_reg_reg_alu0);
 1.10517 +%}
 1.10518 +
 1.10519 +instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
 1.10520 +%{  // OverflowMulI (reg, immediate): three-operand multiply writes into tmp, so op1 is only read.
 1.10521 +  match(Set cr (OverflowMulI op1 op2));
 1.10522 +  effect(DEF cr, TEMP tmp, USE op1, USE op2);  // op1 is declared USE (not USE_KILL), unlike the reg-reg variant
 1.10523 +
 1.10524 +  format %{ "imull    $tmp, $op1, $op2\t# overflow check int" %}
 1.10525 +  ins_encode %{
 1.10526 +    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
 1.10527 +  %}
 1.10528 +  ins_pipe(ialu_reg_reg_alu0);
 1.10529 +%}
 1.10530 +
 1.10531 +instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
 1.10532 +%{  // OverflowMulL (reg, reg): flags come from actually performing the 64-bit multiply.
 1.10533 +  match(Set cr (OverflowMulL op1 op2));
 1.10534 +  effect(DEF cr, USE_KILL op1, USE op2);  // op1 (pinned to rax) is overwritten by the product
 1.10535 +
 1.10536 +  format %{ "imulq    $op1, $op2\t# overflow check long" %}
 1.10537 +  ins_encode %{
 1.10538 +    __ imulq($op1$$Register, $op2$$Register);
 1.10539 +  %}
 1.10540 +  ins_pipe(ialu_reg_reg_alu0);
 1.10541 +%}
 1.10542 +
 1.10543 +instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
 1.10544 +%{  // OverflowMulL (reg, immL32 constant): three-operand multiply writes into tmp, so op1 is only read.
 1.10545 +  match(Set cr (OverflowMulL op1 op2));
 1.10546 +  effect(DEF cr, TEMP tmp, USE op1, USE op2);  // op1 is declared USE (not USE_KILL), unlike the reg-reg variant
 1.10547 +
 1.10548 +  format %{ "imulq    $tmp, $op1, $op2\t# overflow check long" %}
 1.10549 +  ins_encode %{
 1.10550 +    __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
 1.10551 +  %}
 1.10552 +  ins_pipe(ialu_reg_reg_alu0);
 1.10553 +%}
 1.10554 +
 1.10555 +
 1.10556 +//----------Control Flow Instructions------------------------------------------
 1.10557 +// Signed compare Instructions
 1.10558 +
 1.10559 +// XXX more variants!!
 1.10560 +instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
 1.10561 +%{  // CmpI of two int registers, producing signed flags (rFlagsReg).
 1.10562 +  match(Set cr (CmpI op1 op2));
 1.10563 +  effect(DEF cr, USE op1, USE op2);  // effects spelled out explicitly; this rule is also instantiated from the minI_rReg/maxI_rReg expand blocks
 1.10564 +
 1.10565 +  format %{ "cmpl    $op1, $op2" %}
 1.10566 +  opcode(0x3B);  /* Opcode 3B /r */
 1.10567 +  ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));  // encoding classes are defined elsewhere in the file
 1.10568 +  ins_pipe(ialu_cr_reg_reg);
 1.10569 +%}
 1.10570 +
 1.10571 +instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
 1.10572 +%{  // CmpI of an int register against an immediate, producing signed flags.
 1.10573 +  match(Set cr (CmpI op1 op2));
 1.10574 +
 1.10575 +  format %{ "cmpl    $op1, $op2" %}
 1.10576 +  opcode(0x81, 0x07); /* Opcode 81 /7 */
 1.10577 +  ins_encode(OpcSErm(op1, op2), Con8or32(op2));  // Con8or32: presumably emits an 8-bit immediate when the constant fits -- TODO confirm
 1.10578 +  ins_pipe(ialu_cr_reg_imm);
 1.10579 +%}
 1.10580 +
 1.10581 +instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
 1.10582 +%{  // CmpI of an int register against an int loaded from memory; the LoadI is folded into the compare.
 1.10583 +  match(Set cr (CmpI op1 (LoadI op2)));
 1.10584 +
 1.10585 +  ins_cost(500); // XXX
 1.10586 +  format %{ "cmpl    $op1, $op2" %}
 1.10587 +  opcode(0x3B); /* Opcode 3B /r */
 1.10588 +  ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
 1.10589 +  ins_pipe(ialu_cr_reg_mem);
 1.10590 +%}
 1.10591 +
 1.10592 +instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero)
 1.10593 +%{  // CmpI of an int register against constant zero, emitted as a test of the register with itself.
 1.10594 +  match(Set cr (CmpI src zero));
 1.10595 +
 1.10596 +  format %{ "testl   $src, $src" %}
 1.10597 +  opcode(0x85);
 1.10598 +  ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));  // src is passed as both operands; the zero operand is not encoded
 1.10599 +  ins_pipe(ialu_cr_reg_imm);
 1.10600 +%}
 1.10601 +
 1.10602 +instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
 1.10603 +%{  // Folds (src AND con) compared with zero into a single test instruction.
 1.10604 +  match(Set cr (CmpI (AndI src con) zero));
 1.10605 +
 1.10606 +  format %{ "testl   $src, $con" %}
 1.10607 +  opcode(0xF7, 0x00);
 1.10608 +  ins_encode(REX_reg(src), OpcP, reg_opc(src), Con32(con));  // the mask is always emitted via Con32
 1.10609 +  ins_pipe(ialu_cr_reg_imm);
 1.10610 +%}
 1.10611 +
 1.10612 +instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero)
 1.10613 +%{  // Folds (src AND loaded int) compared with zero into a single test against memory.
 1.10614 +  match(Set cr (CmpI (AndI src (LoadI mem)) zero));
 1.10615 +
 1.10616 +  format %{ "testl   $src, $mem" %}
 1.10617 +  opcode(0x85);
 1.10618 +  ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
 1.10619 +  ins_pipe(ialu_cr_reg_mem);
 1.10620 +%}
 1.10621 +
 1.10622 +// Unsigned compare Instructions; really, same as signed except they
 1.10623 +// produce an rFlagsRegU instead of rFlagsReg.
 1.10624 +instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
 1.10625 +%{  // CmpU of two int registers; same opcode as compI_rReg but the result operand is rFlagsRegU.
 1.10626 +  match(Set cr (CmpU op1 op2));
 1.10627 +
 1.10628 +  format %{ "cmpl    $op1, $op2\t# unsigned" %}
 1.10629 +  opcode(0x3B); /* Opcode 3B /r */
 1.10630 +  ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
 1.10631 +  ins_pipe(ialu_cr_reg_reg);
 1.10632 +%}
 1.10633 +
 1.10634 +instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
 1.10635 +%{  // CmpU of an int register against an immediate, producing unsigned flags (rFlagsRegU).
 1.10636 +  match(Set cr (CmpU op1 op2));
 1.10637 +
 1.10638 +  format %{ "cmpl    $op1, $op2\t# unsigned" %}
 1.10639 +  opcode(0x81,0x07); /* Opcode 81 /7 */
 1.10640 +  ins_encode(OpcSErm(op1, op2), Con8or32(op2));  // same encoding classes as compI_rReg_imm
 1.10641 +  ins_pipe(ialu_cr_reg_imm);
 1.10642 +%}
 1.10643 +
 1.10644 +instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
 1.10645 +%{  // CmpU of an int register against an int loaded from memory; the LoadI is folded into the compare.
 1.10646 +  match(Set cr (CmpU op1 (LoadI op2)));
 1.10647 +
 1.10648 +  ins_cost(500); // XXX
 1.10649 +  format %{ "cmpl    $op1, $op2\t# unsigned" %}
 1.10650 +  opcode(0x3B); /* Opcode 3B /r */
 1.10651 +  ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
 1.10652 +  ins_pipe(ialu_cr_reg_mem);
 1.10653 +%}
 1.10654 +
 1.10655 +// // // Cisc-spilled version of cmpU_rReg
 1.10656 +// //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2)
 1.10657 +// //%{
 1.10658 +// //  match(Set cr (CmpU (LoadI op1) op2));
 1.10659 +// //
 1.10660 +// //  format %{ "CMPu   $op1,$op2" %}
 1.10661 +// //  ins_cost(500);
 1.10662 +// //  opcode(0x39);  /* Opcode 39 /r */
 1.10663 +// //  ins_encode( OpcP, reg_mem( op1, op2) );
 1.10664 +// //%}
 1.10665 +
 1.10666 +instruct testU_reg(rFlagsRegU cr, rRegI src, immI0 zero)
 1.10667 +%{  // CmpU of an int register against constant zero, emitted as a test of the register with itself.
 1.10668 +  match(Set cr (CmpU src zero));
 1.10669 +
 1.10670 +  format %{ "testl  $src, $src\t# unsigned" %}
 1.10671 +  opcode(0x85);
 1.10672 +  ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));  // src is passed as both operands; the zero operand is not encoded
 1.10673 +  ins_pipe(ialu_cr_reg_imm);
 1.10674 +%}
 1.10675 +
 1.10676 +instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
 1.10677 +%{  // CmpP of two pointer registers; the result operand is the unsigned flags class.
 1.10678 +  match(Set cr (CmpP op1 op2));
 1.10679 +
 1.10680 +  format %{ "cmpq    $op1, $op2\t# ptr" %}
 1.10681 +  opcode(0x3B); /* Opcode 3B /r */
 1.10682 +  ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));  // "_wide" prefix class, matching the cmpq in the format
 1.10683 +  ins_pipe(ialu_cr_reg_reg);
 1.10684 +%}
 1.10685 +
 1.10686 +instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
 1.10687 +%{  // CmpP of a pointer register against a pointer loaded from memory; the LoadP is folded into the compare.
 1.10688 +  match(Set cr (CmpP op1 (LoadP op2)));
 1.10689 +
 1.10690 +  ins_cost(500); // XXX
 1.10691 +  format %{ "cmpq    $op1, $op2\t# ptr" %}
 1.10692 +  opcode(0x3B); /* Opcode 3B /r */
 1.10693 +  ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
 1.10694 +  ins_pipe(ialu_cr_reg_mem);
 1.10695 +%}
 1.10696 +
 1.10697 +// // // Cisc-spilled version of cmpP_rReg
 1.10698 +// //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2)
 1.10699 +// //%{
 1.10700 +// //  match(Set cr (CmpP (LoadP op1) op2));
 1.10701 +// //
 1.10702 +// //  format %{ "CMPu   $op1,$op2" %}
 1.10703 +// //  ins_cost(500);
 1.10704 +// //  opcode(0x39);  /* Opcode 39 /r */
 1.10705 +// //  ins_encode( OpcP, reg_mem( op1, op2) );
 1.10706 +// //%}
 1.10707 +
 1.10708 +// XXX this is generalized by compP_rReg_mem???
 1.10709 +// Compare raw pointer (used in out-of-heap check).
 1.10710 +// Only works because non-oop pointers must be raw pointers
 1.10711 +// and raw pointers have no anti-dependencies.
 1.10712 +instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
 1.10713 +%{  // Same match shape and encoding as compP_rReg_mem, but predicate-restricted and without the ins_cost(500).
 1.10714 +  predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none);  // n->in(2) is the LoadP; its in(2) is presumably the address input -- TODO confirm; must need no relocation
 1.10715 +  match(Set cr (CmpP op1 (LoadP op2)));
 1.10716 +
 1.10717 +  format %{ "cmpq    $op1, $op2\t# raw ptr" %}
 1.10718 +  opcode(0x3B); /* Opcode 3B /r */
 1.10719 +  ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
 1.10720 +  ins_pipe(ialu_cr_reg_mem);
 1.10721 +%}
 1.10722 +
 1.10723 +// This will generate a signed flags result. This should be OK since
 1.10724 +// any compare to a zero should be eq/neq.
 1.10725 +instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
 1.10726 +%{  // CmpP of a pointer register against the null constant, emitted as a test of the register with itself.
 1.10727 +  match(Set cr (CmpP src zero));
 1.10728 +
 1.10729 +  format %{ "testq   $src, $src\t# ptr" %}
 1.10730 +  opcode(0x85);
 1.10731 +  ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));  // src is passed as both operands; result class is rFlagsReg (signed), unlike compP_rReg
 1.10732 +  ins_pipe(ialu_cr_reg_imm);
 1.10733 +%}
 1.10734 +
 1.10735 +// This will generate a signed flags result. This should be OK since
 1.10736 +// any compare to a zero should be eq/neq.
 1.10737 +instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
 1.10738 +%{  // CmpP of a pointer loaded from memory against null, tested in place with an immediate mask.
 1.10739 +  predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));  // excluded when compressed oops run with a NULL base; testP_mem_reg0 handles part of that case
 1.10740 +  match(Set cr (CmpP (LoadP op) zero));
 1.10741 +
 1.10742 +  ins_cost(500); // XXX
 1.10743 +  format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
 1.10744 +  opcode(0xF7); /* Opcode F7 /0 */
 1.10745 +  ins_encode(REX_mem_wide(op),
 1.10746 +             OpcP, RM_opc_mem(0x00, op), Con_d32(0xFFFFFFFF));  // 32-bit immediate; the format shows a 64-bit all-ones mask, presumably via sign extension -- TODO confirm
 1.10747 +  ins_pipe(ialu_cr_reg_imm);
 1.10748 +%}
 1.10749 +
 1.10750 +instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
 1.10751 +%{  // Null test of a pointer in memory that compares against r12 instead of an immediate.
 1.10752 +  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));  // both bases must be NULL; the format text notes R12_heapbase==0 for this case
 1.10753 +  match(Set cr (CmpP (LoadP mem) zero));
 1.10754 +
 1.10755 +  format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
 1.10756 +  ins_encode %{
 1.10757 +    __ cmpq(r12, $mem$$Address);  // r12 stands in for the zero operand
 1.10758 +  %}
 1.10759 +  ins_pipe(ialu_cr_reg_mem);
 1.10760 +%}
 1.10761 +
 1.10762 +instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
 1.10763 +%{  // CmpN of two narrow-oop registers, emitted as a 32-bit compare.
 1.10764 +  match(Set cr (CmpN op1 op2));
 1.10765 +
 1.10766 +  format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
 1.10767 +  ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
 1.10768 +  ins_pipe(ialu_cr_reg_reg);
 1.10769 +%}
 1.10770 +
 1.10771 +instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
 1.10772 +%{  // CmpN of a narrow-oop register against a narrow oop loaded from memory; the LoadN is folded into the compare.
 1.10773 +  match(Set cr (CmpN src (LoadN mem)));
 1.10774 +
 1.10775 +  format %{ "cmpl    $src, $mem\t# compressed ptr" %}
 1.10776 +  ins_encode %{
 1.10777 +    __ cmpl($src$$Register, $mem$$Address);
 1.10778 +  %}
 1.10779 +  ins_pipe(ialu_cr_reg_mem);
 1.10780 +%}
 1.10781 +
 1.10782 +instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{  // CmpN of a narrow-oop register against a narrow-oop constant.
 1.10783 +  match(Set cr (CmpN op1 op2));
 1.10784 +
 1.10785 +  format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
 1.10786 +  ins_encode %{
 1.10787 +    __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);  // the constant is handed to the helper as a jobject
 1.10788 +  %}
 1.10789 +  ins_pipe(ialu_cr_reg_imm);
 1.10790 +%}
 1.10791 +
 1.10792 +instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
 1.10793 +%{  // CmpN of a narrow-oop constant against a narrow oop loaded from memory, compared in place.
 1.10794 +  match(Set cr (CmpN src (LoadN mem)));
 1.10795 +
 1.10796 +  format %{ "cmpl    $mem, $src\t# compressed ptr" %}
 1.10797 +  ins_encode %{
 1.10798 +    __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);  // NOTE(review): emitted as (mem, src) although the match is (src, mem); presumably only eq/ne consumers -- confirm
 1.10799 +  %}
 1.10800 +  ins_pipe(ialu_cr_reg_mem);
 1.10801 +%}
 1.10802 +
 1.10803 +instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{  // CmpN of a narrow register against a narrow-klass constant.
 1.10804 +  match(Set cr (CmpN op1 op2));
 1.10805 +
 1.10806 +  format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
 1.10807 +  ins_encode %{
 1.10808 +    __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);  // the constant is handed to the helper as a Klass pointer
 1.10809 +  %}
 1.10810 +  ins_pipe(ialu_cr_reg_imm);
 1.10811 +%}
 1.10812 +
 1.10813 +instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
 1.10814 +%{  // CmpN of a narrow-klass constant against a narrow klass loaded from memory (LoadNKlass), compared in place.
 1.10815 +  match(Set cr (CmpN src (LoadNKlass mem)));
 1.10816 +
 1.10817 +  format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
 1.10818 +  ins_encode %{
 1.10819 +    __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);  // NOTE(review): emitted as (mem, src) although the match is (src, mem); presumably only eq/ne consumers -- confirm
 1.10820 +  %}
 1.10821 +  ins_pipe(ialu_cr_reg_mem);
 1.10822 +%}
 1.10823 +
 1.10824 +instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{  // CmpN of a narrow-oop register against the narrow null constant.
 1.10825 +  match(Set cr (CmpN src zero));
 1.10826 +
 1.10827 +  format %{ "testl   $src, $src\t# compressed ptr" %}
 1.10828 +  ins_encode %{ __ testl($src$$Register, $src$$Register); %}
 1.10829 +  ins_pipe(ialu_cr_reg_imm);
 1.10830 +%}
 1.10831 +
 1.10832 +instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
 1.10833 +%{  // Null test of a narrow oop in memory, used when the narrow-oop base is non-NULL.
 1.10834 +  predicate(Universe::narrow_oop_base() != NULL);
 1.10835 +  match(Set cr (CmpN (LoadN mem) zero));
 1.10836 +
 1.10837 +  ins_cost(500); // XXX
 1.10838 +  format %{ "cmpl    $mem, 0\t# compressed ptr" %}
 1.10839 +  ins_encode %{
 1.10840 +    // Compare against zero, which is what the matched CmpN-with-immN0 means.
 1.10841 +    // Comparing against 0xFFFFFFFF would set ZF only for an all-ones value.
 1.10842 +    __ cmpl($mem$$Address, (int)0);
 1.10843 +  %}
 1.10844 +  ins_pipe(ialu_cr_reg_mem);
 1.10845 +%}
 1.10844 +
 1.10845 +instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
 1.10846 +%{  // Null test of a narrow oop in memory that compares against r12 instead of an immediate.
 1.10847 +  predicate(Universe::narrow_oop_base() == NULL && (Universe::narrow_klass_base() == NULL));  // both bases must be NULL; the format text notes R12_heapbase==0 for this case
 1.10848 +  match(Set cr (CmpN (LoadN mem) zero));
 1.10849 +
 1.10850 +  format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
 1.10851 +  ins_encode %{
 1.10852 +    __ cmpl(r12, $mem$$Address);  // r12 stands in for the zero operand
 1.10853 +  %}
 1.10854 +  ins_pipe(ialu_cr_reg_mem);
 1.10855 +%}
 1.10856 +
 1.10857 +// Yanked all unsigned pointer compare operations.
 1.10858 +// Pointer compares are done with CmpP which is already unsigned.
 1.10859 +
 1.10860 +instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
 1.10861 +%{  // CmpL of two long registers, producing signed flags.
 1.10862 +  match(Set cr (CmpL op1 op2));
 1.10863 +
 1.10864 +  format %{ "cmpq    $op1, $op2" %}
 1.10865 +  opcode(0x3B);  /* Opcode 3B /r */
 1.10866 +  ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));  // "_wide" prefix class, matching the cmpq in the format
 1.10867 +  ins_pipe(ialu_cr_reg_reg);
 1.10868 +%}
 1.10869 +
 1.10870 +instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
 1.10871 +%{  // CmpL of a long register against an immL32 constant.
 1.10872 +  match(Set cr (CmpL op1 op2));
 1.10873 +
 1.10874 +  format %{ "cmpq    $op1, $op2" %}
 1.10875 +  opcode(0x81, 0x07); /* Opcode 81 /7 */
 1.10876 +  ins_encode(OpcSErm_wide(op1, op2), Con8or32(op2));  // wide counterpart of the classes used by compI_rReg_imm
 1.10877 +  ins_pipe(ialu_cr_reg_imm);
 1.10878 +%}
 1.10879 +
 1.10880 +instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
 1.10881 +%{  // CmpL of a long register against a long loaded from memory; unlike the int and pointer memory forms, no ins_cost is set here.
 1.10882 +  match(Set cr (CmpL op1 (LoadL op2)));
 1.10883 +
 1.10884 +  format %{ "cmpq    $op1, $op2" %}
 1.10885 +  opcode(0x3B); /* Opcode 3B /r */
 1.10886 +  ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
 1.10887 +  ins_pipe(ialu_cr_reg_mem);
 1.10888 +%}
 1.10889 +
 1.10890 +instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
 1.10891 +%{  // CmpL of a long register against constant zero, emitted as a test of the register with itself.
 1.10892 +  match(Set cr (CmpL src zero));
 1.10893 +
 1.10894 +  format %{ "testq   $src, $src" %}
 1.10895 +  opcode(0x85);
 1.10896 +  ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));  // src is passed as both operands; the zero operand is not encoded
 1.10897 +  ins_pipe(ialu_cr_reg_imm);
 1.10898 +%}
 1.10899 +
 1.10900 +instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
 1.10901 +%{  // Folds (src AND immL32 constant) compared with zero into a single test instruction.
 1.10902 +  match(Set cr (CmpL (AndL src con) zero));
 1.10903 +
 1.10904 +  format %{ "testq   $src, $con\t# long" %}
 1.10905 +  opcode(0xF7, 0x00);
 1.10906 +  ins_encode(REX_reg_wide(src), OpcP, reg_opc(src), Con32(con));  // the mask is emitted via Con32
 1.10907 +  ins_pipe(ialu_cr_reg_imm);
 1.10908 +%}
 1.10909 +
 1.10910 +instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
 1.10911 +%{  // Folds (src AND loaded long) compared with zero into a single test against memory.
 1.10912 +  match(Set cr (CmpL (AndL src (LoadL mem)) zero));
 1.10913 +
 1.10914 +  format %{ "testq   $src, $mem" %}
 1.10915 +  opcode(0x85);
 1.10916 +  ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
 1.10917 +  ins_pipe(ialu_cr_reg_mem);
 1.10918 +%}
 1.10919 +
 1.10920 +// Manifest a CmpL result in an integer register.  Very painful.
 1.10921 +// This is the test to avoid.
 1.10922 +instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
 1.10923 +%{  // CmpL3: puts the outcome of a long compare into an int register rather than into flags.
 1.10924 +  match(Set dst (CmpL3 src1 src2));
 1.10925 +  effect(KILL flags);  // the flags are clobbered, not produced as a result
 1.10926 +
 1.10927 +  ins_cost(275); // XXX
 1.10928 +  format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
 1.10929 +            "movl    $dst, -1\n\t"
 1.10930 +            "jl,s    done\n\t"
 1.10931 +            "setne   $dst\n\t"
 1.10932 +            "movzbl  $dst, $dst\n\t"
 1.10933 +    "done:" %}
 1.10934 +  ins_encode(cmpl3_flag(src1, src2, dst));  // encoding class defined elsewhere; per the format text dst ends up -1, 0 or 1
 1.10935 +  ins_pipe(pipe_slow);
 1.10936 +%}
 1.10937 +
 1.10938 +//----------Max and Min--------------------------------------------------------
 1.10939 +// Min Instructions
 1.10940 +
 1.10941 +instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
 1.10942 +%{  // Helper with no match rule; instantiated by the expand block of minI_rReg.
 1.10943 +  effect(USE_DEF dst, USE src, USE cr);  // dst is both read and written; the flags are only read
 1.10944 +
 1.10945 +  format %{ "cmovlgt $dst, $src\t# min" %}
 1.10946 +  opcode(0x0F, 0x4F);
 1.10947 +  ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));  // two opcode bytes, emitted through OpcP then OpcS
 1.10948 +  ins_pipe(pipe_cmov_reg);
 1.10949 +%}
 1.10950 +
 1.10951 +
 1.10952 +instruct minI_rReg(rRegI dst, rRegI src)
 1.10953 +%{  // MinI in place on dst: expands to a compare of dst with src followed by the "greater" conditional move of src into dst.
 1.10954 +  match(Set dst (MinI dst src));
 1.10955 +
 1.10956 +  ins_cost(200);
 1.10957 +  expand %{
 1.10958 +    rFlagsReg cr;
 1.10959 +    compI_rReg(cr, dst, src);
 1.10960 +    cmovI_reg_g(dst, src, cr);
 1.10961 +  %}
 1.10962 +%}
 1.10963 +
 1.10964 +instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
 1.10965 +%{  // Helper with no match rule; instantiated by the expand block of maxI_rReg.
 1.10966 +  effect(USE_DEF dst, USE src, USE cr);  // dst is both read and written; the flags are only read
 1.10967 +
 1.10968 +  format %{ "cmovllt $dst, $src\t# max" %}
 1.10969 +  opcode(0x0F, 0x4C);
 1.10970 +  ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));  // two opcode bytes, emitted through OpcP then OpcS
 1.10971 +  ins_pipe(pipe_cmov_reg);
 1.10972 +%}
 1.10973 +
 1.10974 +
 1.10975 +instruct maxI_rReg(rRegI dst, rRegI src)
 1.10976 +%{  // MaxI in place on dst: expands to a compare of dst with src followed by the "less" conditional move of src into dst.
 1.10977 +  match(Set dst (MaxI dst src));
 1.10978 +
 1.10979 +  ins_cost(200);
 1.10980 +  expand %{
 1.10981 +    rFlagsReg cr;
 1.10982 +    compI_rReg(cr, dst, src);
 1.10983 +    cmovI_reg_l(dst, src, cr);
 1.10984 +  %}
 1.10985 +%}
 1.10986 +
 1.10987 +// ============================================================================
 1.10988 +// Branch Instructions
 1.10989 +
 1.10990 +// Jump Direct - Label defines a relative address from JMP+1
 1.10991 +instruct jmpDir(label labl)
 1.10992 +%{  // Unconditional branch for Goto nodes.
 1.10993 +  match(Goto);
 1.10994 +  effect(USE labl);
 1.10995 +
 1.10996 +  ins_cost(300);
 1.10997 +  format %{ "jmp     $labl" %}
 1.10998 +  size(5);  // declared instruction size; goes with always requesting the long jump form below
 1.10999 +  ins_encode %{
 1.11000 +    Label* L = $labl$$label;
 1.11001 +    __ jmp(*L, false); // Always long jump
 1.11002 +  %}
 1.11003 +  ins_pipe(pipe_jmp);
 1.11004 +%}
 1.11005 +
 1.11006 +// Jump Direct Conditional - Label defines a relative address from Jcc+1
 1.11007 +instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
 1.11008 +%{  // Conditional branch for If nodes whose condition reads signed flags (rFlagsReg).
 1.11009 +  match(If cop cr);
 1.11010 +  effect(USE labl);
 1.11011 +
 1.11012 +  ins_cost(300);
 1.11013 +  format %{ "j$cop     $labl" %}
 1.11014 +  size(6);  // declared instruction size; goes with always requesting the long jump form below
 1.11015 +  ins_encode %{
 1.11016 +    Label* L = $labl$$label;
 1.11017 +    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
 1.11018 +  %}
 1.11019 +  ins_pipe(pipe_jcc);
 1.11020 +%}
 1.11021 +
 1.11022 +// Jump Direct Conditional - Label defines a relative address from Jcc+1
 1.11023 +instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
 1.11024 +%{  // Conditional branch for CountedLoopEnd nodes reading signed flags; same encoding as jmpCon.
 1.11025 +  match(CountedLoopEnd cop cr);
 1.11026 +  effect(USE labl);
 1.11027 +
 1.11028 +  ins_cost(300);
 1.11029 +  format %{ "j$cop     $labl\t# loop end" %}
 1.11030 +  size(6);  // declared instruction size; goes with always requesting the long jump form below
 1.11031 +  ins_encode %{
 1.11032 +    Label* L = $labl$$label;
 1.11033 +    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
 1.11034 +  %}
 1.11035 +  ins_pipe(pipe_jcc);
 1.11036 +%}
 1.11037 +
 1.11038 +// Jump Direct Conditional - Label defines a relative address from Jcc+1
 1.11039 +instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl) %{  // CountedLoopEnd branch reading unsigned flags (rFlagsRegU).
 1.11040 +  match(CountedLoopEnd cop cmp);
 1.11041 +  effect(USE labl);
 1.11042 +
 1.11043 +  ins_cost(300);
 1.11044 +  format %{ "j$cop,u   $labl\t# loop end" %}
 1.11045 +  size(6);  // declared instruction size; goes with always requesting the long jump form below
 1.11046 +  ins_encode %{
 1.11047 +    Label* L = $labl$$label;
 1.11048 +    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
 1.11049 +  %}
 1.11050 +  ins_pipe(pipe_jcc);
 1.11051 +%}
 1.11052 +
 1.11053 +instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{  // CountedLoopEnd branch for the cmpOpUCF / rFlagsRegUCF operand pair.
 1.11054 +  match(CountedLoopEnd cop cmp);
 1.11055 +  effect(USE labl);
 1.11056 +
 1.11057 +  ins_cost(200);  // lower than the 300 used by jmpLoopEndU
 1.11058 +  format %{ "j$cop,u   $labl\t# loop end" %}
 1.11059 +  size(6);  // declared instruction size; goes with always requesting the long jump form below
 1.11060 +  ins_encode %{
 1.11061 +    Label* L = $labl$$label;
 1.11062 +    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
 1.11063 +  %}
 1.11064 +  ins_pipe(pipe_jcc);
 1.11065 +%}
 1.11066 +
 1.11067 +// Jump Direct Conditional - using unsigned comparison
 1.11068 +instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{  // If branch reading unsigned flags (rFlagsRegU).
 1.11069 +  match(If cop cmp);
 1.11070 +  effect(USE labl);
 1.11071 +
 1.11072 +  ins_cost(300);
 1.11073 +  format %{ "j$cop,u  $labl" %}
 1.11074 +  size(6);  // declared instruction size; goes with always requesting the long jump form below
 1.11075 +  ins_encode %{
 1.11076 +    Label* L = $labl$$label;
 1.11077 +    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
 1.11078 +  %}
 1.11079 +  ins_pipe(pipe_jcc);
 1.11080 +%}
 1.11081 +
 1.11082 +instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{  // If branch for the cmpOpUCF / rFlagsRegUCF operand pair.
 1.11083 +  match(If cop cmp);
 1.11084 +  effect(USE labl);
 1.11085 +
 1.11086 +  ins_cost(200);  // lower than the 300 used by jmpConU
 1.11087 +  format %{ "j$cop,u  $labl" %}
 1.11088 +  size(6);  // declared instruction size; goes with always requesting the long jump form below
 1.11089 +  ins_encode %{
 1.11090 +    Label* L = $labl$$label;
 1.11091 +    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
 1.11092 +  %}
 1.11093 +  ins_pipe(pipe_jcc);
 1.11094 +%}
 1.11095 +
 1.11096 +instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{  // If branch for cmpOpUCF2 conditions; emits a parity jump plus the eq/ne jump.
 1.11097 +  match(If cop cmp);
 1.11098 +  effect(USE labl);
 1.11099 +
 1.11100 +  ins_cost(200);
 1.11101 +  format %{ $$template
 1.11102 +    if ($cop$$cmpcode == Assembler::notEqual) {
 1.11103 +      $$emit$$"jp,u   $labl\n\t"
 1.11104 +      $$emit$$"j$cop,u   $labl"
 1.11105 +    } else {
 1.11106 +      $$emit$$"jp,u   done\n\t"
 1.11107 +      $$emit$$"j$cop,u   $labl\n\t"
 1.11108 +      $$emit$$"done:"
 1.11109 +    }
 1.11110 +  %}
 1.11111 +  ins_encode %{
 1.11112 +    Label* l = $labl$$label;
 1.11113 +    if ($cop$$cmpcode == Assembler::notEqual) {
 1.11114 +      __ jcc(Assembler::parity, *l, false);  // parity set also takes the branch; presumably the unordered FP case -- TODO confirm
 1.11115 +      __ jcc(Assembler::notEqual, *l, false);
 1.11116 +    } else if ($cop$$cmpcode == Assembler::equal) {
 1.11117 +      Label done;
 1.11118 +      __ jccb(Assembler::parity, done);  // parity set skips the equal branch via a short jump to the local label
 1.11119 +      __ jcc(Assembler::equal, *l, false);
 1.11120 +      __ bind(done);
 1.11121 +    } else {
 1.11122 +       ShouldNotReachHere();  // only equal and notEqual are handled by this rule
 1.11123 +    }
 1.11124 +  %}
 1.11125 +  ins_pipe(pipe_jcc);
 1.11126 +%}
 1.11127 +
 1.11128 +// ============================================================================
 1.11129 +// The second, slow half of a subtype check.  Scan the subklass's secondary
 1.11130 +// superklass array for an instance of the superklass.  Set a hidden
 1.11131 +// internal cache on a hit (cache is checked with exposed code in
 1.11132 +// gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
 1.11133 +// encoding ALSO sets flags.
 1.11134 +
 1.11135 +instruct partialSubtypeCheck(rdi_RegP result,
 1.11136 +                             rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
 1.11137 +                             rFlagsReg cr)
 1.11138 +%{
 1.11139 +  match(Set result (PartialSubtypeCheck sub super));
 1.11140 +  effect(KILL rcx, KILL cr); // rcx is the scan counter and the flags are overwritten (see format below)
 1.11141 +
 1.11142 +  ins_cost(1100);  // slightly larger than the next version
 1.11143 +  format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
 1.11144 +            "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
 1.11145 +            "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
 1.11146 +            "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
 1.11147 +            "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
 1.11148 +            "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
 1.11149 +            "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
 1.11150 +    "miss:\t" %}
 1.11151 +
 1.11152 +  opcode(0x1); // Force a XOR of RDI
 1.11153 +  ins_encode(enc_PartialSubtypeCheck()); // shared with partialSubtypeCheck_vs_Zero; the opcode value above selects the XOR
 1.11154 +  ins_pipe(pipe_slow);
 1.11155 +%}
 1.11156 +// Variant of partialSubtypeCheck for when the result is only compared against the immP0 zero operand: it defines the flags, so rdi ($result) is merely killed and not zeroed on a hit
 1.11157 +instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
 1.11158 +                                     rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
 1.11159 +                                     immP0 zero,
 1.11160 +                                     rdi_RegP result)
 1.11161 +%{
 1.11162 +  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
 1.11163 +  effect(KILL rcx, KILL result); // rcx is the scan counter and rdi the scan pointer (see format below)
 1.11164 +
 1.11165 +  ins_cost(1000);
 1.11166 +  format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
 1.11167 +            "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
 1.11168 +            "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
 1.11169 +            "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
 1.11170 +            "jne,s   miss\t\t# Missed: flags nz\n\t"
 1.11171 +            "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
 1.11172 +    "miss:\t" %}
 1.11173 +
 1.11174 +  opcode(0x0); // No need to XOR RDI
 1.11175 +  ins_encode(enc_PartialSubtypeCheck()); // shared with partialSubtypeCheck; the opcode value above suppresses the XOR
 1.11176 +  ins_pipe(pipe_slow);
 1.11177 +%}
 1.11178 +
 1.11179 +// ============================================================================
 1.11180 +// Branch Instructions -- short offset versions
 1.11181 +//
 1.11182 +// These instructions are used to replace jumps of a long offset (the default
 1.11183 +// match) with jumps of a shorter offset.  These instructions are all tagged
 1.11184 +// with the ins_short_branch attribute, which causes the ADLC to suppress the
 1.11185 +// match rules in general matching.  Instead, the ADLC generates a conversion
 1.11186 +// method in the MachNode which can be used to do in-place replacement of the
 1.11187 +// long variant with the shorter variant.  The compiler determines whether a
 1.11188 +// branch can use the short form via the is_short_branch_offset() predicate in
 1.11189 +// the machine-specific code section of the file.
 1.11190 +
 1.11191 +// Jump Direct (unconditional, short-offset form) - Label defines a relative address from JMP+1
 1.11192 +instruct jmpDir_short(label labl) %{
 1.11193 +  match(Goto);
 1.11194 +  effect(USE labl);
 1.11195 +
 1.11196 +  ins_cost(300);
 1.11197 +  format %{ "jmp,s   $labl" %}
 1.11198 +  size(2); // declared size of the short jump emitted by jmpb below
 1.11199 +  ins_encode %{
 1.11200 +    Label* L = $labl$$label;
 1.11201 +    __ jmpb(*L);
 1.11202 +  %}
 1.11203 +  ins_pipe(pipe_jmp);
 1.11204 +  ins_short_branch(1); // tags this rule as a short-branch replacement (see the section comment above)
 1.11205 +%}
 1.11206 +
 1.11207 +// Jump Direct Conditional (short-offset form) - Label defines a relative address from Jcc+1
 1.11208 +instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
 1.11209 +  match(If cop cr);
 1.11210 +  effect(USE labl);
 1.11211 +
 1.11212 +  ins_cost(300);
 1.11213 +  format %{ "j$cop,s   $labl" %}
 1.11214 +  size(2); // declared size of the short jcc emitted by jccb below
 1.11215 +  ins_encode %{
 1.11216 +    Label* L = $labl$$label;
 1.11217 +    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
 1.11218 +  %}
 1.11219 +  ins_pipe(pipe_jcc);
 1.11220 +  ins_short_branch(1); // tags this rule as a short-branch replacement (see the section comment above)
 1.11221 +%}
 1.11222 +
 1.11223 +// Jump Direct Conditional for a CountedLoopEnd (short-offset form) - Label defines a relative address from Jcc+1
 1.11224 +instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
 1.11225 +  match(CountedLoopEnd cop cr);
 1.11226 +  effect(USE labl);
 1.11227 +
 1.11228 +  ins_cost(300);
 1.11229 +  format %{ "j$cop,s   $labl\t# loop end" %}
 1.11230 +  size(2); // declared size of the short jcc emitted by jccb below
 1.11231 +  ins_encode %{
 1.11232 +    Label* L = $labl$$label;
 1.11233 +    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
 1.11234 +  %}
 1.11235 +  ins_pipe(pipe_jcc);
 1.11236 +  ins_short_branch(1); // tags this rule as a short-branch replacement (see the section comment above)
 1.11237 +%}
 1.11238 +
 1.11239 +// Jump Direct Conditional for a CountedLoopEnd using unsigned comparison (short-offset form) - Label defines a relative address from Jcc+1
 1.11240 +instruct jmpLoopEndU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
 1.11241 +  match(CountedLoopEnd cop cmp);
 1.11242 +  effect(USE labl);
 1.11243 +
 1.11244 +  ins_cost(300);
 1.11245 +  format %{ "j$cop,us  $labl\t# loop end" %}
 1.11246 +  size(2); // declared size of the short jcc emitted by jccb below
 1.11247 +  ins_encode %{
 1.11248 +    Label* L = $labl$$label;
 1.11249 +    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
 1.11250 +  %}
 1.11251 +  ins_pipe(pipe_jcc);
 1.11252 +  ins_short_branch(1); // tags this rule as a short-branch replacement (see the section comment above)
 1.11253 +%}
 1.11254 +// Jump Direct Conditional for a CountedLoopEnd with cmpOpUCF conditions on rFlagsRegUCF flags (short-offset form)
 1.11255 +instruct jmpLoopEndUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
 1.11256 +  match(CountedLoopEnd cop cmp);
 1.11257 +  effect(USE labl);
 1.11258 +
 1.11259 +  ins_cost(300);
 1.11260 +  format %{ "j$cop,us  $labl\t# loop end" %}
 1.11261 +  size(2); // declared size of the short jcc emitted by jccb below
 1.11262 +  ins_encode %{
 1.11263 +    Label* L = $labl$$label;
 1.11264 +    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
 1.11265 +  %}
 1.11266 +  ins_pipe(pipe_jcc);
 1.11267 +  ins_short_branch(1); // tags this rule as a short-branch replacement (see the section comment above)
 1.11268 +%}
 1.11269 +
 1.11270 +// Jump Direct Conditional - using unsigned comparison (short-offset counterpart of jmpConU)
 1.11271 +instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
 1.11272 +  match(If cop cmp);
 1.11273 +  effect(USE labl);
 1.11274 +
 1.11275 +  ins_cost(300);
 1.11276 +  format %{ "j$cop,us  $labl" %}
 1.11277 +  size(2); // declared size of the short jcc emitted by jccb below
 1.11278 +  ins_encode %{
 1.11279 +    Label* L = $labl$$label;
 1.11280 +    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
 1.11281 +  %}
 1.11282 +  ins_pipe(pipe_jcc);
 1.11283 +  ins_short_branch(1); // tags this rule as a short-branch replacement (see the section comment above)
 1.11284 +%}
 1.11285 +// Jump Direct Conditional - cmpOpUCF conditions on rFlagsRegUCF flags (short-offset counterpart of jmpConUCF)
 1.11286 +instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
 1.11287 +  match(If cop cmp);
 1.11288 +  effect(USE labl);
 1.11289 +
 1.11290 +  ins_cost(300);
 1.11291 +  format %{ "j$cop,us  $labl" %}
 1.11292 +  size(2); // declared size of the short jcc emitted by jccb below
 1.11293 +  ins_encode %{
 1.11294 +    Label* L = $labl$$label;
 1.11295 +    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
 1.11296 +  %}
 1.11297 +  ins_pipe(pipe_jcc);
 1.11298 +  ins_short_branch(1); // tags this rule as a short-branch replacement (see the section comment above)
 1.11299 +%}
 1.11300 +// Jump Direct Conditional - cmpOpUCF2 form (short-offset counterpart of jmpConUCF2): handles only equal / notEqual and additionally tests the parity flag
 1.11301 +instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
 1.11302 +  match(If cop cmp);
 1.11303 +  effect(USE labl);
 1.11304 +
 1.11305 +  ins_cost(300);
 1.11306 +  format %{ $$template
 1.11307 +    if ($cop$$cmpcode == Assembler::notEqual) {
 1.11308 +      $$emit$$"jp,u,s   $labl\n\t"
 1.11309 +      $$emit$$"j$cop,u,s   $labl"
 1.11310 +    } else {
 1.11311 +      $$emit$$"jp,u,s   done\n\t"
 1.11312 +      $$emit$$"j$cop,u,s  $labl\n\t"
 1.11313 +      $$emit$$"done:"
 1.11314 +    }
 1.11315 +  %}
 1.11316 +  size(4); // both paths below emit exactly two jccb instructions
 1.11317 +  ins_encode %{
 1.11318 +    Label* l = $labl$$label; // NOTE(review): parity presumably flags an unordered float compare - confirm against the producers of rFlagsRegUCF
 1.11319 +    if ($cop$$cmpcode == Assembler::notEqual) { // branch taken when parity OR notEqual: both jumps target labl
 1.11320 +      __ jccb(Assembler::parity, *l);
 1.11321 +      __ jccb(Assembler::notEqual, *l);
 1.11322 +    } else if ($cop$$cmpcode == Assembler::equal) { // branch taken only when equal AND NOT parity
 1.11323 +      Label done;
 1.11324 +      __ jccb(Assembler::parity, done); // parity set: skip the equal test and fall through
 1.11325 +      __ jccb(Assembler::equal, *l);
 1.11326 +      __ bind(done);
 1.11327 +    } else {
 1.11328 +       ShouldNotReachHere(); // this rule only handles equal and notEqual
 1.11329 +    }
 1.11330 +  %}
 1.11331 +  ins_pipe(pipe_jcc);
 1.11332 +  ins_short_branch(1); // tags this rule as a short-branch replacement (see the section comment above)
 1.11333 +%}
 1.11334 +
 1.11335 +// ============================================================================
 1.11336 +// inlined locking and unlocking
 1.11337 +// FastLock, RTM variant: selected when Compile::current()->use_rtm() is true; the outcome is delivered in the flags register (cr)
 1.11338 +instruct cmpFastLockRTM(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rdx_RegI scr, rRegI cx1, rRegI cx2) %{
 1.11339 +  predicate(Compile::current()->use_rtm());
 1.11340 +  match(Set cr (FastLock object box));
 1.11341 +  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box); // four scratch registers; box is consumed and clobbered
 1.11342 +  ins_cost(300);
 1.11343 +  format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
 1.11344 +  ins_encode %{
 1.11345 +    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
 1.11346 +                 $scr$$Register, $cx1$$Register, $cx2$$Register,
 1.11347 +                 _counters, _rtm_counters, _stack_rtm_counters,
 1.11348 +                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(), // method data of the method being compiled
 1.11349 +                 true, ra_->C->profile_rtm()); // NOTE(review): trailing flags presumably mean use_rtm / profile_rtm - confirm against fast_lock
 1.11350 +  %}
 1.11351 +  ins_pipe(pipe_slow);
 1.11352 +%}
 1.11353 +// FastLock, non-RTM variant: selected when Compile::current()->use_rtm() is false; the outcome is delivered in the flags register (cr)
 1.11354 +instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr) %{
 1.11355 +  predicate(!Compile::current()->use_rtm());
 1.11356 +  match(Set cr (FastLock object box));
 1.11357 +  effect(TEMP tmp, TEMP scr, USE_KILL box); // two scratch registers; box is consumed and clobbered
 1.11358 +  ins_cost(300);
 1.11359 +  format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr" %}
 1.11360 +  ins_encode %{
 1.11361 +    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
 1.11362 +                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false); // same call as cmpFastLockRTM with the extra registers, RTM counters, method data and flags stubbed out
 1.11363 +  %}
 1.11364 +  ins_pipe(pipe_slow);
 1.11365 +%}
 1.11366 +// FastUnlock: a single rule serves both the RTM and non-RTM cases; the outcome is delivered in the flags register (cr)
 1.11367 +instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP box, rRegP tmp) %{
 1.11368 +  match(Set cr (FastUnlock object box));
 1.11369 +  effect(TEMP tmp, USE_KILL box); // one scratch register; box is consumed and clobbered
 1.11370 +  ins_cost(300);
 1.11371 +  format %{ "fastunlock $object,$box\t! kills $box,$tmp" %}
 1.11372 +  ins_encode %{
 1.11373 +    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm()); // RTM choice is passed at emit time rather than by predicate
 1.11374 +  %}
 1.11375 +  ins_pipe(pipe_slow);
 1.11376 +%}
 1.11377 +
 1.11378 +
 1.11379 +// ============================================================================
 1.11380 +// Safepoint Instructions
 1.11381 +instruct safePoint_poll(rFlagsReg cr)
 1.11382 +%{
 1.11383 +  predicate(!Assembler::is_polling_page_far()); // near variant: the polling page is addressed directly via an AddressLiteral
 1.11384 +  match(SafePoint);
 1.11385 +  effect(KILL cr); // the testl below overwrites the flags
 1.11386 +
 1.11387 +  format %{ "testl  rax, [rip + #offset_to_poll_page]\t"
 1.11388 +            "# Safepoint: poll for GC" %}
 1.11389 +  ins_cost(125);
 1.11390 +  ins_encode %{
 1.11391 +    AddressLiteral addr(os::get_polling_page(), relocInfo::poll_type); // polling page address tagged with a poll_type relocation
 1.11392 +    __ testl(rax, addr);
 1.11393 +  %}
 1.11394 +  ins_pipe(ialu_reg_mem);
 1.11395 +%}
 1.11396 +// Safepoint poll, far variant: selected when Assembler::is_polling_page_far(); the polling page address arrives in the $poll register
 1.11397 +instruct safePoint_poll_far(rFlagsReg cr, rRegP poll)
 1.11398 +%{
 1.11399 +  predicate(Assembler::is_polling_page_far());
 1.11400 +  match(SafePoint poll);
 1.11401 +  effect(KILL cr, USE poll); // the testl below overwrites the flags
 1.11402 +
 1.11403 +  format %{ "testl  rax, [$poll]\t"
 1.11404 +            "# Safepoint: poll for GC" %}
 1.11405 +  ins_cost(125);
 1.11406 +  ins_encode %{
 1.11407 +    __ relocate(relocInfo::poll_type); // record the poll_type relocation before emitting the poll instruction
 1.11408 +    __ testl(rax, Address($poll$$Register, 0));
 1.11409 +  %}
 1.11410 +  ins_pipe(ialu_reg_mem);
 1.11411 +%}
 1.11412 +
 1.11413 +// ============================================================================
 1.11414 +// Procedure Call/Return Instructions
 1.11415 +// Call Java Static Instruction (all static calls except method handle invokes; see CallStaticJavaHandle)
 1.11416 +// Note: If this code changes, the corresponding ret_addr_offset() and
 1.11417 +//       compute_padding() functions will have to be adjusted.
 1.11418 +instruct CallStaticJavaDirect(method meth) %{
 1.11419 +  match(CallStaticJava);
 1.11420 +  predicate(!((CallStaticJavaNode*) n)->is_method_handle_invoke());
 1.11421 +  effect(USE meth);
 1.11422 +
 1.11423 +  ins_cost(300);
 1.11424 +  format %{ "call,static " %}
 1.11425 +  opcode(0xE8); /* E8 cd */
 1.11426 +  ins_encode(clear_avx, Java_Static_Call(meth), call_epilog); // encoding classes run in this order
 1.11427 +  ins_pipe(pipe_slow);
 1.11428 +  ins_alignment(4);
 1.11429 +%}
 1.11430 +
 1.11431 +// Call Java Static Instruction (method handle version): same call as CallStaticJavaDirect, bracketed by preserve_SP / restore_SP
 1.11432 +// Note: If this code changes, the corresponding ret_addr_offset() and
 1.11433 +//       compute_padding() functions will have to be adjusted.
 1.11434 +instruct CallStaticJavaHandle(method meth, rbp_RegP rbp_mh_SP_save) %{
 1.11435 +  match(CallStaticJava);
 1.11436 +  predicate(((CallStaticJavaNode*) n)->is_method_handle_invoke());
 1.11437 +  effect(USE meth);
 1.11438 +  // RBP is saved by all callees (for interpreter stack correction).
 1.11439 +  // We use it here for a similar purpose, in {preserve,restore}_SP.
 1.11440 +
 1.11441 +  ins_cost(300);
 1.11442 +  format %{ "call,static/MethodHandle " %}
 1.11443 +  opcode(0xE8); /* E8 cd */
 1.11444 +  ins_encode(clear_avx, preserve_SP,
 1.11445 +             Java_Static_Call(meth),
 1.11446 +             restore_SP,
 1.11447 +             call_epilog);
 1.11448 +  ins_pipe(pipe_slow);
 1.11449 +  ins_alignment(4);
 1.11450 +%}
 1.11451 +
 1.11452 +// Call Java Dynamic Instruction (per the format, rax is loaded with Universe::non_oop_word() before the call)
 1.11453 +// Note: If this code changes, the corresponding ret_addr_offset() and
 1.11454 +//       compute_padding() functions will have to be adjusted.
 1.11455 +instruct CallDynamicJavaDirect(method meth)
 1.11456 +%{
 1.11457 +  match(CallDynamicJava);
 1.11458 +  effect(USE meth);
 1.11459 +
 1.11460 +  ins_cost(300);
 1.11461 +  format %{ "movq    rax, #Universe::non_oop_word()\n\t"
 1.11462 +            "call,dynamic " %}
 1.11463 +  ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog); // encoding classes run in this order
 1.11464 +  ins_pipe(pipe_slow);
 1.11465 +  ins_alignment(4);
 1.11466 +%}
 1.11467 +
 1.11468 +// Call Runtime Instruction (matches CallRuntime nodes)
 1.11469 +instruct CallRuntimeDirect(method meth)
 1.11470 +%{
 1.11471 +  match(CallRuntime);
 1.11472 +  effect(USE meth);
 1.11473 +
 1.11474 +  ins_cost(300);
 1.11475 +  format %{ "call,runtime " %}
 1.11476 +  ins_encode(clear_avx, Java_To_Runtime(meth)); // no call_epilog here, unlike the Java call rules
 1.11477 +  ins_pipe(pipe_slow);
 1.11478 +%}
 1.11479 +
 1.11480 +// Call runtime without safepoint (matches CallLeaf nodes)
 1.11481 +instruct CallLeafDirect(method meth)
 1.11482 +%{
 1.11483 +  match(CallLeaf);
 1.11484 +  effect(USE meth);
 1.11485 +
 1.11486 +  ins_cost(300);
 1.11487 +  format %{ "call_leaf,runtime " %}
 1.11488 +  ins_encode(clear_avx, Java_To_Runtime(meth)); // same encoding as CallRuntimeDirect
 1.11489 +  ins_pipe(pipe_slow);
 1.11490 +%}
 1.11491 +
 1.11492 +// Call runtime without safepoint (matches CallLeafNoFP nodes)
 1.11493 +instruct CallLeafNoFPDirect(method meth)
 1.11494 +%{
 1.11495 +  match(CallLeafNoFP);
 1.11496 +  effect(USE meth);
 1.11497 +
 1.11498 +  ins_cost(300);
 1.11499 +  format %{ "call_leaf_nofp,runtime " %}
 1.11500 +  ins_encode(Java_To_Runtime(meth)); // unlike CallLeafDirect, no clear_avx precedes the call
 1.11501 +  ins_pipe(pipe_slow);
 1.11502 +%}
 1.11503 +
 1.11504 +// Return Instruction
 1.11505 +// Remove the return address & jump to it.
 1.11506 +// Notice: We always emit a nop after a ret to make sure there is room
 1.11507 +// for safepoint patching
 1.11508 +instruct Ret()
 1.11509 +%{
 1.11510 +  match(Return);
 1.11511 +
 1.11512 +  format %{ "ret" %}
 1.11513 +  opcode(0xC3);
 1.11514 +  ins_encode(OpcP); // NOTE(review): only OpcP is listed here, so the nop mentioned in the header is not visible in this rule - confirm whether that note is stale
 1.11515 +  ins_pipe(pipe_jmp);
 1.11516 +%}
 1.11517 +
 1.11518 +// Tail Call; Jump from runtime stub to Java code.
 1.11519 +// Also known as an 'interprocedural jump'.
 1.11520 +// Target of jump will eventually return to caller.
 1.11521 +// TailJump below removes the return address.
 1.11522 +instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_oop)
 1.11523 +%{
 1.11524 +  match(TailCall jump_target method_oop);
 1.11525 +
 1.11526 +  ins_cost(300);
 1.11527 +  format %{ "jmp     $jump_target\t# rbx holds method oop" %}
 1.11528 +  opcode(0xFF, 0x4); /* Opcode FF /4 */
 1.11529 +  ins_encode(REX_reg(jump_target), OpcP, reg_opc(jump_target)); // indirect jump through the jump_target register; method_oop is pinned to rbx by its operand class
 1.11530 +  ins_pipe(pipe_jmp);
 1.11531 +%}
 1.11532 +
 1.11533 +// Tail Jump; remove the return address (popped into rdx); jump to target.
 1.11534 +// TailCall above leaves the return address around.
 1.11535 +instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
 1.11536 +%{
 1.11537 +  match(TailJump jump_target ex_oop);
 1.11538 +
 1.11539 +  ins_cost(300);
 1.11540 +  format %{ "popq    rdx\t# pop return address\n\t"
 1.11541 +            "jmp     $jump_target" %}
 1.11542 +  opcode(0xFF, 0x4); /* Opcode FF /4 */
 1.11543 +  ins_encode(Opcode(0x5a), // popq rdx
 1.11544 +             REX_reg(jump_target), OpcP, reg_opc(jump_target)); // then the same indirect jump as TailCalljmpInd
 1.11545 +  ins_pipe(pipe_jmp);
 1.11546 +%}
 1.11547 +
 1.11548 +// Create exception oop: created by stack-crawling runtime code.
 1.11549 +// Created exception is now available to this handler, and is set up
 1.11550 +// just prior to jumping to this handler.  No code emitted.
 1.11551 +instruct CreateException(rax_RegP ex_oop)
 1.11552 +%{
 1.11553 +  match(Set ex_oop (CreateEx));
 1.11554 +
 1.11555 +  size(0); // zero-length: the rule only binds the CreateEx result to rax
 1.11556 +  // use the following format syntax
 1.11557 +  format %{ "# exception oop is in rax; no code emitted" %}
 1.11558 +  ins_encode(); // intentionally empty
 1.11559 +  ins_pipe(empty);
 1.11560 +%}
 1.11561 +
 1.11562 +// Rethrow exception:
 1.11563 +// The exception oop will come in the first argument position.
 1.11564 +// Then JUMP (not call) to the rethrow stub code.
 1.11565 +instruct RethrowException()
 1.11566 +%{
 1.11567 +  match(Rethrow);
 1.11568 +
 1.11569 +  // use the following format syntax
 1.11570 +  format %{ "jmp     rethrow_stub" %}
 1.11571 +  ins_encode(enc_rethrow); // the jump itself is produced by the enc_rethrow encoding class defined elsewhere in this file
 1.11572 +  ins_pipe(pipe_jmp);
 1.11573 +%}
 1.11574 +
 1.11575 +
 1.11576 +// ============================================================================
 1.11577 +// Thread-local pointer load.  This name is KNOWN by the ADLC and cannot be changed.
 1.11578 +// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
 1.11579 +// for this instruction.
 1.11580 +instruct tlsLoadP(r15_RegP dst) %{
 1.11581 +  match(Set dst (ThreadLocal));
 1.11582 +  effect(DEF dst);
 1.11583 +
 1.11584 +  size(0); // zero-length: per the format, the value already lives in R15
 1.11585 +  format %{ "# TLS is in R15" %}
 1.11586 +  ins_encode( /*empty encoding*/ );
 1.11587 +  ins_pipe(ialu_reg_reg);
 1.11588 +%}
 1.11589 +
 1.11590 +
 1.11591 +//----------PEEPHOLE RULES-----------------------------------------------------
 1.11592 +// These must follow all instruction definitions as they use the names
 1.11593 +// defined in the instructions definitions.
 1.11594 +//
 1.11595 +// peepmatch ( root_instr_name [preceding_instruction]* );
 1.11596 +//
 1.11597 +// peepconstraint %{
 1.11598 +// (instruction_number.operand_name relational_op instruction_number.operand_name
 1.11599 +//  [, ...] );
 1.11600 +// // instruction numbers are zero-based using left to right order in peepmatch
 1.11601 +//
 1.11602 +// peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
 1.11603 +// // provide an instruction_number.operand_name for each operand that appears
 1.11604 +// // in the replacement instruction's match rule
 1.11605 +//
 1.11606 +// ---------VM FLAGS---------------------------------------------------------
 1.11607 +//
 1.11608 +// All peephole optimizations can be turned off using -XX:-OptoPeephole
 1.11609 +//
 1.11610 +// Each peephole rule is given an identifying number starting with zero and
 1.11611 +// increasing by one in the order seen by the parser.  An individual peephole
 1.11612 +// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
 1.11613 +// on the command-line.
 1.11614 +//
 1.11615 +// ---------CURRENT LIMITATIONS----------------------------------------------
 1.11616 +//
 1.11617 +// Only match adjacent instructions in same basic block
 1.11618 +// Only equality constraints
 1.11619 +// Only constraints between operands, not (0.dest_reg == RAX_enc)
 1.11620 +// Only one replacement instruction
 1.11621 +//
 1.11622 +// ---------EXAMPLE----------------------------------------------------------
 1.11623 +//
 1.11624 +// // pertinent parts of existing instructions in architecture description
 1.11625 +// instruct movI(rRegI dst, rRegI src)
 1.11626 +// %{
 1.11627 +//   match(Set dst (CopyI src));
 1.11628 +// %}
 1.11629 +//
 1.11630 +// instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
 1.11631 +// %{
 1.11632 +//   match(Set dst (AddI dst src));
 1.11633 +//   effect(KILL cr);
 1.11634 +// %}
 1.11635 +//
 1.11636 +// // Change (inc mov) to lea
 1.11637 +// peephole %{
 1.11638 +//   // increment preceded by register-register move
 1.11639 +//   peepmatch ( incI_rReg movI );
 1.11640 +//   // require that the destination register of the increment
 1.11641 +//   // match the destination register of the move
 1.11642 +//   peepconstraint ( 0.dst == 1.dst );
 1.11643 +//   // construct a replacement instruction that sets
 1.11644 +//   // the destination to ( move's source register + one )
 1.11645 +//   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
 1.11646 +// %}
 1.11647 +//
 1.11648 +
 1.11649 +// Implementation no longer uses movX instructions since
 1.11650 +// machine-independent system no longer uses CopyX nodes.
 1.11651 +//
 1.11652 +// peephole
 1.11653 +// %{
 1.11654 +//   peepmatch (incI_rReg movI);
 1.11655 +//   peepconstraint (0.dst == 1.dst);
 1.11656 +//   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
 1.11657 +// %}
 1.11658 +
 1.11659 +// peephole
 1.11660 +// %{
 1.11661 +//   peepmatch (decI_rReg movI);
 1.11662 +//   peepconstraint (0.dst == 1.dst);
 1.11663 +//   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
 1.11664 +// %}
 1.11665 +
 1.11666 +// peephole
 1.11667 +// %{
 1.11668 +//   peepmatch (addI_rReg_imm movI);
 1.11669 +//   peepconstraint (0.dst == 1.dst);
 1.11670 +//   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
 1.11671 +// %}
 1.11672 +
 1.11673 +// peephole
 1.11674 +// %{
 1.11675 +//   peepmatch (incL_rReg movL);
 1.11676 +//   peepconstraint (0.dst == 1.dst);
 1.11677 +//   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
 1.11678 +// %}
 1.11679 +
 1.11680 +// peephole
 1.11681 +// %{
 1.11682 +//   peepmatch (decL_rReg movL);
 1.11683 +//   peepconstraint (0.dst == 1.dst);
 1.11684 +//   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
 1.11685 +// %}
 1.11686 +
 1.11687 +// peephole
 1.11688 +// %{
 1.11689 +//   peepmatch (addL_rReg_imm movL);
 1.11690 +//   peepconstraint (0.dst == 1.dst);
 1.11691 +//   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
 1.11692 +// %}
 1.11693 +
 1.11694 +// peephole
 1.11695 +// %{
 1.11696 +//   peepmatch (addP_rReg_imm movP);
 1.11697 +//   peepconstraint (0.dst == 1.dst);
 1.11698 +//   peepreplace (leaP_rReg_imm(0.dst 1.src 0.src));
 1.11699 +// %}
 1.11700 +
 1.11701 +// // Change load of spilled value to only a spill
 1.11702 +// instruct storeI(memory mem, rRegI src)
 1.11703 +// %{
 1.11704 +//   match(Set mem (StoreI mem src));
 1.11705 +// %}
 1.11706 +//
 1.11707 +// instruct loadI(rRegI dst, memory mem)
 1.11708 +// %{
 1.11709 +//   match(Set dst (LoadI mem));
 1.11710 +// %}
 1.11711 +//
 1.11712 +// Peephole: a loadI (instruction 0) directly preceded by a storeI (instruction 1) of the same register to the same memory operand is replaced by a single storeI
 1.11713 +peephole
 1.11714 +%{
 1.11715 +  peepmatch (loadI storeI);
 1.11716 +  peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
 1.11717 +  peepreplace (storeI(1.mem 1.mem 1.src));
 1.11718 +%}
 1.11719 +// Peephole: a loadL (instruction 0) directly preceded by a storeL (instruction 1) of the same register to the same memory operand is replaced by a single storeL
 1.11720 +peephole
 1.11721 +%{
 1.11722 +  peepmatch (loadL storeL);
 1.11723 +  peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
 1.11724 +  peepreplace (storeL(1.mem 1.mem 1.src));
 1.11725 +%}
 1.11726 +
 1.11727 +//----------SMARTSPILL RULES---------------------------------------------------
 1.11728 +// These must follow all instruction definitions as they use the names
 1.11729 +// defined in the instructions definitions.

mercurial