src/cpu/x86/vm/x86.ad

author:      kvn
date:        Fri, 15 Jun 2012 01:25:19 -0700
changeset:   3882:8c92982cbbc4
parent:      3577:9b8ce46870df
child:       3886:6f8f439e247d
permissions: -rw-r--r--

7119644: Increase superword's vector size up to 256 bits
Summary: Increase vector size up to 256-bits for YMM AVX registers on x86.
Reviewed-by: never, twisti, roland

//
// Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
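//
// For example, reading the first definition below (an illustrative reading,
// not a new definition): in
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
// XMM0 is Save-On-Call under both the VM and C calling conventions, spills as
// a float (Op_RegF), uses hardware encoding 0, and maps to the VMReg of xmm0.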

// XMM registers.  256-bit registers of 8 words each, labeled (a)-h.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
// Linux ABI:   No register is preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 preserved across function calls
//              XMM0-XMM3 might hold parameters
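//
// Consequently a scalar Double held in xmm0 occupies the two slots XMM0 and
// XMM0b, while a full 256-bit YMM value occupies all eight slots XMM0-XMM0h
// (see the vectory_reg class below).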

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next());
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next());
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next());
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next());
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next());
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next());
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next());
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next());
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next());
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next());
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next());
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next());
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next());
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next());
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next());
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next());
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next());
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next());
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next());
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next());
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next());
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next());
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next());
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next());
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

#ifdef _WIN64

reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next());
reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next());
reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next());
reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next());
reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next());
reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next());
reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next());
reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next());
reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next());
reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next());
reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next());
reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next());
reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next());
reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next());
reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next());
reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next());
reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next());
reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next());
reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next());
reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next());
reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next());
reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next());
reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next());
reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next());
reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next());
reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next());
reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next());
reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next());
reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next());
reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next());
reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next());
reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next());
reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next());
reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next());
reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next());
reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next());
reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next());
reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next());
reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next());
reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next());
reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

#else // _WIN64

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next());
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next());
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next());
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next());
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next());
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next());
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next());
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next());
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next());
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next());
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next());
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next());
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next());
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next());
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next());
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next());
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next());
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next());
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next());
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next());
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next());
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next());
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next());
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next());
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next());
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next());
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next());
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next());
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next());
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next());
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next());
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next());
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next());
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next());
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next());
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next());
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next());
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next());
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next());
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next());
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

#endif // _LP64

#endif // _WIN64

#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
#ifdef _LP64
                  ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                   );

// flags allocation class should be last.
alloc_class chunk2(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for all float registers
reg_class float_reg(XMM0,
                    XMM1,
                    XMM2,
                    XMM3,
                    XMM4,
                    XMM5,
                    XMM6,
                    XMM7
#ifdef _LP64
                   ,XMM8,
                    XMM9,
                    XMM10,
                    XMM11,
                    XMM12,
                    XMM13,
                    XMM14,
                    XMM15
#endif
                    );

// Class for all double registers
reg_class double_reg(XMM0,  XMM0b,
                     XMM1,  XMM1b,
                     XMM2,  XMM2b,
                     XMM3,  XMM3b,
                     XMM4,  XMM4b,
                     XMM5,  XMM5b,
                     XMM6,  XMM6b,
                     XMM7,  XMM7b
#ifdef _LP64
                    ,XMM8,  XMM8b,
                     XMM9,  XMM9b,
                     XMM10, XMM10b,
                     XMM11, XMM11b,
                     XMM12, XMM12b,
                     XMM13, XMM13b,
                     XMM14, XMM14b,
                     XMM15, XMM15b
#endif
                     );

// Class for all 32-bit vector registers
reg_class vectors_reg(XMM0,
                      XMM1,
                      XMM2,
                      XMM3,
                      XMM4,
                      XMM5,
                      XMM6,
                      XMM7
#ifdef _LP64
                     ,XMM8,
                      XMM9,
                      XMM10,
                      XMM11,
                      XMM12,
                      XMM13,
                      XMM14,
                      XMM15
#endif
                      );

// Class for all 64-bit vector registers
reg_class vectord_reg(XMM0,  XMM0b,
                      XMM1,  XMM1b,
                      XMM2,  XMM2b,
                      XMM3,  XMM3b,
                      XMM4,  XMM4b,
                      XMM5,  XMM5b,
                      XMM6,  XMM6b,
                      XMM7,  XMM7b
#ifdef _LP64
                     ,XMM8,  XMM8b,
                      XMM9,  XMM9b,
                      XMM10, XMM10b,
                      XMM11, XMM11b,
                      XMM12, XMM12b,
                      XMM13, XMM13b,
                      XMM14, XMM14b,
                      XMM15, XMM15b
#endif
                      );

// Class for all 128-bit vector registers
reg_class vectorx_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,
                      XMM1,  XMM1b,  XMM1c,  XMM1d,
                      XMM2,  XMM2b,  XMM2c,  XMM2d,
                      XMM3,  XMM3b,  XMM3c,  XMM3d,
                      XMM4,  XMM4b,  XMM4c,  XMM4d,
                      XMM5,  XMM5b,  XMM5c,  XMM5d,
                      XMM6,  XMM6b,  XMM6c,  XMM6d,
                      XMM7,  XMM7b,  XMM7c,  XMM7d
#ifdef _LP64
                     ,XMM8,  XMM8b,  XMM8c,  XMM8d,
                      XMM9,  XMM9b,  XMM9c,  XMM9d,
                      XMM10, XMM10b, XMM10c, XMM10d,
                      XMM11, XMM11b, XMM11c, XMM11d,
                      XMM12, XMM12b, XMM12c, XMM12d,
                      XMM13, XMM13b, XMM13c, XMM13d,
                      XMM14, XMM14b, XMM14c, XMM14d,
                      XMM15, XMM15b, XMM15c, XMM15d
#endif
                      );

// Class for all 256-bit vector registers
reg_class vectory_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                      XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                      XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                      XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                      XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                      XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                      XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                      XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
#ifdef _LP64
                     ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                      XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                      XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                      XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                      XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                      XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                      XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                      XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                      );

%}

source %{
  // Float masks come from different places depending on platform.
#ifdef _LP64
  static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
  static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
  static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
  static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
#else
  static address float_signmask()  { return (address)float_signmask_pool; }
  static address float_signflip()  { return (address)float_signflip_pool; }
  static address double_signmask() { return (address)double_signmask_pool; }
  static address double_signflip() { return (address)double_signflip_pool; }
#endif

// Map Types to machine register types
const int Matcher::base2reg[Type::lastype] = {
  Node::NotAMachineReg,0,0, Op_RegI, Op_RegL, 0, Op_RegN,
  Node::NotAMachineReg, Node::NotAMachineReg, /* tuple, array */
  Op_VecS, Op_VecD, Op_VecX, Op_VecY, /* Vectors */
  Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, /* the pointers */
  0, 0/*abio*/,
  Op_RegP /* Return address */, 0, /* the memories */
  Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD,
  0  /*bottom*/
};
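
// (Illustrative reading, not part of the original table: entries are indexed
// by the ideal Type enum, so e.g. Type::Int maps to Op_RegI and the 128-bit
// vector type maps to Op_VecX via the /* Vectors */ row above.)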

// Max vector size in bytes. 0 if not supported.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  assert(is_java_primitive(bt), "only primitive type vectors");
  if (UseSSE < 2) return 0;
  // SSE2 supports 128-bit vectors for all types.
  // AVX2 supports 256-bit vectors for all types.
  int size = (UseAVX > 1) ? 32 : 16;
  // AVX1 supports 256-bit vectors only for FLOAT and DOUBLE.
  if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
    size = 32;
  // Use the MaxVectorSize flag to limit the vector size.
  size = MIN2(size,(int)MaxVectorSize);
  // Minimum 2 values in vector (or 4 for bytes).
  switch (bt) {
  case T_DOUBLE:
  case T_LONG:
    if (size < 16) return 0;
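    // fall through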
  case T_FLOAT:
  case T_INT:
    if (size < 8) return 0;
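    // fall through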
  case T_BOOLEAN:
  case T_BYTE:
  case T_CHAR:
  case T_SHORT:
    if (size < 4) return 0;
    break;
  default:
    ShouldNotReachHere();
  }
  return size;
}
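
// Worked example (values follow directly from the code above): with AVX2,
// vector_width_in_bytes(T_INT) is MIN2(32, MaxVectorSize); with AVX1 it is
// 16 for T_INT but 32 for T_FLOAT/T_DOUBLE; with plain SSE2 it is 16 for
// every type.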

// Limits on vector size (number of elements) loaded into a vector.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
const int Matcher::min_vector_size(const BasicType bt) {
  int max_size = max_vector_size(bt);
  // The minimum size that can be loaded into a vector is 4 bytes.
  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  return MIN2(size,max_size);
}
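
// For example, with AVX2 and T_INT (4-byte elements): max_vector_size is
// 32/4 = 8 elements and min_vector_size is 2. For T_BYTE the minimum is 4
// elements, since the smallest vector load covers 4 bytes.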

// Vector ideal reg corresponding to specified size in bytes
const int Matcher::vector_ideal_reg(int size) {
  assert(MaxVectorSize >= size, "");
  switch(size) {
    case  4: return Op_VecS;
    case  8: return Op_VecD;
    case 16: return Op_VecX;
    case 32: return Op_VecY;
  }
  ShouldNotReachHere();
  return 0;
}

// x86 supports misaligned vector loads and stores.
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}

// Helper methods for MachSpillCopyNode::implementation().
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st) {
  // In the 64-bit VM size calculation is very complex, so the instructions
  // are emitted into a scratch buffer to determine the actual size.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  assert(ireg == Op_VecS || // 32bit vector
         (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
         (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
         "no non-adjacent vector moves" );
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    switch (ireg) {
    case Op_VecS: // copy whole register
    case Op_VecD:
    case Op_VecX:
      __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      break;
    case Op_VecY:
      __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      break;
    default:
      ShouldNotReachHere();
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as the SIMD prefix.
    assert(!do_size || size == 4, "incorrect size calculation");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    switch (ireg) {
    case Op_VecS:
    case Op_VecD:
    case Op_VecX:
      st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    case Op_VecY:
      st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as the SIMD prefix.
  return 4;
}

static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st) {
  // In the 64-bit VM size calculation is very complex, so the instructions
  // are emitted into a scratch buffer to determine the actual size.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecD:
        __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecX:
        __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecY:
        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecD:
        __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecX:
        __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecY:
        __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      default:
        ShouldNotReachHere();
      }
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as the SIMD prefix.
    assert(!do_size || size == (5+offset_size), "incorrect size calculation");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecD:
        st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecX:
        st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecY:
        st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecD:
        st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecX:
        st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecY:
        st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      default:
        ShouldNotReachHere();
      }
    }
#endif
  }
  int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as the SIMD prefix.
  return 5+offset_size;
}

static inline jfloat replicate4_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 32 bits.
  assert(width == 1 || width == 2, "only byte or short types here");
  int bit_width = width * 8;
  jint val = con;
  val &= (1 << bit_width) - 1;  // mask off sign bits
  while(bit_width < 32) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  jfloat fval = *((jfloat*) &val);  // coerce to float type
  return fval;
}
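
// For example, replicate4_imm(0xAB, 1) masks the constant to the byte 0xAB,
// doubles it up to 0xABAB and then 0xABABABAB, and reinterprets that bit
// pattern as a jfloat for the constant table.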

static inline jdouble replicate8_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 64 bits.
  assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
  int bit_width = width * 8;
  jlong val = con;
  val &= (((jlong) 1) << bit_width) - 1;  // mask off sign bits
  while(bit_width < 64) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  jdouble dval = *((jdouble*) &val);  // coerce to double type
  return dval;
}
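
// Likewise, replicate8_imm(0x1234, 2) produces the 64-bit pattern
// 0x1234123412341234, reinterpreted as a jdouble.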

#ifndef PRODUCT
  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("nop \t# %d bytes pad for loops and calls", _count);
  }
#endif

  void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
    MacroAssembler _masm(&cbuf);
    __ nop(_count);
  }

  uint MachNopNode::size(PhaseRegAlloc*) const {
    return _count;
  }

#ifndef PRODUCT
  void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("# breakpoint");
  }
#endif

  void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
    MacroAssembler _masm(&cbuf);
    __ int3();
  }

  uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
    return MachNode::size(ra_);
  }

%}

encode %{

  enc_class preserve_SP %{
    debug_only(int off0 = cbuf.insts_size());
    MacroAssembler _masm(&cbuf);
    // RBP is preserved across all calls, even compiled calls.
    // Use it to preserve RSP in places where the callee might change the SP.
    __ movptr(rbp_mh_SP_save, rsp);
    debug_only(int off1 = cbuf.insts_size());
    assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
  %}

  enc_class restore_SP %{
    MacroAssembler _masm(&cbuf);
    __ movptr(rsp, rbp_mh_SP_save);
  %}

  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that the stack depth is unchanged: find the magic cookie on the stack.
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      MacroAssembler _masm(&cbuf);
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}

%}


//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user-defined types which are used in
// instruction definitions.

// Vectors
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}


// INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)

// ============================================================================

instruct ShouldNotReachHere() %{
  match(Halt);
  format %{ "int3\t# ShouldNotReachHere" %}
  ins_encode %{
    __ int3();
  %}
  ins_pipe(pipe_slow);
%}
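
// Note on the scalar FP patterns below: each operation comes in destructive
// two-operand SSE forms (reg, mem, constant-table immediate), guarded by
// UseAVX == 0, and in non-destructive three-operand AVX forms (vaddss etc.),
// guarded by UseAVX > 0, where dst need not equal src1.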
   855 // ============================================================================
   857 instruct addF_reg(regF dst, regF src) %{
   858   predicate((UseSSE>=1) && (UseAVX == 0));
   859   match(Set dst (AddF dst src));
   861   format %{ "addss   $dst, $src" %}
   862   ins_cost(150);
   863   ins_encode %{
   864     __ addss($dst$$XMMRegister, $src$$XMMRegister);
   865   %}
   866   ins_pipe(pipe_slow);
   867 %}
   869 instruct addF_mem(regF dst, memory src) %{
   870   predicate((UseSSE>=1) && (UseAVX == 0));
   871   match(Set dst (AddF dst (LoadF src)));
   873   format %{ "addss   $dst, $src" %}
   874   ins_cost(150);
   875   ins_encode %{
   876     __ addss($dst$$XMMRegister, $src$$Address);
   877   %}
   878   ins_pipe(pipe_slow);
   879 %}
   881 instruct addF_imm(regF dst, immF con) %{
   882   predicate((UseSSE>=1) && (UseAVX == 0));
   883   match(Set dst (AddF dst con));
   884   format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
   885   ins_cost(150);
   886   ins_encode %{
   887     __ addss($dst$$XMMRegister, $constantaddress($con));
   888   %}
   889   ins_pipe(pipe_slow);
   890 %}
   892 instruct vaddF_reg(regF dst, regF src1, regF src2) %{
   893   predicate(UseAVX > 0);
   894   match(Set dst (AddF src1 src2));
   896   format %{ "vaddss  $dst, $src1, $src2" %}
   897   ins_cost(150);
   898   ins_encode %{
   899     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
   900   %}
   901   ins_pipe(pipe_slow);
   902 %}
   904 instruct vaddF_mem(regF dst, regF src1, memory src2) %{
   905   predicate(UseAVX > 0);
   906   match(Set dst (AddF src1 (LoadF src2)));
   908   format %{ "vaddss  $dst, $src1, $src2" %}
   909   ins_cost(150);
   910   ins_encode %{
   911     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
   912   %}
   913   ins_pipe(pipe_slow);
   914 %}
   916 instruct vaddF_imm(regF dst, regF src, immF con) %{
   917   predicate(UseAVX > 0);
   918   match(Set dst (AddF src con));
   920   format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
   921   ins_cost(150);
   922   ins_encode %{
   923     __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
   924   %}
   925   ins_pipe(pipe_slow);
   926 %}
   928 instruct addD_reg(regD dst, regD src) %{
   929   predicate((UseSSE>=2) && (UseAVX == 0));
   930   match(Set dst (AddD dst src));
   932   format %{ "addsd   $dst, $src" %}
   933   ins_cost(150);
   934   ins_encode %{
   935     __ addsd($dst$$XMMRegister, $src$$XMMRegister);
   936   %}
   937   ins_pipe(pipe_slow);
   938 %}
   940 instruct addD_mem(regD dst, memory src) %{
   941   predicate((UseSSE>=2) && (UseAVX == 0));
   942   match(Set dst (AddD dst (LoadD src)));
   944   format %{ "addsd   $dst, $src" %}
   945   ins_cost(150);
   946   ins_encode %{
   947     __ addsd($dst$$XMMRegister, $src$$Address);
   948   %}
   949   ins_pipe(pipe_slow);
   950 %}
   952 instruct addD_imm(regD dst, immD con) %{
   953   predicate((UseSSE>=2) && (UseAVX == 0));
   954   match(Set dst (AddD dst con));
   955   format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
   956   ins_cost(150);
   957   ins_encode %{
   958     __ addsd($dst$$XMMRegister, $constantaddress($con));
   959   %}
   960   ins_pipe(pipe_slow);
   961 %}
   963 instruct vaddD_reg(regD dst, regD src1, regD src2) %{
   964   predicate(UseAVX > 0);
   965   match(Set dst (AddD src1 src2));
   967   format %{ "vaddsd  $dst, $src1, $src2" %}
   968   ins_cost(150);
   969   ins_encode %{
   970     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
   971   %}
   972   ins_pipe(pipe_slow);
   973 %}
   975 instruct vaddD_mem(regD dst, regD src1, memory src2) %{
   976   predicate(UseAVX > 0);
   977   match(Set dst (AddD src1 (LoadD src2)));
   979   format %{ "vaddsd  $dst, $src1, $src2" %}
   980   ins_cost(150);
   981   ins_encode %{
   982     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
   983   %}
   984   ins_pipe(pipe_slow);
   985 %}
   987 instruct vaddD_imm(regD dst, regD src, immD con) %{
   988   predicate(UseAVX > 0);
   989   match(Set dst (AddD src con));
   991   format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
   992   ins_cost(150);
   993   ins_encode %{
   994     __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
   995   %}
   996   ins_pipe(pipe_slow);
   997 %}
   999 instruct subF_reg(regF dst, regF src) %{
  1000   predicate((UseSSE>=1) && (UseAVX == 0));
  1001   match(Set dst (SubF dst src));
  1003   format %{ "subss   $dst, $src" %}
  1004   ins_cost(150);
  1005   ins_encode %{
  1006     __ subss($dst$$XMMRegister, $src$$XMMRegister);
  1007   %}
  1008   ins_pipe(pipe_slow);
  1009 %}
  1011 instruct subF_mem(regF dst, memory src) %{
  1012   predicate((UseSSE>=1) && (UseAVX == 0));
  1013   match(Set dst (SubF dst (LoadF src)));
  1015   format %{ "subss   $dst, $src" %}
  1016   ins_cost(150);
  1017   ins_encode %{
  1018     __ subss($dst$$XMMRegister, $src$$Address);
  1019   %}
  1020   ins_pipe(pipe_slow);
  1021 %}
  1023 instruct subF_imm(regF dst, immF con) %{
  1024   predicate((UseSSE>=1) && (UseAVX == 0));
  1025   match(Set dst (SubF dst con));
  1026   format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  1027   ins_cost(150);
  1028   ins_encode %{
  1029     __ subss($dst$$XMMRegister, $constantaddress($con));
  1030   %}
  1031   ins_pipe(pipe_slow);
  1032 %}
  1034 instruct vsubF_reg(regF dst, regF src1, regF src2) %{
  1035   predicate(UseAVX > 0);
  1036   match(Set dst (SubF src1 src2));
  1038   format %{ "vsubss  $dst, $src1, $src2" %}
  1039   ins_cost(150);
  1040   ins_encode %{
  1041     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  1042   %}
  1043   ins_pipe(pipe_slow);
  1044 %}
  1046 instruct vsubF_mem(regF dst, regF src1, memory src2) %{
  1047   predicate(UseAVX > 0);
  1048   match(Set dst (SubF src1 (LoadF src2)));
  1050   format %{ "vsubss  $dst, $src1, $src2" %}
  1051   ins_cost(150);
  1052   ins_encode %{
  1053     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  1054   %}
  1055   ins_pipe(pipe_slow);
  1056 %}
  1058 instruct vsubF_imm(regF dst, regF src, immF con) %{
  1059   predicate(UseAVX > 0);
  1060   match(Set dst (SubF src con));
  1062   format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  1063   ins_cost(150);
  1064   ins_encode %{
  1065     __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  1066   %}
  1067   ins_pipe(pipe_slow);
  1068 %}
  1070 instruct subD_reg(regD dst, regD src) %{
  1071   predicate((UseSSE>=2) && (UseAVX == 0));
  1072   match(Set dst (SubD dst src));
  1074   format %{ "subsd   $dst, $src" %}
  1075   ins_cost(150);
  1076   ins_encode %{
  1077     __ subsd($dst$$XMMRegister, $src$$XMMRegister);
  1078   %}
  1079   ins_pipe(pipe_slow);
  1080 %}
  1082 instruct subD_mem(regD dst, memory src) %{
  1083   predicate((UseSSE>=2) && (UseAVX == 0));
  1084   match(Set dst (SubD dst (LoadD src)));
  1086   format %{ "subsd   $dst, $src" %}
  1087   ins_cost(150);
  1088   ins_encode %{
  1089     __ subsd($dst$$XMMRegister, $src$$Address);
  1090   %}
  1091   ins_pipe(pipe_slow);
  1092 %}
  1094 instruct subD_imm(regD dst, immD con) %{
  1095   predicate((UseSSE>=2) && (UseAVX == 0));
  1096   match(Set dst (SubD dst con));
  1097   format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  1098   ins_cost(150);
  1099   ins_encode %{
  1100     __ subsd($dst$$XMMRegister, $constantaddress($con));
  1101   %}
  1102   ins_pipe(pipe_slow);
  1103 %}
  1105 instruct vsubD_reg(regD dst, regD src1, regD src2) %{
  1106   predicate(UseAVX > 0);
  1107   match(Set dst (SubD src1 src2));
  1109   format %{ "vsubsd  $dst, $src1, $src2" %}
  1110   ins_cost(150);
  1111   ins_encode %{
  1112     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  1113   %}
  1114   ins_pipe(pipe_slow);
  1115 %}
  1117 instruct vsubD_mem(regD dst, regD src1, memory src2) %{
  1118   predicate(UseAVX > 0);
  1119   match(Set dst (SubD src1 (LoadD src2)));
  1121   format %{ "vsubsd  $dst, $src1, $src2" %}
  1122   ins_cost(150);
  1123   ins_encode %{
  1124     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  1125   %}
  1126   ins_pipe(pipe_slow);
  1127 %}
  1129 instruct vsubD_imm(regD dst, regD src, immD con) %{
  1130   predicate(UseAVX > 0);
  1131   match(Set dst (SubD src con));
  1133   format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  1134   ins_cost(150);
  1135   ins_encode %{
  1136     __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  1137   %}
  1138   ins_pipe(pipe_slow);
  1139 %}

instruct mulF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst src));
  format %{ "mulss   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst (LoadF src)));
  format %{ "mulss   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst con));
  format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct vmulF_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 src2));
  format %{ "vmulss  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct vmulF_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 (LoadF src2)));
  format %{ "vmulss  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct vmulF_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src con));
  format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst src));
  format %{ "mulsd   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst (LoadD src)));
  format %{ "mulsd   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst con));
  format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct vmulD_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 src2));
  format %{ "vmulsd  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct vmulD_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 (LoadD src2)));
  format %{ "vmulsd  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct vmulD_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src con));
  format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst src));
  format %{ "divss   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst (LoadF src)));
  format %{ "divss   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst con));
  format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct vdivF_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 src2));
  format %{ "vdivss  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct vdivF_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 (LoadF src2)));
  format %{ "vdivss  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct vdivF_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src con));
  format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst src));
  format %{ "divsd   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst (LoadD src)));
  format %{ "divsd   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst con));
  format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct vdivD_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 src2));
  format %{ "vdivsd  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct vdivD_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 (LoadD src2)));
  format %{ "vdivsd  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct vdivD_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src con));
  format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AbsF dst));
  ins_cost(150);
  format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct vabsF_reg(regF dst, regF src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AbsD dst));
  ins_cost(150);
  format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct vabsD_reg(regD dst, regD src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}
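
// The abs rules above clear the IEEE-754 sign bit by ANDing with a mask of
// all ones except the top bit.  A scalar sketch of the same bit trick
// (illustrative C++, not part of this file):
//   #include <cstdint>
//   #include <cstring>
//   float abs_by_masking(float x) {
//     uint32_t bits;
//     std::memcpy(&bits, &x, sizeof bits);
//     bits &= 0x7fffffffu;                 // clear the sign bit
//     std::memcpy(&x, &bits, sizeof x);
//     return x;
//   }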

instruct negF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (NegF dst));
  ins_cost(150);
  format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct vnegF_reg(regF dst, regF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  ins_cost(150);
  format %{ "vxorps  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ vxorps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (NegD dst));
  ins_cost(150);
  format %{ "xorpd   $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct vnegD_reg(regD dst, regD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));
  ins_cost(150);
  format %{ "vxorpd  $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
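
// The neg rules above flip only the sign bit (XOR with 0x80000000 or
// 0x8000000000000000), the same bit trick as abs_by_masking above but with
// XOR instead of AND:
//   bits ^= 0x80000000u;                   // flip the sign bit
// Unlike subtracting from zero this handles signed zeros correctly:
// 0.0 - 0.0 is +0.0, while flipping the sign bit of +0.0 yields -0.0 as
// required.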

instruct sqrtF_reg(regF dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
  format %{ "sqrtss  $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_mem(regF dst, memory src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
  format %{ "sqrtss  $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_imm(regF dst, immF con) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
  format %{ "sqrtss  $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
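
// Note on the sqrtF rules above: the ideal graph has no single-precision
// square root node (Java's Math.sqrt operates on double), so a float sqrt
// reaches the matcher as ConvD2F(SqrtD(ConvF2D src)).  Double precision is
// wide enough that float->double->sqrt->float never double rounds, so the
// whole chain can be matched to a single sqrtss.
//   // Java source producing this shape (illustrative):
//   float r = (float) Math.sqrt(f);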

instruct sqrtD_reg(regD dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD src));
  format %{ "sqrtsd  $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_mem(regD dst, memory src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD (LoadD src)));
  format %{ "sqrtsd  $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_imm(regD dst, immD con) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD con));
  format %{ "sqrtsd  $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// ====================VECTOR INSTRUCTIONS=====================================

// Load vectors (4 bytes long)
instruct loadV4(vecS dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movd    $dst,$mem\t! load vector (4 bytes)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (8 bytes long)
instruct loadV8(vecD dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movq    $dst,$mem\t! load vector (8 bytes)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (16 bytes long)
instruct loadV16(vecX dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movdqu  $dst,$mem\t! load vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (32 bytes long)
instruct loadV32(vecY dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 32);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Store vectors
instruct storeV4(memory mem, vecS src) %{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movd    $mem,$src\t! store vector (4 bytes)" %}
  ins_encode %{
    __ movdl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV8(memory mem, vecD src) %{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movq    $mem,$src\t! store vector (8 bytes)" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV16(memory mem, vecX src) %{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movdqu  $mem,$src\t! store vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV32(memory mem, vecY src) %{
  predicate(n->as_StoreVector()->memory_size() == 32);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
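
// LoadVector/StoreVector nodes are created by the SuperWord pass when it
// packs a counted loop; memory_size() then selects the widest matching rule
// above.  An illustrative Java loop that can be packed into 32-byte (vecY)
// accesses once 256-bit AVX registers are available (a, b and c are float[]
// of equal length, so eight lanes per vector):
//   for (int i = 0; i < a.length; i++) {
//     a[i] = b[i] + c[i];
//   }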

// Replicate byte scalar to be vector
instruct Repl4B(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB src));
  format %{ "movd    $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  format %{ "movd    $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate8B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB src));
  format %{ "movd    $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "movlhps $dst,$dst\t! replicate16B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB src));
  format %{ "movd    $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "movlhps $dst,$dst\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate32B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
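
// The replicate expansions above double the number of byte copies at each
// step (illustrative lane contents for src = 0xAB):
//   movdl        dst,src    // AB 00 00 00 ...             1 copy
//   punpcklbw    dst,dst    // AB AB 00 00 ...             2 copies
//   pshuflw      dst,dst,0  // AB in all 8 low bytes       8 copies
//   movlhps      dst,dst    // low qword copied high      16 copies
//   vinsertf128h dst,...    // low 128 bits copied high   32 copies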

// Replicate byte scalar immediate to be vector by loading from const table.
instruct Repl4B_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB con));
  format %{ "movss   $dst,[$constantaddress]\t! replicate4B($con)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  format %{ "movsd   $dst,[$constantaddress]\t! replicate8B($con)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));
  format %{ "movsd   $dst,[$constantaddress]\t! replicate16B($con)\n\t"
            "movlhps $dst,$dst" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB con));
  format %{ "movsd   $dst,[$constantaddress]\t! replicate32B($con)\n\t"
            "movlhps $dst,$dst\n\t"
            "vinsertf128h $dst,$dst,$dst" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
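
// The *_imm forms above fold the broadcast at compile time instead:
// replicate4_imm/replicate8_imm presumably repeat the low byte of the
// constant across a 4- or 8-byte value (e.g. 0x1F with size 1 becoming
// 0x1F1F1F1F1F1F1F1F), which is then loaded from the constant table and
// widened with movlhps/vinsertf128h exactly as in the register forms.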

// Replicate byte scalar zero to be vector
instruct Repl4B_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB zero));
  format %{ "pxor    $dst,$dst\t! replicate4B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8B_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB zero));
  format %{ "pxor    $dst,$dst\t! replicate8B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16B_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB zero));
  format %{ "pxor    $dst,$dst\t! replicate16B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl32B_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB zero));
  format %{ "vxorpd  $dst,$dst,$dst\t! replicate32B zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    bool vector256 = true;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}
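
// Zeroing via pxor/vxorpd of a register with itself is the usual x86 zero
// idiom: it needs no constant load, and modern cores recognize it as
// dependency breaking, so it does not wait on the register's old value.
//   pxor   xmm0, xmm0           // 128-bit zero
//   vxorpd ymm0, ymm0, ymm0     // 256-bit zero (vector256 == true)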

// Replicate char/short (2 byte) scalar to be vector
instruct Repl2S(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS src));
  format %{ "movd    $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate2S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  format %{ "movd    $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));
  format %{ "movd    $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "movlhps $dst,$dst\t! replicate8S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS src));
  format %{ "movd    $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "movlhps $dst,$dst\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate16S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
instruct Repl2S_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS con));
  format %{ "movss   $dst,[$constantaddress]\t! replicate2S($con)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  format %{ "movsd   $dst,[$constantaddress]\t! replicate4S($con)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS con));
  format %{ "movsd   $dst,[$constantaddress]\t! replicate8S($con)\n\t"
            "movlhps $dst,$dst" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS con));
  format %{ "movsd   $dst,[$constantaddress]\t! replicate16S($con)\n\t"
            "movlhps $dst,$dst\n\t"
            "vinsertf128h $dst,$dst,$dst" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate char/short (2 byte) scalar zero to be vector
instruct Repl2S_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS zero));
  format %{ "pxor    $dst,$dst\t! replicate2S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS zero));
  format %{ "pxor    $dst,$dst\t! replicate4S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS zero));
  format %{ "pxor    $dst,$dst\t! replicate8S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16S_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS zero));
  format %{ "vxorpd  $dst,$dst,$dst\t! replicate16S zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    bool vector256 = true;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar to be vector
instruct Repl2I(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  format %{ "movd    $dst,$src\n\t"
            "pshufd  $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI src));
  format %{ "movd    $dst,$src\n\t"
            "pshufd  $dst,$dst,0x00\t! replicate4I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI src));
  format %{ "movd    $dst,$src\n\t"
            "pshufd  $dst,$dst,0x00\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
instruct Repl2I_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  format %{ "movsd   $dst,[$constantaddress]\t! replicate2I($con)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  format %{ "movsd   $dst,[$constantaddress]\t! replicate4I($con)\n\t"
            "movlhps $dst,$dst" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI con));
  format %{ "movsd   $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "movlhps $dst,$dst\n\t"
            "vinsertf128h $dst,$dst,$dst" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Integer could be loaded into xmm register directly from memory.
instruct Repl2I_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI mem));
  format %{ "movd    $dst,$mem\n\t"
            "pshufd  $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI mem));
  format %{ "movd    $dst,$mem\n\t"
            "pshufd  $dst,$dst,0x00\t! replicate4I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI mem));
  format %{ "movd    $dst,$mem\n\t"
            "pshufd  $dst,$dst,0x00\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate integer (4 byte) scalar zero to be vector
instruct Repl2I_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  format %{ "pxor    $dst,$dst\t! replicate2I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI zero));
  format %{ "pxor    $dst,$dst\t! replicate4I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8I_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI zero));
  format %{ "vxorpd  $dst,$dst,$dst\t! replicate8I zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    bool vector256 = true;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl2L(vecX dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  format %{ "movdq   $dst,$src\n\t"
            "movlhps $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L(vecY dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL src));
  format %{ "movdq   $dst,$src\n\t"
            "movlhps $dst,$dst\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl   $dst,$src.lo\n\t"
            "movdl   $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "movlhps $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl   $dst,$src.lo\n\t"
            "movdl   $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "movlhps $dst,$dst\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64
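
// On 32-bit VM builds a long lives in a register pair, so the two halves
// are moved into the XMM register separately and merged (illustrative dword
// lane contents for src = hi:lo):
//   movdl     dst,lo       // lo 0  0  0
//   movdl     tmp,hi       // hi 0  0  0
//   punpckldq dst,tmp      // lo hi 0  0   (low qword = the whole long)
//   movlhps   dst,dst      // lo hi lo hi  (replicate2L)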

// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
instruct Repl2L_imm(vecX dst, immL con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL con));
  format %{ "movsd   $dst,[$constantaddress]\t! replicate2L($con)\n\t"
            "movlhps $dst,$dst" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L_imm(vecY dst, immL con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL con));
  format %{ "movsd   $dst,[$constantaddress]\t! replicate4L($con)\n\t"
            "movlhps $dst,$dst\n\t"
            "vinsertf128h $dst,$dst,$dst" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Long could be loaded into xmm register directly from memory.
instruct Repl2L_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL mem));
  format %{ "movq    $dst,$mem\n\t"
            "movlhps $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL mem));
  format %{ "movq    $dst,$mem\n\t"
            "movlhps $dst,$dst\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8 byte) scalar zero to be vector
instruct Repl2L_zero(vecX dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL zero));
  format %{ "pxor    $dst,$dst\t! replicate2L zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4L_zero(vecY dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL zero));
  format %{ "vxorpd  $dst,$dst,$dst\t! replicate4L zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    bool vector256 = true;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate float (4 byte) scalar to be vector
instruct Repl2F(vecD dst, regF src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  format %{ "pshufd  $dst,$src,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F(vecX dst, regF src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  format %{ "pshufd  $dst,$src,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F(vecY dst, regF src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateF src));
  format %{ "pshufd  $dst,$src,0x00\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate float (4 byte) scalar zero to be vector
instruct Repl2F_zero(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF zero));
  format %{ "xorps   $dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F_zero(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF zero));
  format %{ "xorps   $dst,$dst\t! replicate4F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_zero(vecY dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateF zero));
  format %{ "vxorps  $dst,$dst,$dst\t! replicate8F zero" %}
  ins_encode %{
    bool vector256 = true;
    __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate double (8 byte) scalar to be vector
instruct Repl2D(vecX dst, regD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  format %{ "pshufd  $dst,$src,0x44\t! replicate2D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4D(vecY dst, regD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateD src));
  format %{ "pshufd  $dst,$src,0x44\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
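
// pshufd's immediate selects source dwords, two bits per destination lane:
// 0x00 = 0b00000000 broadcasts dword 0 to all four lanes (ReplicateF),
// while 0x44 = 0b01000100 selects dwords {0,1,0,1}, duplicating the low
// 64-bit double into both qword lanes (ReplicateD).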

// Replicate double (8 byte) scalar zero to be vector
instruct Repl2D_zero(vecX dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD zero));
  format %{ "xorpd   $dst,$dst\t! replicate2D zero" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4D_zero(vecY dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateD zero));
  format %{ "vxorpd  $dst,$dst,$dst\t! replicate4D zero" %}
  ins_encode %{
    bool vector256 = true;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}
