src/cpu/x86/vm/x86.ad

author:      zgu
date:        Fri, 25 Jan 2013 10:04:08 -0500
changeset:   4492:8b46b0196eb0
parent:      4204:b2c669fd8114
child:       6312:04d32e7fad07
permissions: -rw-r--r--

8000692: Remove old KERNEL code
Summary: Removed deprecated kernel VM source code from the HotSpot VM
Reviewed-by: dholmes, acorn

     1 //
     2 // Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
     3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4 //
     5 // This code is free software; you can redistribute it and/or modify it
     6 // under the terms of the GNU General Public License version 2 only, as
     7 // published by the Free Software Foundation.
     8 //
     9 // This code is distributed in the hope that it will be useful, but WITHOUT
    10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12 // version 2 for more details (a copy is included in the LICENSE file that
    13 // accompanied this code).
    14 //
    15 // You should have received a copy of the GNU General Public License version
    16 // 2 along with this work; if not, write to the Free Software Foundation,
    17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18 //
    19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    20 // or visit www.oracle.com if you need additional information or have any
    21 // questions.
    22 //
    23 //
    25 // X86 Common Architecture Description File
    27 //----------REGISTER DEFINITION BLOCK------------------------------------------
    28 // This information is used by the matcher and the register allocator to
    29 // describe individual registers and classes of registers within the target
     30 // architecture.
    32 register %{
    33 //----------Architecture Description Register Definitions----------------------
    34 // General Registers
    35 // "reg_def"  name ( register save type, C convention save type,
    36 //                   ideal register type, encoding );
    37 // Register Save Types:
    38 //
    39 // NS  = No-Save:       The register allocator assumes that these registers
    40 //                      can be used without saving upon entry to the method, &
    41 //                      that they do not need to be saved at call sites.
    42 //
    43 // SOC = Save-On-Call:  The register allocator assumes that these registers
    44 //                      can be used without saving upon entry to the method,
    45 //                      but that they must be saved at call sites.
    46 //
    47 // SOE = Save-On-Entry: The register allocator assumes that these registers
    48 //                      must be saved before using them upon entry to the
    49 //                      method, but they do not need to be saved at call
    50 //                      sites.
    51 //
    52 // AS  = Always-Save:   The register allocator assumes that these registers
    53 //                      must be saved before using them upon entry to the
    54 //                      method, & that they must be saved at call sites.
    55 //
    56 // Ideal Register Type is used to determine how to save & restore a
    57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
    58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
    59 //
    60 // The encoding number is the actual bit-pattern placed into the opcodes.
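//
// As an illustration, the first definition below,
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
// declares XMM0 as save-on-call (SOC) for both the register allocator and
// the C calling convention, spillable as a float (Op_RegF), with hardware
// encoding 0 and its concrete slot taken from xmm0's VMReg.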
     62 // XMM registers.  256-bit registers of 8 words each, labeled (a)-h.
    63 // Word a in each register holds a Float, words ab hold a Double.
    64 // The whole registers are used in SSE4.2 version intrinsics,
    65 // array copy stubs and superword operations (see UseSSE42Intrinsics,
    66 // UseXMMForArrayCopy and UseSuperword flags).
    67 // XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
     68 // Linux ABI:   No registers are preserved across function calls
    69 //              XMM0-XMM7 might hold parameters
    70 // Windows ABI: XMM6-XMM15 preserved across function calls
    71 //              XMM0-XMM3 might hold parameters
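// Accordingly, the definitions below mark XMM6-XMM15 as save-on-entry (SOE)
// only under _WIN64; on all other platforms every XMM register is
// save-on-call (SOC).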
    73 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
    74 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
    75 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
    76 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
    77 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
    78 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
    79 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
    80 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
    82 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
    83 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
    84 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
    85 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
    86 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
    87 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
    88 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
    89 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
    91 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
    92 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
    93 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
    94 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
    95 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
    96 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
    97 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
    98 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
   100 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
   101 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
   102 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
   103 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
   104 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
   105 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
   106 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
   107 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
   109 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
   110 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
   111 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
   112 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
   113 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
   114 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
   115 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
   116 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
   118 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
   119 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
   120 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
   121 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
   122 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
   123 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
   124 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
   125 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
   127 #ifdef _WIN64
   129 reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
   130 reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
   131 reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
   132 reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
   133 reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
   134 reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
   135 reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
   136 reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));
   138 reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
   139 reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
   140 reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
   141 reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
   142 reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
   143 reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
   144 reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
   145 reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));
   147 reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
   148 reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
   149 reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
   150 reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
   151 reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
   152 reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
   153 reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
   154 reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));
   156 reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
   157 reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
   158 reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
   159 reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
   160 reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
   161 reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
   162 reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
   163 reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));
   165 reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
   166 reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
   167 reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
   168 reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
   169 reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
   170 reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
   171 reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
   172 reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));
   174 reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
   175 reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
   176 reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
   177 reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
   178 reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
   179 reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
   180 reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
   181 reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));
   183 reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
   184 reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
   185 reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
   186 reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
   187 reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
   188 reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
   189 reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
   190 reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));
   192 reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
   193 reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
   194 reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
   195 reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
   196 reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
   197 reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
   198 reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
   199 reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));
   201 reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
   202 reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
   203 reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
   204 reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
   205 reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
   206 reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
   207 reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
   208 reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));
   210 reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
   211 reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
   212 reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
   213 reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
   214 reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
   215 reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
   216 reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
   217 reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));
   219 #else // _WIN64
   221 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
   222 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
   223 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
   224 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
   225 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
   226 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
   227 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
   228 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
   230 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
   231 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
   232 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
   233 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
   234 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
   235 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
   236 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
   237 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
   239 #ifdef _LP64
   241 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
   242 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
   243 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
   244 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
   245 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
   246 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
   247 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
   248 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
   250 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
   251 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
   252 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
   253 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
   254 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
   255 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
   256 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
   257 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
   259 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
   260 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
   261 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
   262 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
   263 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
   264 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
   265 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
   266 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
   268 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
   269 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
   270 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
   271 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
   272 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
   273 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
   274 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
   275 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
   277 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
   278 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
   279 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
   280 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
   281 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
   282 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
   283 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
   284 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
   286 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
   287 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
   288 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
   289 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
   290 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
   291 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
   292 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
   293 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
   295 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
   296 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
   297 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
   298 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
   299 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
   300 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
   301 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
   302 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
   304 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
   305 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
   306 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
   307 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
   308 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
   309 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
   310 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
   311 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
   313 #endif // _LP64
   315 #endif // _WIN64
   317 #ifdef _LP64
   318 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
   319 #else
   320 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
   321 #endif // _LP64
   323 alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
   324                    XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
   325                    XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
   326                    XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
   327                    XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
   328                    XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
   329                    XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
   330                    XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
   331 #ifdef _LP64
   332                   ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
   333                    XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
   334                    XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
   335                    XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
   336                    XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
   337                    XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
   338                    XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
   339                    XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
   340 #endif
   341                    );
   343 // flags allocation class should be last.
   344 alloc_class chunk2(RFLAGS);
   346 // Singleton class for condition codes
   347 reg_class int_flags(RFLAGS);
   349 // Class for all float registers
   350 reg_class float_reg(XMM0,
   351                     XMM1,
   352                     XMM2,
   353                     XMM3,
   354                     XMM4,
   355                     XMM5,
   356                     XMM6,
   357                     XMM7
   358 #ifdef _LP64
   359                    ,XMM8,
   360                     XMM9,
   361                     XMM10,
   362                     XMM11,
   363                     XMM12,
   364                     XMM13,
   365                     XMM14,
   366                     XMM15
   367 #endif
   368                     );
   370 // Class for all double registers
   371 reg_class double_reg(XMM0,  XMM0b,
   372                      XMM1,  XMM1b,
   373                      XMM2,  XMM2b,
   374                      XMM3,  XMM3b,
   375                      XMM4,  XMM4b,
   376                      XMM5,  XMM5b,
   377                      XMM6,  XMM6b,
   378                      XMM7,  XMM7b
   379 #ifdef _LP64
   380                     ,XMM8,  XMM8b,
   381                      XMM9,  XMM9b,
   382                      XMM10, XMM10b,
   383                      XMM11, XMM11b,
   384                      XMM12, XMM12b,
   385                      XMM13, XMM13b,
   386                      XMM14, XMM14b,
   387                      XMM15, XMM15b
   388 #endif
   389                      );
   391 // Class for all 32bit vector registers
   392 reg_class vectors_reg(XMM0,
   393                       XMM1,
   394                       XMM2,
   395                       XMM3,
   396                       XMM4,
   397                       XMM5,
   398                       XMM6,
   399                       XMM7
   400 #ifdef _LP64
   401                      ,XMM8,
   402                       XMM9,
   403                       XMM10,
   404                       XMM11,
   405                       XMM12,
   406                       XMM13,
   407                       XMM14,
   408                       XMM15
   409 #endif
   410                       );
   412 // Class for all 64bit vector registers
   413 reg_class vectord_reg(XMM0,  XMM0b,
   414                       XMM1,  XMM1b,
   415                       XMM2,  XMM2b,
   416                       XMM3,  XMM3b,
   417                       XMM4,  XMM4b,
   418                       XMM5,  XMM5b,
   419                       XMM6,  XMM6b,
   420                       XMM7,  XMM7b
   421 #ifdef _LP64
   422                      ,XMM8,  XMM8b,
   423                       XMM9,  XMM9b,
   424                       XMM10, XMM10b,
   425                       XMM11, XMM11b,
   426                       XMM12, XMM12b,
   427                       XMM13, XMM13b,
   428                       XMM14, XMM14b,
   429                       XMM15, XMM15b
   430 #endif
   431                       );
   433 // Class for all 128bit vector registers
   434 reg_class vectorx_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,
   435                       XMM1,  XMM1b,  XMM1c,  XMM1d,
   436                       XMM2,  XMM2b,  XMM2c,  XMM2d,
   437                       XMM3,  XMM3b,  XMM3c,  XMM3d,
   438                       XMM4,  XMM4b,  XMM4c,  XMM4d,
   439                       XMM5,  XMM5b,  XMM5c,  XMM5d,
   440                       XMM6,  XMM6b,  XMM6c,  XMM6d,
   441                       XMM7,  XMM7b,  XMM7c,  XMM7d
   442 #ifdef _LP64
   443                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,
   444                       XMM9,  XMM9b,  XMM9c,  XMM9d,
   445                       XMM10, XMM10b, XMM10c, XMM10d,
   446                       XMM11, XMM11b, XMM11c, XMM11d,
   447                       XMM12, XMM12b, XMM12c, XMM12d,
   448                       XMM13, XMM13b, XMM13c, XMM13d,
   449                       XMM14, XMM14b, XMM14c, XMM14d,
   450                       XMM15, XMM15b, XMM15c, XMM15d
   451 #endif
   452                       );
   454 // Class for all 256bit vector registers
   455 reg_class vectory_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
   456                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
   457                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
   458                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
   459                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
   460                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
   461                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
   462                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
   463 #ifdef _LP64
   464                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
   465                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
   466                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
   467                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
   468                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
   469                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
   470                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
   471                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
   472 #endif
   473                       );
   475 %}
   477 source %{
   478   // Float masks come from different places depending on platform.
   479 #ifdef _LP64
   480   static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
   481   static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
   482   static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
   483   static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
   484 #else
   485   static address float_signmask()  { return (address)float_signmask_pool; }
   486   static address float_signflip()  { return (address)float_signflip_pool; }
   487   static address double_signmask() { return (address)double_signmask_pool; }
   488   static address double_signflip() { return (address)double_signflip_pool; }
   489 #endif
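  // These mask addresses feed the sign-manipulation instructions later in
  // this file; for example, absF_reg clears the sign bit by and-ing the
  // register with ExternalAddress(float_signmask()).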
   492 const bool Matcher::match_rule_supported(int opcode) {
   493   if (!has_match_rule(opcode))
   494     return false;
   496   switch (opcode) {
   497     case Op_PopCountI:
   498     case Op_PopCountL:
   499       if (!UsePopCountInstruction)
   500         return false;
   501     break;
   502     case Op_MulVI:
   503       if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
   504         return false;
   505     break;
   506     case Op_CompareAndSwapL:
   507 #ifdef _LP64
   508     case Op_CompareAndSwapP:
   509 #endif
   510       if (!VM_Version::supports_cx8())
   511         return false;
   512     break;
   513   }
   515   return true;  // Per default match rules are supported.
   516 }
   518 // Max vector size in bytes. 0 if not supported.
   519 const int Matcher::vector_width_in_bytes(BasicType bt) {
   520   assert(is_java_primitive(bt), "only primitive type vectors");
   521   if (UseSSE < 2) return 0;
   522   // SSE2 supports 128bit vectors for all types.
   523   // AVX2 supports 256bit vectors for all types.
   524   int size = (UseAVX > 1) ? 32 : 16;
   525   // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
   526   if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
   527     size = 32;
   528   // Use flag to limit vector size.
   529   size = MIN2(size,(int)MaxVectorSize);
   530   // Minimum 2 values in vector (or 4 for bytes).
   531   switch (bt) {
   532   case T_DOUBLE:
   533   case T_LONG:
   534     if (size < 16) return 0;
   535   case T_FLOAT:
   536   case T_INT:
   537     if (size < 8) return 0;
   538   case T_BOOLEAN:
   539   case T_BYTE:
   540   case T_CHAR:
   541   case T_SHORT:
   542     if (size < 4) return 0;
   543     break;
   544   default:
   545     ShouldNotReachHere();
   546   }
   547   return size;
   548 }
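// A rough worked example of the above, assuming MaxVectorSize >= 32:
// with UseAVX > 1 a T_INT vector is 32 bytes (8 ints), with only UseSSE >= 2
// it is 16 bytes (4 ints), and with UseSSE < 2 vectorization is disabled
// (width 0).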
   550 // Limits on vector size (number of elements) loaded into vector.
   551 const int Matcher::max_vector_size(const BasicType bt) {
   552   return vector_width_in_bytes(bt)/type2aelembytes(bt);
   553 }
   554 const int Matcher::min_vector_size(const BasicType bt) {
   555   int max_size = max_vector_size(bt);
   556   // Min size which can be loaded into vector is 4 bytes.
   557   int size = (type2aelembytes(bt) == 1) ? 4 : 2;
   558   return MIN2(size,max_size);
   559 }
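// For example, with 32-byte vectors max_vector_size(T_INT) is 8 elements,
// while min_vector_size is 2 elements for T_INT and 4 elements for T_BYTE,
// reflecting the 4-byte minimum vector load noted above.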
    561 // Vector ideal reg corresponding to specified size in bytes
   562 const int Matcher::vector_ideal_reg(int size) {
   563   assert(MaxVectorSize >= size, "");
   564   switch(size) {
   565     case  4: return Op_VecS;
   566     case  8: return Op_VecD;
   567     case 16: return Op_VecX;
   568     case 32: return Op_VecY;
   569   }
   570   ShouldNotReachHere();
   571   return 0;
   572 }
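// So a 4-byte vector spills via Op_VecS, 8 bytes via Op_VecD, 16 bytes via
// Op_VecX and 32 bytes via Op_VecY; these ideal registers select the cases
// in vec_mov_helper and vec_spill_helper below.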
   574 // Only lowest bits of xmm reg are used for vector shift count.
   575 const int Matcher::vector_shift_count_ideal_reg(int size) {
   576   return Op_VecS;
   577 }
    579 // x86 supports misaligned vector stores/loads.
   580 const bool Matcher::misaligned_vectors_ok() {
   581   return !AlignVector; // can be changed by flag
   582 }
   584 // Helper methods for MachSpillCopyNode::implementation().
   585 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
   586                           int src_hi, int dst_hi, uint ireg, outputStream* st) {
    587   // In the 64-bit VM, size calculation is complex, so the size is obtained
    588   // by emitting the instructions into a scratch buffer.
   589   LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
   590   assert(ireg == Op_VecS || // 32bit vector
   591          (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
   592          (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
   593          "no non-adjacent vector moves" );
   594   if (cbuf) {
   595     MacroAssembler _masm(cbuf);
   596     int offset = __ offset();
   597     switch (ireg) {
   598     case Op_VecS: // copy whole register
   599     case Op_VecD:
   600     case Op_VecX:
   601       __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
   602       break;
   603     case Op_VecY:
   604       __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
   605       break;
   606     default:
   607       ShouldNotReachHere();
   608     }
   609     int size = __ offset() - offset;
   610 #ifdef ASSERT
   611     // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    612     assert(!do_size || size == 4, "incorrect size calculation");
   613 #endif
   614     return size;
   615 #ifndef PRODUCT
   616   } else if (!do_size) {
   617     switch (ireg) {
   618     case Op_VecS:
   619     case Op_VecD:
   620     case Op_VecX:
   621       st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
   622       break;
   623     case Op_VecY:
   624       st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
   625       break;
   626     default:
   627       ShouldNotReachHere();
   628     }
   629 #endif
   630   }
   631   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
   632   return 4;
   633 }
   635 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
   636                             int stack_offset, int reg, uint ireg, outputStream* st) {
    637   // In the 64-bit VM, size calculation is complex, so the size is obtained
    638   // by emitting the instructions into a scratch buffer.
   639   LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
   640   if (cbuf) {
   641     MacroAssembler _masm(cbuf);
   642     int offset = __ offset();
   643     if (is_load) {
   644       switch (ireg) {
   645       case Op_VecS:
   646         __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
   647         break;
   648       case Op_VecD:
   649         __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
   650         break;
   651       case Op_VecX:
   652         __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
   653         break;
   654       case Op_VecY:
   655         __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
   656         break;
   657       default:
   658         ShouldNotReachHere();
   659       }
   660     } else { // store
   661       switch (ireg) {
   662       case Op_VecS:
   663         __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
   664         break;
   665       case Op_VecD:
   666         __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
   667         break;
   668       case Op_VecX:
   669         __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
   670         break;
   671       case Op_VecY:
   672         __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
   673         break;
   674       default:
   675         ShouldNotReachHere();
   676       }
   677     }
   678     int size = __ offset() - offset;
   679 #ifdef ASSERT
   680     int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
   681     // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    682     assert(!do_size || size == (5+offset_size), "incorrect size calculation");
   683 #endif
   684     return size;
   685 #ifndef PRODUCT
   686   } else if (!do_size) {
   687     if (is_load) {
   688       switch (ireg) {
   689       case Op_VecS:
   690         st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
   691         break;
   692       case Op_VecD:
   693         st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
   694         break;
   695        case Op_VecX:
   696         st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
   697         break;
   698       case Op_VecY:
   699         st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
   700         break;
   701       default:
   702         ShouldNotReachHere();
   703       }
   704     } else { // store
   705       switch (ireg) {
   706       case Op_VecS:
   707         st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
   708         break;
   709       case Op_VecD:
   710         st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
   711         break;
   712        case Op_VecX:
   713         st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
   714         break;
   715       case Op_VecY:
   716         st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
   717         break;
   718       default:
   719         ShouldNotReachHere();
   720       }
   721     }
   722 #endif
   723   }
   724   int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
   725   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
   726   return 5+offset_size;
   727 }
   729 static inline jfloat replicate4_imm(int con, int width) {
   730   // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
   731   assert(width == 1 || width == 2, "only byte or short types here");
   732   int bit_width = width * 8;
   733   jint val = con;
   734   val &= (1 << bit_width) - 1;  // mask off sign bits
   735   while(bit_width < 32) {
   736     val |= (val << bit_width);
   737     bit_width <<= 1;
   738   }
   739   jfloat fval = *((jfloat*) &val);  // coerce to float type
   740   return fval;
   741 }
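// Worked example: replicate4_imm(0x1, 1) masks the constant to 0x01 and then
// doubles it up to 0x0101 and 0x01010101, returning those 32 bits
// reinterpreted as a jfloat bit pattern (only the raw bits are meaningful).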
   743 static inline jdouble replicate8_imm(int con, int width) {
   744   // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
   745   assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
   746   int bit_width = width * 8;
   747   jlong val = con;
   748   val &= (((jlong) 1) << bit_width) - 1;  // mask off sign bits
   749   while(bit_width < 64) {
   750     val |= (val << bit_width);
   751     bit_width <<= 1;
   752   }
   753   jdouble dval = *((jdouble*) &val);  // coerce to double type
   754   return dval;
   755 }
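// Worked example: replicate8_imm(0x12345678, 4) keeps the low 32 bits and
// duplicates them into both halves, yielding 0x1234567812345678 returned as
// a jdouble bit pattern.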
   757 #ifndef PRODUCT
   758   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
   759     st->print("nop \t# %d bytes pad for loops and calls", _count);
   760   }
   761 #endif
   763   void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
   764     MacroAssembler _masm(&cbuf);
   765     __ nop(_count);
   766   }
   768   uint MachNopNode::size(PhaseRegAlloc*) const {
   769     return _count;
   770   }
   772 #ifndef PRODUCT
   773   void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
   774     st->print("# breakpoint");
   775   }
   776 #endif
   778   void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
   779     MacroAssembler _masm(&cbuf);
   780     __ int3();
   781   }
   783   uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
   784     return MachNode::size(ra_);
   785   }
   787 %}
   789 encode %{
   791   enc_class preserve_SP %{
   792     debug_only(int off0 = cbuf.insts_size());
   793     MacroAssembler _masm(&cbuf);
   794     // RBP is preserved across all calls, even compiled calls.
   795     // Use it to preserve RSP in places where the callee might change the SP.
   796     __ movptr(rbp_mh_SP_save, rsp);
   797     debug_only(int off1 = cbuf.insts_size());
   798     assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
   799   %}
   801   enc_class restore_SP %{
   802     MacroAssembler _masm(&cbuf);
   803     __ movptr(rsp, rbp_mh_SP_save);
   804   %}
   806   enc_class call_epilog %{
   807     if (VerifyStackAtCalls) {
   808       // Check that stack depth is unchanged: find majik cookie on stack
   809       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
   810       MacroAssembler _masm(&cbuf);
   811       Label L;
   812       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
   813       __ jccb(Assembler::equal, L);
   814       // Die if stack mismatch
   815       __ int3();
   816       __ bind(L);
   817     }
   818   %}
   820 %}
   823 //----------OPERANDS-----------------------------------------------------------
   824 // Operand definitions must precede instruction definitions for correct parsing
   825 // in the ADLC because operands constitute user defined types which are used in
   826 // instruction definitions.
   828 // Vectors
   829 operand vecS() %{
   830   constraint(ALLOC_IN_RC(vectors_reg));
   831   match(VecS);
   833   format %{ %}
   834   interface(REG_INTER);
   835 %}
   837 operand vecD() %{
   838   constraint(ALLOC_IN_RC(vectord_reg));
   839   match(VecD);
   841   format %{ %}
   842   interface(REG_INTER);
   843 %}
   845 operand vecX() %{
   846   constraint(ALLOC_IN_RC(vectorx_reg));
   847   match(VecX);
   849   format %{ %}
   850   interface(REG_INTER);
   851 %}
   853 operand vecY() %{
   854   constraint(ALLOC_IN_RC(vectory_reg));
   855   match(VecY);
   857   format %{ %}
   858   interface(REG_INTER);
   859 %}
   862 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)
   864 // ============================================================================
   866 instruct ShouldNotReachHere() %{
   867   match(Halt);
   868   format %{ "int3\t# ShouldNotReachHere" %}
   869   ins_encode %{
   870     __ int3();
   871   %}
   872   ins_pipe(pipe_slow);
   873 %}
   875 // ============================================================================
   877 instruct addF_reg(regF dst, regF src) %{
   878   predicate((UseSSE>=1) && (UseAVX == 0));
   879   match(Set dst (AddF dst src));
   881   format %{ "addss   $dst, $src" %}
   882   ins_cost(150);
   883   ins_encode %{
   884     __ addss($dst$$XMMRegister, $src$$XMMRegister);
   885   %}
   886   ins_pipe(pipe_slow);
   887 %}
   889 instruct addF_mem(regF dst, memory src) %{
   890   predicate((UseSSE>=1) && (UseAVX == 0));
   891   match(Set dst (AddF dst (LoadF src)));
   893   format %{ "addss   $dst, $src" %}
   894   ins_cost(150);
   895   ins_encode %{
   896     __ addss($dst$$XMMRegister, $src$$Address);
   897   %}
   898   ins_pipe(pipe_slow);
   899 %}
   901 instruct addF_imm(regF dst, immF con) %{
   902   predicate((UseSSE>=1) && (UseAVX == 0));
   903   match(Set dst (AddF dst con));
   904   format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
   905   ins_cost(150);
   906   ins_encode %{
   907     __ addss($dst$$XMMRegister, $constantaddress($con));
   908   %}
   909   ins_pipe(pipe_slow);
   910 %}
   912 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
   913   predicate(UseAVX > 0);
   914   match(Set dst (AddF src1 src2));
   916   format %{ "vaddss  $dst, $src1, $src2" %}
   917   ins_cost(150);
   918   ins_encode %{
   919     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
   920   %}
   921   ins_pipe(pipe_slow);
   922 %}
   924 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
   925   predicate(UseAVX > 0);
   926   match(Set dst (AddF src1 (LoadF src2)));
   928   format %{ "vaddss  $dst, $src1, $src2" %}
   929   ins_cost(150);
   930   ins_encode %{
   931     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
   932   %}
   933   ins_pipe(pipe_slow);
   934 %}
   936 instruct addF_reg_imm(regF dst, regF src, immF con) %{
   937   predicate(UseAVX > 0);
   938   match(Set dst (AddF src con));
   940   format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
   941   ins_cost(150);
   942   ins_encode %{
   943     __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
   944   %}
   945   ins_pipe(pipe_slow);
   946 %}
   948 instruct addD_reg(regD dst, regD src) %{
   949   predicate((UseSSE>=2) && (UseAVX == 0));
   950   match(Set dst (AddD dst src));
   952   format %{ "addsd   $dst, $src" %}
   953   ins_cost(150);
   954   ins_encode %{
   955     __ addsd($dst$$XMMRegister, $src$$XMMRegister);
   956   %}
   957   ins_pipe(pipe_slow);
   958 %}
   960 instruct addD_mem(regD dst, memory src) %{
   961   predicate((UseSSE>=2) && (UseAVX == 0));
   962   match(Set dst (AddD dst (LoadD src)));
   964   format %{ "addsd   $dst, $src" %}
   965   ins_cost(150);
   966   ins_encode %{
   967     __ addsd($dst$$XMMRegister, $src$$Address);
   968   %}
   969   ins_pipe(pipe_slow);
   970 %}
   972 instruct addD_imm(regD dst, immD con) %{
   973   predicate((UseSSE>=2) && (UseAVX == 0));
   974   match(Set dst (AddD dst con));
   975   format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
   976   ins_cost(150);
   977   ins_encode %{
   978     __ addsd($dst$$XMMRegister, $constantaddress($con));
   979   %}
   980   ins_pipe(pipe_slow);
   981 %}
   983 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
   984   predicate(UseAVX > 0);
   985   match(Set dst (AddD src1 src2));
   987   format %{ "vaddsd  $dst, $src1, $src2" %}
   988   ins_cost(150);
   989   ins_encode %{
   990     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
   991   %}
   992   ins_pipe(pipe_slow);
   993 %}
   995 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
   996   predicate(UseAVX > 0);
   997   match(Set dst (AddD src1 (LoadD src2)));
   999   format %{ "vaddsd  $dst, $src1, $src2" %}
  1000   ins_cost(150);
  1001   ins_encode %{
  1002     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  1003   %}
  1004   ins_pipe(pipe_slow);
  1005 %}
  1007 instruct addD_reg_imm(regD dst, regD src, immD con) %{
  1008   predicate(UseAVX > 0);
  1009   match(Set dst (AddD src con));
  1011   format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  1012   ins_cost(150);
  1013   ins_encode %{
  1014     __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  1015   %}
  1016   ins_pipe(pipe_slow);
  1017 %}
  1019 instruct subF_reg(regF dst, regF src) %{
  1020   predicate((UseSSE>=1) && (UseAVX == 0));
  1021   match(Set dst (SubF dst src));
  1023   format %{ "subss   $dst, $src" %}
  1024   ins_cost(150);
  1025   ins_encode %{
  1026     __ subss($dst$$XMMRegister, $src$$XMMRegister);
  1027   %}
  1028   ins_pipe(pipe_slow);
  1029 %}
  1031 instruct subF_mem(regF dst, memory src) %{
  1032   predicate((UseSSE>=1) && (UseAVX == 0));
  1033   match(Set dst (SubF dst (LoadF src)));
  1035   format %{ "subss   $dst, $src" %}
  1036   ins_cost(150);
  1037   ins_encode %{
  1038     __ subss($dst$$XMMRegister, $src$$Address);
  1039   %}
  1040   ins_pipe(pipe_slow);
  1041 %}
  1043 instruct subF_imm(regF dst, immF con) %{
  1044   predicate((UseSSE>=1) && (UseAVX == 0));
  1045   match(Set dst (SubF dst con));
  1046   format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  1047   ins_cost(150);
  1048   ins_encode %{
  1049     __ subss($dst$$XMMRegister, $constantaddress($con));
  1050   %}
  1051   ins_pipe(pipe_slow);
  1052 %}
  1054 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
  1055   predicate(UseAVX > 0);
  1056   match(Set dst (SubF src1 src2));
  1058   format %{ "vsubss  $dst, $src1, $src2" %}
  1059   ins_cost(150);
  1060   ins_encode %{
  1061     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  1062   %}
  1063   ins_pipe(pipe_slow);
  1064 %}
  1066 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
  1067   predicate(UseAVX > 0);
  1068   match(Set dst (SubF src1 (LoadF src2)));
  1070   format %{ "vsubss  $dst, $src1, $src2" %}
  1071   ins_cost(150);
  1072   ins_encode %{
  1073     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  1074   %}
  1075   ins_pipe(pipe_slow);
  1076 %}
  1078 instruct subF_reg_imm(regF dst, regF src, immF con) %{
  1079   predicate(UseAVX > 0);
  1080   match(Set dst (SubF src con));
  1082   format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  1083   ins_cost(150);
  1084   ins_encode %{
  1085     __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  1086   %}
  1087   ins_pipe(pipe_slow);
  1088 %}
  1090 instruct subD_reg(regD dst, regD src) %{
  1091   predicate((UseSSE>=2) && (UseAVX == 0));
  1092   match(Set dst (SubD dst src));
  1094   format %{ "subsd   $dst, $src" %}
  1095   ins_cost(150);
  1096   ins_encode %{
  1097     __ subsd($dst$$XMMRegister, $src$$XMMRegister);
  1098   %}
  1099   ins_pipe(pipe_slow);
  1100 %}
  1102 instruct subD_mem(regD dst, memory src) %{
  1103   predicate((UseSSE>=2) && (UseAVX == 0));
  1104   match(Set dst (SubD dst (LoadD src)));
  1106   format %{ "subsd   $dst, $src" %}
  1107   ins_cost(150);
  1108   ins_encode %{
  1109     __ subsd($dst$$XMMRegister, $src$$Address);
  1110   %}
  1111   ins_pipe(pipe_slow);
  1112 %}
  1114 instruct subD_imm(regD dst, immD con) %{
  1115   predicate((UseSSE>=2) && (UseAVX == 0));
  1116   match(Set dst (SubD dst con));
  1117   format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  1118   ins_cost(150);
  1119   ins_encode %{
  1120     __ subsd($dst$$XMMRegister, $constantaddress($con));
  1121   %}
  1122   ins_pipe(pipe_slow);
  1123 %}
  1125 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
  1126   predicate(UseAVX > 0);
  1127   match(Set dst (SubD src1 src2));
  1129   format %{ "vsubsd  $dst, $src1, $src2" %}
  1130   ins_cost(150);
  1131   ins_encode %{
  1132     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  1133   %}
  1134   ins_pipe(pipe_slow);
  1135 %}
  1137 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
  1138   predicate(UseAVX > 0);
  1139   match(Set dst (SubD src1 (LoadD src2)));
  1141   format %{ "vsubsd  $dst, $src1, $src2" %}
  1142   ins_cost(150);
  1143   ins_encode %{
  1144     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  1145   %}
  1146   ins_pipe(pipe_slow);
  1147 %}
  1149 instruct subD_reg_imm(regD dst, regD src, immD con) %{
  1150   predicate(UseAVX > 0);
  1151   match(Set dst (SubD src con));
  1153   format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  1154   ins_cost(150);
  1155   ins_encode %{
  1156     __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  1157   %}
  1158   ins_pipe(pipe_slow);
  1159 %}
  1161 instruct mulF_reg(regF dst, regF src) %{
  1162   predicate((UseSSE>=1) && (UseAVX == 0));
  1163   match(Set dst (MulF dst src));
  1165   format %{ "mulss   $dst, $src" %}
  1166   ins_cost(150);
  1167   ins_encode %{
  1168     __ mulss($dst$$XMMRegister, $src$$XMMRegister);
  1169   %}
  1170   ins_pipe(pipe_slow);
  1171 %}
  1173 instruct mulF_mem(regF dst, memory src) %{
  1174   predicate((UseSSE>=1) && (UseAVX == 0));
  1175   match(Set dst (MulF dst (LoadF src)));
  1177   format %{ "mulss   $dst, $src" %}
  1178   ins_cost(150);
  1179   ins_encode %{
  1180     __ mulss($dst$$XMMRegister, $src$$Address);
  1181   %}
  1182   ins_pipe(pipe_slow);
  1183 %}
  1185 instruct mulF_imm(regF dst, immF con) %{
  1186   predicate((UseSSE>=1) && (UseAVX == 0));
  1187   match(Set dst (MulF dst con));
  1188   format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  1189   ins_cost(150);
  1190   ins_encode %{
  1191     __ mulss($dst$$XMMRegister, $constantaddress($con));
  1192   %}
  1193   ins_pipe(pipe_slow);
  1194 %}
  1196 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
  1197   predicate(UseAVX > 0);
  1198   match(Set dst (MulF src1 src2));
  1200   format %{ "vmulss  $dst, $src1, $src2" %}
  1201   ins_cost(150);
  1202   ins_encode %{
  1203     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  1204   %}
  1205   ins_pipe(pipe_slow);
  1206 %}
  1208 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
  1209   predicate(UseAVX > 0);
  1210   match(Set dst (MulF src1 (LoadF src2)));
  1212   format %{ "vmulss  $dst, $src1, $src2" %}
  1213   ins_cost(150);
  1214   ins_encode %{
  1215     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  1216   %}
  1217   ins_pipe(pipe_slow);
  1218 %}
  1220 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
  1221   predicate(UseAVX > 0);
  1222   match(Set dst (MulF src con));
  1224   format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  1225   ins_cost(150);
  1226   ins_encode %{
  1227     __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  1228   %}
  1229   ins_pipe(pipe_slow);
  1230 %}
  1232 instruct mulD_reg(regD dst, regD src) %{
  1233   predicate((UseSSE>=2) && (UseAVX == 0));
  1234   match(Set dst (MulD dst src));
  1236   format %{ "mulsd   $dst, $src" %}
  1237   ins_cost(150);
  1238   ins_encode %{
  1239     __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
  1240   %}
  1241   ins_pipe(pipe_slow);
  1242 %}
  1244 instruct mulD_mem(regD dst, memory src) %{
  1245   predicate((UseSSE>=2) && (UseAVX == 0));
  1246   match(Set dst (MulD dst (LoadD src)));
  1248   format %{ "mulsd   $dst, $src" %}
  1249   ins_cost(150);
  1250   ins_encode %{
  1251     __ mulsd($dst$$XMMRegister, $src$$Address);
  1252   %}
  1253   ins_pipe(pipe_slow);
  1254 %}
  1256 instruct mulD_imm(regD dst, immD con) %{
  1257   predicate((UseSSE>=2) && (UseAVX == 0));
  1258   match(Set dst (MulD dst con));
  1259   format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  1260   ins_cost(150);
  1261   ins_encode %{
  1262     __ mulsd($dst$$XMMRegister, $constantaddress($con));
  1263   %}
  1264   ins_pipe(pipe_slow);
  1265 %}
  1267 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
  1268   predicate(UseAVX > 0);
  1269   match(Set dst (MulD src1 src2));
  1271   format %{ "vmulsd  $dst, $src1, $src2" %}
  1272   ins_cost(150);
  1273   ins_encode %{
  1274     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  1275   %}
  1276   ins_pipe(pipe_slow);
  1277 %}
  1279 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
  1280   predicate(UseAVX > 0);
  1281   match(Set dst (MulD src1 (LoadD src2)));
  1283   format %{ "vmulsd  $dst, $src1, $src2" %}
  1284   ins_cost(150);
  1285   ins_encode %{
  1286     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  1287   %}
  1288   ins_pipe(pipe_slow);
  1289 %}
  1291 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
  1292   predicate(UseAVX > 0);
  1293   match(Set dst (MulD src con));
  1295   format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  1296   ins_cost(150);
  1297   ins_encode %{
  1298     __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  1299   %}
  1300   ins_pipe(pipe_slow);
  1301 %}
  1303 instruct divF_reg(regF dst, regF src) %{
  1304   predicate((UseSSE>=1) && (UseAVX == 0));
  1305   match(Set dst (DivF dst src));
  1307   format %{ "divss   $dst, $src" %}
  1308   ins_cost(150);
  1309   ins_encode %{
  1310     __ divss($dst$$XMMRegister, $src$$XMMRegister);
  1311   %}
  1312   ins_pipe(pipe_slow);
  1313 %}
  1315 instruct divF_mem(regF dst, memory src) %{
  1316   predicate((UseSSE>=1) && (UseAVX == 0));
  1317   match(Set dst (DivF dst (LoadF src)));
  1319   format %{ "divss   $dst, $src" %}
  1320   ins_cost(150);
  1321   ins_encode %{
  1322     __ divss($dst$$XMMRegister, $src$$Address);
  1323   %}
  1324   ins_pipe(pipe_slow);
  1325 %}
  1327 instruct divF_imm(regF dst, immF con) %{
  1328   predicate((UseSSE>=1) && (UseAVX == 0));
  1329   match(Set dst (DivF dst con));
  1330   format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  1331   ins_cost(150);
  1332   ins_encode %{
  1333     __ divss($dst$$XMMRegister, $constantaddress($con));
  1334   %}
  1335   ins_pipe(pipe_slow);
  1336 %}
  1338 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
  1339   predicate(UseAVX > 0);
  1340   match(Set dst (DivF src1 src2));
  1342   format %{ "vdivss  $dst, $src1, $src2" %}
  1343   ins_cost(150);
  1344   ins_encode %{
  1345     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  1346   %}
  1347   ins_pipe(pipe_slow);
  1348 %}
  1350 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
  1351   predicate(UseAVX > 0);
  1352   match(Set dst (DivF src1 (LoadF src2)));
  1354   format %{ "vdivss  $dst, $src1, $src2" %}
  1355   ins_cost(150);
  1356   ins_encode %{
  1357     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  1358   %}
  1359   ins_pipe(pipe_slow);
  1360 %}
  1362 instruct divF_reg_imm(regF dst, regF src, immF con) %{
  1363   predicate(UseAVX > 0);
  1364   match(Set dst (DivF src con));
  1366   format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  1367   ins_cost(150);
  1368   ins_encode %{
  1369     __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  1370   %}
  1371   ins_pipe(pipe_slow);
  1372 %}
  1374 instruct divD_reg(regD dst, regD src) %{
  1375   predicate((UseSSE>=2) && (UseAVX == 0));
  1376   match(Set dst (DivD dst src));
  1378   format %{ "divsd   $dst, $src" %}
  1379   ins_cost(150);
  1380   ins_encode %{
  1381     __ divsd($dst$$XMMRegister, $src$$XMMRegister);
  1382   %}
  1383   ins_pipe(pipe_slow);
  1384 %}
  1386 instruct divD_mem(regD dst, memory src) %{
  1387   predicate((UseSSE>=2) && (UseAVX == 0));
  1388   match(Set dst (DivD dst (LoadD src)));
  1390   format %{ "divsd   $dst, $src" %}
  1391   ins_cost(150);
  1392   ins_encode %{
  1393     __ divsd($dst$$XMMRegister, $src$$Address);
  1394   %}
  1395   ins_pipe(pipe_slow);
  1396 %}
  1398 instruct divD_imm(regD dst, immD con) %{
  1399   predicate((UseSSE>=2) && (UseAVX == 0));
  1400   match(Set dst (DivD dst con));
  1401   format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  1402   ins_cost(150);
  1403   ins_encode %{
  1404     __ divsd($dst$$XMMRegister, $constantaddress($con));
  1405   %}
  1406   ins_pipe(pipe_slow);
  1407 %}
  1409 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
  1410   predicate(UseAVX > 0);
  1411   match(Set dst (DivD src1 src2));
  1413   format %{ "vdivsd  $dst, $src1, $src2" %}
  1414   ins_cost(150);
  1415   ins_encode %{
  1416     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  1417   %}
  1418   ins_pipe(pipe_slow);
  1419 %}
  1421 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
  1422   predicate(UseAVX > 0);
  1423   match(Set dst (DivD src1 (LoadD src2)));
  1425   format %{ "vdivsd  $dst, $src1, $src2" %}
  1426   ins_cost(150);
  1427   ins_encode %{
  1428     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  1429   %}
  1430   ins_pipe(pipe_slow);
  1431 %}
  1433 instruct divD_reg_imm(regD dst, regD src, immD con) %{
  1434   predicate(UseAVX > 0);
  1435   match(Set dst (DivD src con));
  1437   format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  1438   ins_cost(150);
  1439   ins_encode %{
  1440     __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  1441   %}
  1442   ins_pipe(pipe_slow);
  1443 %}
  1445 instruct absF_reg(regF dst) %{
  1446   predicate((UseSSE>=1) && (UseAVX == 0));
  1447   match(Set dst (AbsF dst));
  1448   ins_cost(150);
  1449   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
  1450   ins_encode %{
  1451     __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
  1452   %}
  1453   ins_pipe(pipe_slow);
  1454 %}
  1456 instruct absF_reg_reg(regF dst, regF src) %{
  1457   predicate(UseAVX > 0);
  1458   match(Set dst (AbsF src));
  1459   ins_cost(150);
  1460   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  1461   ins_encode %{
  1462     bool vector256 = false;
  1463     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
  1464               ExternalAddress(float_signmask()), vector256);
  1465   %}
  1466   ins_pipe(pipe_slow);
  1467 %}
  1469 instruct absD_reg(regD dst) %{
  1470   predicate((UseSSE>=2) && (UseAVX == 0));
  1471   match(Set dst (AbsD dst));
  1472   ins_cost(150);
  1473   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
  1474             "# abs double by sign masking" %}
  1475   ins_encode %{
  1476     __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  1477   %}
  1478   ins_pipe(pipe_slow);
  1479 %}
  1481 instruct absD_reg_reg(regD dst, regD src) %{
  1482   predicate(UseAVX > 0);
  1483   match(Set dst (AbsD src));
  1484   ins_cost(150);
  1485   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
  1486             "# abs double by sign masking" %}
  1487   ins_encode %{
  1488     bool vector256 = false;
  1489     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
  1490               ExternalAddress(double_signmask()), vector256);
  1491   %}
  1492   ins_pipe(pipe_slow);
  1493 %}
  1495 instruct negF_reg(regF dst) %{
  1496   predicate((UseSSE>=1) && (UseAVX == 0));
  1497   match(Set dst (NegF dst));
  1498   ins_cost(150);
  1499   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
  1500   ins_encode %{
  1501     __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  1502   %}
  1503   ins_pipe(pipe_slow);
  1504 %}
  1506 instruct negF_reg_reg(regF dst, regF src) %{
  1507   predicate(UseAVX > 0);
  1508   match(Set dst (NegF src));
  1509   ins_cost(150);
  1510   format %{ "vxorps  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  1511   ins_encode %{
  1512     bool vector256 = false;
  1513     __ vxorps($dst$$XMMRegister, $src$$XMMRegister,
  1514               ExternalAddress(float_signflip()), vector256);
  1515   %}
  1516   ins_pipe(pipe_slow);
  1517 %}
  1519 instruct negD_reg(regD dst) %{
  1520   predicate((UseSSE>=2) && (UseAVX == 0));
  1521   match(Set dst (NegD dst));
  1522   ins_cost(150);
  1523   format %{ "xorpd   $dst, [0x8000000000000000]\t"
  1524             "# neg double by sign flipping" %}
  1525   ins_encode %{
  1526     __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  1527   %}
  1528   ins_pipe(pipe_slow);
  1529 %}
  1531 instruct negD_reg_reg(regD dst, regD src) %{
  1532   predicate(UseAVX > 0);
  1533   match(Set dst (NegD src));
  1534   ins_cost(150);
  1535   format %{ "vxorpd  $dst, $src, [0x8000000000000000]\t"
  1536             "# neg double by sign flipping" %}
  1537   ins_encode %{
  1538     bool vector256 = false;
  1539     __ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
  1540               ExternalAddress(double_signflip()), vector256);
  1541   %}
  1542   ins_pipe(pipe_slow);
  1543 %}
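       // Single-precision square root: Java only defines Math.sqrt(double), so a
       // float sqrt reaches the matcher as the double round-trip
       //   (ConvD2F (SqrtD (ConvF2D src)))
       // produced by source such as (float) Math.sqrt((double) f).  Matching the
       // whole subtree lets a single sqrtss be emitted; rounding the double result
       // back to float yields the same value as a direct single-precision sqrt, so
       // no precision is lost.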
  1545 instruct sqrtF_reg(regF dst, regF src) %{
  1546   predicate(UseSSE>=1);
  1547   match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
  1549   format %{ "sqrtss  $dst, $src" %}
  1550   ins_cost(150);
  1551   ins_encode %{
  1552     __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
  1553   %}
  1554   ins_pipe(pipe_slow);
  1555 %}
  1557 instruct sqrtF_mem(regF dst, memory src) %{
  1558   predicate(UseSSE>=1);
  1559   match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
  1561   format %{ "sqrtss  $dst, $src" %}
  1562   ins_cost(150);
  1563   ins_encode %{
  1564     __ sqrtss($dst$$XMMRegister, $src$$Address);
  1565   %}
  1566   ins_pipe(pipe_slow);
  1567 %}
  1569 instruct sqrtF_imm(regF dst, immF con) %{
  1570   predicate(UseSSE>=1);
  1571   match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
  1572   format %{ "sqrtss  $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  1573   ins_cost(150);
  1574   ins_encode %{
  1575     __ sqrtss($dst$$XMMRegister, $constantaddress($con));
  1576   %}
  1577   ins_pipe(pipe_slow);
  1578 %}
  1580 instruct sqrtD_reg(regD dst, regD src) %{
  1581   predicate(UseSSE>=2);
  1582   match(Set dst (SqrtD src));
  1584   format %{ "sqrtsd  $dst, $src" %}
  1585   ins_cost(150);
  1586   ins_encode %{
  1587     __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
  1588   %}
  1589   ins_pipe(pipe_slow);
  1590 %}
  1592 instruct sqrtD_mem(regD dst, memory src) %{
  1593   predicate(UseSSE>=2);
  1594   match(Set dst (SqrtD (LoadD src)));
  1596   format %{ "sqrtsd  $dst, $src" %}
  1597   ins_cost(150);
  1598   ins_encode %{
  1599     __ sqrtsd($dst$$XMMRegister, $src$$Address);
  1600   %}
  1601   ins_pipe(pipe_slow);
  1602 %}
  1604 instruct sqrtD_imm(regD dst, immD con) %{
  1605   predicate(UseSSE>=2);
  1606   match(Set dst (SqrtD con));
  1607   format %{ "sqrtsd  $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  1608   ins_cost(150);
  1609   ins_encode %{
  1610     __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
  1611   %}
  1612   ins_pipe(pipe_slow);
  1613 %}
  1616 // ====================VECTOR INSTRUCTIONS=====================================
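       // Summary of the vector operand classes used by the patterns below
       // (matching the load/store instructs that follow):
       //
       //   vecS   4 bytes   movd
       //   vecD   8 bytes   movq
       //   vecX  16 bytes   movdqu
       //   vecY  32 bytes   vmovdqu (AVX)
       //
       // Predicates query the vector node itself (memory_size() for loads/stores,
       // length() for replicates), so the same ideal node selects a different
       // instruct depending on the vector width chosen by the compiler.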
  1618 // Load vectors (4 bytes long)
  1619 instruct loadV4(vecS dst, memory mem) %{
  1620   predicate(n->as_LoadVector()->memory_size() == 4);
  1621   match(Set dst (LoadVector mem));
  1622   ins_cost(125);
  1623   format %{ "movd    $dst,$mem\t! load vector (4 bytes)" %}
  1624   ins_encode %{
  1625     __ movdl($dst$$XMMRegister, $mem$$Address);
  1626   %}
  1627   ins_pipe( pipe_slow );
  1628 %}
  1630 // Load vectors (8 bytes long)
  1631 instruct loadV8(vecD dst, memory mem) %{
  1632   predicate(n->as_LoadVector()->memory_size() == 8);
  1633   match(Set dst (LoadVector mem));
  1634   ins_cost(125);
  1635   format %{ "movq    $dst,$mem\t! load vector (8 bytes)" %}
  1636   ins_encode %{
  1637     __ movq($dst$$XMMRegister, $mem$$Address);
  1638   %}
  1639   ins_pipe( pipe_slow );
  1640 %}
  1642 // Load vectors (16 bytes long)
  1643 instruct loadV16(vecX dst, memory mem) %{
  1644   predicate(n->as_LoadVector()->memory_size() == 16);
  1645   match(Set dst (LoadVector mem));
  1646   ins_cost(125);
  1647   format %{ "movdqu  $dst,$mem\t! load vector (16 bytes)" %}
  1648   ins_encode %{
  1649     __ movdqu($dst$$XMMRegister, $mem$$Address);
  1650   %}
  1651   ins_pipe( pipe_slow );
  1652 %}
  1654 // Load vectors (32 bytes long)
  1655 instruct loadV32(vecY dst, memory mem) %{
  1656   predicate(n->as_LoadVector()->memory_size() == 32);
  1657   match(Set dst (LoadVector mem));
  1658   ins_cost(125);
  1659   format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
  1660   ins_encode %{
  1661     __ vmovdqu($dst$$XMMRegister, $mem$$Address);
  1662   %}
  1663   ins_pipe( pipe_slow );
  1664 %}
  1666 // Store vectors
  1667 instruct storeV4(memory mem, vecS src) %{
  1668   predicate(n->as_StoreVector()->memory_size() == 4);
  1669   match(Set mem (StoreVector mem src));
  1670   ins_cost(145);
  1671   format %{ "movd    $mem,$src\t! store vector (4 bytes)" %}
  1672   ins_encode %{
  1673     __ movdl($mem$$Address, $src$$XMMRegister);
  1674   %}
  1675   ins_pipe( pipe_slow );
  1676 %}
  1678 instruct storeV8(memory mem, vecD src) %{
  1679   predicate(n->as_StoreVector()->memory_size() == 8);
  1680   match(Set mem (StoreVector mem src));
  1681   ins_cost(145);
  1682   format %{ "movq    $mem,$src\t! store vector (8 bytes)" %}
  1683   ins_encode %{
  1684     __ movq($mem$$Address, $src$$XMMRegister);
  1685   %}
  1686   ins_pipe( pipe_slow );
  1687 %}
  1689 instruct storeV16(memory mem, vecX src) %{
  1690   predicate(n->as_StoreVector()->memory_size() == 16);
  1691   match(Set mem (StoreVector mem src));
  1692   ins_cost(145);
  1693   format %{ "movdqu  $mem,$src\t! store vector (16 bytes)" %}
  1694   ins_encode %{
  1695     __ movdqu($mem$$Address, $src$$XMMRegister);
  1696   %}
  1697   ins_pipe( pipe_slow );
  1698 %}
  1700 instruct storeV32(memory mem, vecY src) %{
  1701   predicate(n->as_StoreVector()->memory_size() == 32);
  1702   match(Set mem (StoreVector mem src));
  1703   ins_cost(145);
  1704   format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
  1705   ins_encode %{
  1706     __ vmovdqu($mem$$Address, $src$$XMMRegister);
  1707   %}
  1708   ins_pipe( pipe_slow );
  1709 %}
  1711 // Replicate byte scalar to be vector
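       // The byte broadcast widens the scalar in powers of two; for example, with
       // the low byte of $src = 0xAB:
       //
       //   movd        xmm, r32          bytes 0..3  = xx xx xx AB
       //   punpcklbw   xmm, xmm          low word    = AB AB
       //   pshuflw     xmm, xmm, 0x00    bytes 0..7  = AB x 8
       //   punpcklqdq  xmm, xmm          bytes 0..15 = AB x 16
       //   vinserti128h                  bytes 0..31 = AB x 32 (AVX2 ymm)
       //
       // Each instruct below stops at the step that fills its vector width.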
  1712 instruct Repl4B(vecS dst, rRegI src) %{
  1713   predicate(n->as_Vector()->length() == 4);
  1714   match(Set dst (ReplicateB src));
  1715   format %{ "movd    $dst,$src\n\t"
  1716             "punpcklbw $dst,$dst\n\t"
  1717             "pshuflw $dst,$dst,0x00\t! replicate4B" %}
  1718   ins_encode %{
  1719     __ movdl($dst$$XMMRegister, $src$$Register);
  1720     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
  1721     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  1722   %}
  1723   ins_pipe( pipe_slow );
  1724 %}
  1726 instruct Repl8B(vecD dst, rRegI src) %{
  1727   predicate(n->as_Vector()->length() == 8);
  1728   match(Set dst (ReplicateB src));
  1729   format %{ "movd    $dst,$src\n\t"
  1730             "punpcklbw $dst,$dst\n\t"
  1731             "pshuflw $dst,$dst,0x00\t! replicate8B" %}
  1732   ins_encode %{
  1733     __ movdl($dst$$XMMRegister, $src$$Register);
  1734     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
  1735     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  1736   %}
  1737   ins_pipe( pipe_slow );
  1738 %}
  1740 instruct Repl16B(vecX dst, rRegI src) %{
  1741   predicate(n->as_Vector()->length() == 16);
  1742   match(Set dst (ReplicateB src));
  1743   format %{ "movd    $dst,$src\n\t"
  1744             "punpcklbw $dst,$dst\n\t"
  1745             "pshuflw $dst,$dst,0x00\n\t"
  1746             "punpcklqdq $dst,$dst\t! replicate16B" %}
  1747   ins_encode %{
  1748     __ movdl($dst$$XMMRegister, $src$$Register);
  1749     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
  1750     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  1751     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  1752   %}
  1753   ins_pipe( pipe_slow );
  1754 %}
  1756 instruct Repl32B(vecY dst, rRegI src) %{
  1757   predicate(n->as_Vector()->length() == 32);
  1758   match(Set dst (ReplicateB src));
  1759   format %{ "movd    $dst,$src\n\t"
  1760             "punpcklbw $dst,$dst\n\t"
  1761             "pshuflw $dst,$dst,0x00\n\t"
  1762             "punpcklqdq $dst,$dst\n\t"
  1763             "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
  1764   ins_encode %{
  1765     __ movdl($dst$$XMMRegister, $src$$Register);
  1766     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
  1767     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  1768     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  1769     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  1770   %}
  1771   ins_pipe( pipe_slow );
  1772 %}
  1774 // Replicate byte scalar immediate to be vector by loading from const table.
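       // The replicate4_imm()/replicate8_imm() helpers (defined elsewhere in the ad
       // sources) build a 4- or 8-byte constant by repeating the immediate at the
       // given element size; for instance ReplicateB of the constant 0x05 is
       // expected to place 0x0505050505050505 in the constant table, which the
       // patterns below load with movdl/movq and widen with punpcklqdq and
       // vinserti128h as needed.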
  1775 instruct Repl4B_imm(vecS dst, immI con) %{
  1776   predicate(n->as_Vector()->length() == 4);
  1777   match(Set dst (ReplicateB con));
  1778   format %{ "movdl   $dst,[$constantaddress]\t! replicate4B($con)" %}
  1779   ins_encode %{
  1780     __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
  1781   %}
  1782   ins_pipe( pipe_slow );
  1783 %}
  1785 instruct Repl8B_imm(vecD dst, immI con) %{
  1786   predicate(n->as_Vector()->length() == 8);
  1787   match(Set dst (ReplicateB con));
  1788   format %{ "movq    $dst,[$constantaddress]\t! replicate8B($con)" %}
  1789   ins_encode %{
  1790     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
  1791   %}
  1792   ins_pipe( pipe_slow );
  1793 %}
  1795 instruct Repl16B_imm(vecX dst, immI con) %{
  1796   predicate(n->as_Vector()->length() == 16);
  1797   match(Set dst (ReplicateB con));
  1798   format %{ "movq    $dst,[$constantaddress]\n\t"
  1799             "punpcklqdq $dst,$dst\t! replicate16B($con)" %}
  1800   ins_encode %{
  1801     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
  1802     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  1803   %}
  1804   ins_pipe( pipe_slow );
  1805 %}
  1807 instruct Repl32B_imm(vecY dst, immI con) %{
  1808   predicate(n->as_Vector()->length() == 32);
  1809   match(Set dst (ReplicateB con));
  1810   format %{ "movq    $dst,[$constantaddress]\n\t"
  1811             "punpcklqdq $dst,$dst\n\t"
  1812             "vinserti128h $dst,$dst,$dst\t! replicate32B($con)" %}
  1813   ins_encode %{
  1814     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
  1815     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  1816     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  1817   %}
  1818   ins_pipe( pipe_slow );
  1819 %}
  1821 // Replicate byte scalar zero to be vector
  1822 instruct Repl4B_zero(vecS dst, immI0 zero) %{
  1823   predicate(n->as_Vector()->length() == 4);
  1824   match(Set dst (ReplicateB zero));
  1825   format %{ "pxor    $dst,$dst\t! replicate4B zero" %}
  1826   ins_encode %{
  1827     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  1828   %}
  1829   ins_pipe( fpu_reg_reg );
  1830 %}
  1832 instruct Repl8B_zero(vecD dst, immI0 zero) %{
  1833   predicate(n->as_Vector()->length() == 8);
  1834   match(Set dst (ReplicateB zero));
  1835   format %{ "pxor    $dst,$dst\t! replicate8B zero" %}
  1836   ins_encode %{
  1837     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  1838   %}
  1839   ins_pipe( fpu_reg_reg );
  1840 %}
  1842 instruct Repl16B_zero(vecX dst, immI0 zero) %{
  1843   predicate(n->as_Vector()->length() == 16);
  1844   match(Set dst (ReplicateB zero));
  1845   format %{ "pxor    $dst,$dst\t! replicate16B zero" %}
  1846   ins_encode %{
  1847     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  1848   %}
  1849   ins_pipe( fpu_reg_reg );
  1850 %}
  1852 instruct Repl32B_zero(vecY dst, immI0 zero) %{
  1853   predicate(n->as_Vector()->length() == 32);
  1854   match(Set dst (ReplicateB zero));
  1855   format %{ "vpxor   $dst,$dst,$dst\t! replicate32B zero" %}
  1856   ins_encode %{
  1857     // 256-bit vpxor requires AVX2; 32-byte integer vectors are only generated when AVX2 is available.
  1858     bool vector256 = true;
  1859     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  1860   %}
  1861   ins_pipe( fpu_reg_reg );
  1862 %}
  1864 // Replicate char/short (2 byte) scalar to be vector
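       // Same scheme as the byte broadcast above, except the element is already a
       // 16-bit word, so no punpcklbw step is needed: pshuflw 0x00 fills the low
       // 8 bytes, punpcklqdq fills 16, and vinserti128h fills 32.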
  1865 instruct Repl2S(vecS dst, rRegI src) %{
  1866   predicate(n->as_Vector()->length() == 2);
  1867   match(Set dst (ReplicateS src));
  1868   format %{ "movd    $dst,$src\n\t"
  1869             "pshuflw $dst,$dst,0x00\t! replicate2S" %}
  1870   ins_encode %{
  1871     __ movdl($dst$$XMMRegister, $src$$Register);
  1872     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  1873   %}
  1874   ins_pipe( fpu_reg_reg );
  1875 %}
  1877 instruct Repl4S(vecD dst, rRegI src) %{
  1878   predicate(n->as_Vector()->length() == 4);
  1879   match(Set dst (ReplicateS src));
  1880   format %{ "movd    $dst,$src\n\t"
  1881             "pshuflw $dst,$dst,0x00\t! replicate4S" %}
  1882   ins_encode %{
  1883     __ movdl($dst$$XMMRegister, $src$$Register);
  1884     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  1885   %}
  1886   ins_pipe( fpu_reg_reg );
  1887 %}
  1889 instruct Repl8S(vecX dst, rRegI src) %{
  1890   predicate(n->as_Vector()->length() == 8);
  1891   match(Set dst (ReplicateS src));
  1892   format %{ "movd    $dst,$src\n\t"
  1893             "pshuflw $dst,$dst,0x00\n\t"
  1894             "punpcklqdq $dst,$dst\t! replicate8S" %}
  1895   ins_encode %{
  1896     __ movdl($dst$$XMMRegister, $src$$Register);
  1897     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  1898     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  1899   %}
  1900   ins_pipe( pipe_slow );
  1901 %}
  1903 instruct Repl16S(vecY dst, rRegI src) %{
  1904   predicate(n->as_Vector()->length() == 16);
  1905   match(Set dst (ReplicateS src));
  1906   format %{ "movd    $dst,$src\n\t"
  1907             "pshuflw $dst,$dst,0x00\n\t"
  1908             "punpcklqdq $dst,$dst\n\t"
  1909             "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
  1910   ins_encode %{
  1911     __ movdl($dst$$XMMRegister, $src$$Register);
  1912     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  1913     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  1914     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  1915   %}
  1916   ins_pipe( pipe_slow );
  1917 %}
  1919 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
  1920 instruct Repl2S_imm(vecS dst, immI con) %{
  1921   predicate(n->as_Vector()->length() == 2);
  1922   match(Set dst (ReplicateS con));
  1923   format %{ "movdl   $dst,[$constantaddress]\t! replicate2S($con)" %}
  1924   ins_encode %{
  1925     __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
  1926   %}
  1927   ins_pipe( fpu_reg_reg );
  1928 %}
  1930 instruct Repl4S_imm(vecD dst, immI con) %{
  1931   predicate(n->as_Vector()->length() == 4);
  1932   match(Set dst (ReplicateS con));
  1933   format %{ "movq    $dst,[$constantaddress]\t! replicate4S($con)" %}
  1934   ins_encode %{
  1935     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
  1936   %}
  1937   ins_pipe( fpu_reg_reg );
  1938 %}
  1940 instruct Repl8S_imm(vecX dst, immI con) %{
  1941   predicate(n->as_Vector()->length() == 8);
  1942   match(Set dst (ReplicateS con));
  1943   format %{ "movq    $dst,[$constantaddress]\n\t"
  1944             "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
  1945   ins_encode %{
  1946     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
  1947     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  1948   %}
  1949   ins_pipe( pipe_slow );
  1950 %}
  1952 instruct Repl16S_imm(vecY dst, immI con) %{
  1953   predicate(n->as_Vector()->length() == 16);
  1954   match(Set dst (ReplicateS con));
  1955   format %{ "movq    $dst,[$constantaddress]\n\t"
  1956             "punpcklqdq $dst,$dst\n\t"
  1957             "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %}
  1958   ins_encode %{
  1959     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
  1960     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  1961     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  1962   %}
  1963   ins_pipe( pipe_slow );
  1964 %}
  1966 // Replicate char/short (2 byte) scalar zero to be vector
  1967 instruct Repl2S_zero(vecS dst, immI0 zero) %{
  1968   predicate(n->as_Vector()->length() == 2);
  1969   match(Set dst (ReplicateS zero));
  1970   format %{ "pxor    $dst,$dst\t! replicate2S zero" %}
  1971   ins_encode %{
  1972     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  1973   %}
  1974   ins_pipe( fpu_reg_reg );
  1975 %}
  1977 instruct Repl4S_zero(vecD dst, immI0 zero) %{
  1978   predicate(n->as_Vector()->length() == 4);
  1979   match(Set dst (ReplicateS zero));
  1980   format %{ "pxor    $dst,$dst\t! replicate4S zero" %}
  1981   ins_encode %{
  1982     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  1983   %}
  1984   ins_pipe( fpu_reg_reg );
  1985 %}
  1987 instruct Repl8S_zero(vecX dst, immI0 zero) %{
  1988   predicate(n->as_Vector()->length() == 8);
  1989   match(Set dst (ReplicateS zero));
  1990   format %{ "pxor    $dst,$dst\t! replicate8S zero" %}
  1991   ins_encode %{
  1992     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  1993   %}
  1994   ins_pipe( fpu_reg_reg );
  1995 %}
  1997 instruct Repl16S_zero(vecY dst, immI0 zero) %{
  1998   predicate(n->as_Vector()->length() == 16);
  1999   match(Set dst (ReplicateS zero));
  2000   format %{ "vpxor   $dst,$dst,$dst\t! replicate16S zero" %}
  2001   ins_encode %{
  2002     // 256-bit vpxor requires AVX2; 32-byte integer vectors are only generated when AVX2 is available.
  2003     bool vector256 = true;
  2004     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  2005   %}
  2006   ins_pipe( fpu_reg_reg );
  2007 %}
  2009 // Replicate integer (4 byte) scalar to be vector
  2010 instruct Repl2I(vecD dst, rRegI src) %{
  2011   predicate(n->as_Vector()->length() == 2);
  2012   match(Set dst (ReplicateI src));
  2013   format %{ "movd    $dst,$src\n\t"
  2014             "pshufd  $dst,$dst,0x00\t! replicate2I" %}
  2015   ins_encode %{
  2016     __ movdl($dst$$XMMRegister, $src$$Register);
  2017     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  2018   %}
  2019   ins_pipe( fpu_reg_reg );
  2020 %}
  2022 instruct Repl4I(vecX dst, rRegI src) %{
  2023   predicate(n->as_Vector()->length() == 4);
  2024   match(Set dst (ReplicateI src));
  2025   format %{ "movd    $dst,$src\n\t"
  2026             "pshufd  $dst,$dst,0x00\t! replicate4I" %}
  2027   ins_encode %{
  2028     __ movdl($dst$$XMMRegister, $src$$Register);
  2029     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  2030   %}
  2031   ins_pipe( pipe_slow );
  2032 %}
  2034 instruct Repl8I(vecY dst, rRegI src) %{
  2035   predicate(n->as_Vector()->length() == 8);
  2036   match(Set dst (ReplicateI src));
  2037   format %{ "movd    $dst,$src\n\t"
  2038             "pshufd  $dst,$dst,0x00\n\t"
  2039             "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
  2040   ins_encode %{
  2041     __ movdl($dst$$XMMRegister, $src$$Register);
  2042     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  2043     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  2044   %}
  2045   ins_pipe( pipe_slow );
  2046 %}
  2048 // Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
  2049 instruct Repl2I_imm(vecD dst, immI con) %{
  2050   predicate(n->as_Vector()->length() == 2);
  2051   match(Set dst (ReplicateI con));
  2052   format %{ "movq    $dst,[$constantaddress]\t! replicate2I($con)" %}
  2053   ins_encode %{
  2054     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  2055   %}
  2056   ins_pipe( fpu_reg_reg );
  2057 %}
  2059 instruct Repl4I_imm(vecX dst, immI con) %{
  2060   predicate(n->as_Vector()->length() == 4);
  2061   match(Set dst (ReplicateI con));
  2062   format %{ "movq    $dst,[$constantaddress]\t! replicate4I($con)\n\t"
  2063             "punpcklqdq $dst,$dst" %}
  2064   ins_encode %{
  2065     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  2066     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  2067   %}
  2068   ins_pipe( pipe_slow );
  2069 %}
  2071 instruct Repl8I_imm(vecY dst, immI con) %{
  2072   predicate(n->as_Vector()->length() == 8);
  2073   match(Set dst (ReplicateI con));
  2074   format %{ "movq    $dst,[$constantaddress]\t! replicate8I($con)\n\t"
  2075             "punpcklqdq $dst,$dst\n\t"
  2076             "vinserti128h $dst,$dst,$dst" %}
  2077   ins_encode %{
  2078     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  2079     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  2080     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  2081   %}
  2082   ins_pipe( pipe_slow );
  2083 %}
  2085 // An integer can be loaded into an xmm register directly from memory.
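       // Folding the LoadI into the replicate avoids a trip through a general
       // register.  Instead of
       //   mov  eax, [mem];  movd xmm0, eax;   pshufd xmm0, xmm0, 0x00
       // the matcher can emit
       //   movd xmm0, [mem];                   pshufd xmm0, xmm0, 0x00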
  2086 instruct Repl2I_mem(vecD dst, memory mem) %{
  2087   predicate(n->as_Vector()->length() == 2);
  2088   match(Set dst (ReplicateI (LoadI mem)));
  2089   format %{ "movd    $dst,$mem\n\t"
  2090             "pshufd  $dst,$dst,0x00\t! replicate2I" %}
  2091   ins_encode %{
  2092     __ movdl($dst$$XMMRegister, $mem$$Address);
  2093     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  2094   %}
  2095   ins_pipe( fpu_reg_reg );
  2096 %}
  2098 instruct Repl4I_mem(vecX dst, memory mem) %{
  2099   predicate(n->as_Vector()->length() == 4);
  2100   match(Set dst (ReplicateI (LoadI mem)));
  2101   format %{ "movd    $dst,$mem\n\t"
  2102             "pshufd  $dst,$dst,0x00\t! replicate4I" %}
  2103   ins_encode %{
  2104     __ movdl($dst$$XMMRegister, $mem$$Address);
  2105     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  2106   %}
  2107   ins_pipe( pipe_slow );
  2108 %}
  2110 instruct Repl8I_mem(vecY dst, memory mem) %{
  2111   predicate(n->as_Vector()->length() == 8);
  2112   match(Set dst (ReplicateI (LoadI mem)));
  2113   format %{ "movd    $dst,$mem\n\t"
  2114             "pshufd  $dst,$dst,0x00\n\t"
  2115             "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
  2116   ins_encode %{
  2117     __ movdl($dst$$XMMRegister, $mem$$Address);
  2118     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  2119     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  2120   %}
  2121   ins_pipe( pipe_slow );
  2122 %}
  2124 // Replicate integer (4 byte) scalar zero to be vector
  2125 instruct Repl2I_zero(vecD dst, immI0 zero) %{
  2126   predicate(n->as_Vector()->length() == 2);
  2127   match(Set dst (ReplicateI zero));
  2128   format %{ "pxor    $dst,$dst\t! replicate2I zero" %}
  2129   ins_encode %{
  2130     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  2131   %}
  2132   ins_pipe( fpu_reg_reg );
  2133 %}
  2135 instruct Repl4I_zero(vecX dst, immI0 zero) %{
  2136   predicate(n->as_Vector()->length() == 4);
  2137   match(Set dst (ReplicateI zero));
  2138   format %{ "pxor    $dst,$dst\t! replicate4I zero" %}
  2139   ins_encode %{
  2140     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  2141   %}
  2142   ins_pipe( fpu_reg_reg );
  2143 %}
  2145 instruct Repl8I_zero(vecY dst, immI0 zero) %{
  2146   predicate(n->as_Vector()->length() == 8);
  2147   match(Set dst (ReplicateI zero));
  2148   format %{ "vpxor   $dst,$dst,$dst\t! replicate8I zero" %}
  2149   ins_encode %{
  2150     // 256-bit vpxor requires AVX2; 32-byte integer vectors are only generated when AVX2 is available.
  2151     bool vector256 = true;
  2152     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  2153   %}
  2154   ins_pipe( fpu_reg_reg );
  2155 %}
  2157 // Replicate long (8 byte) scalar to be vector
  2158 #ifdef _LP64
  2159 instruct Repl2L(vecX dst, rRegL src) %{
  2160   predicate(n->as_Vector()->length() == 2);
  2161   match(Set dst (ReplicateL src));
  2162   format %{ "movdq   $dst,$src\n\t"
  2163             "punpcklqdq $dst,$dst\t! replicate2L" %}
  2164   ins_encode %{
  2165     __ movdq($dst$$XMMRegister, $src$$Register);
  2166     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  2167   %}
  2168   ins_pipe( pipe_slow );
  2169 %}
  2171 instruct Repl4L(vecY dst, rRegL src) %{
  2172   predicate(n->as_Vector()->length() == 4);
  2173   match(Set dst (ReplicateL src));
  2174   format %{ "movdq   $dst,$src\n\t"
  2175             "punpcklqdq $dst,$dst\n\t"
  2176             "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
  2177   ins_encode %{
  2178     __ movdq($dst$$XMMRegister, $src$$Register);
  2179     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  2180     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  2181   %}
  2182   ins_pipe( pipe_slow );
  2183 %}
  2184 #else // _LP64
  2185 instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
  2186   predicate(n->as_Vector()->length() == 2);
  2187   match(Set dst (ReplicateL src));
  2188   effect(TEMP dst, USE src, TEMP tmp);
  2189   format %{ "movdl   $dst,$src.lo\n\t"
  2190             "movdl   $tmp,$src.hi\n\t"
  2191             "punpckldq $dst,$tmp\n\t"
  2192             "punpcklqdq $dst,$dst\t! replicate2L" %}
  2193   ins_encode %{
  2194     __ movdl($dst$$XMMRegister, $src$$Register);
  2195     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
  2196     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
  2197     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  2198   %}
  2199   ins_pipe( pipe_slow );
  2200 %}
  2202 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
  2203   predicate(n->as_Vector()->length() == 4);
  2204   match(Set dst (ReplicateL src));
  2205   effect(TEMP dst, USE src, TEMP tmp);
  2206   format %{ "movdl   $dst,$src.lo\n\t"
  2207             "movdl   $tmp,$src.hi\n\t"
  2208             "punpckldq $dst,$tmp\n\t"
  2209             "punpcklqdq $dst,$dst\n\t"
  2210             "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
  2211   ins_encode %{
  2212     __ movdl($dst$$XMMRegister, $src$$Register);
  2213     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
  2214     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
  2215     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  2216     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  2217   %}
  2218   ins_pipe( pipe_slow );
  2219 %}
  2220 #endif // _LP64
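       // In the 32-bit (#else) case above, a long lives in a pair of 32-bit
       // registers, so the two halves are moved into XMM separately and stitched
       // together; HIGH_FROM_LOW($src$$Register) names the high half of the pair:
       //
       //   movdl       xmm, src.lo      low dword   = lo
       //   movdl       tmp, src.hi      low dword   = hi
       //   punpckldq   xmm, tmp         low qword   = hi:lo  (the 64-bit long)
       //   punpcklqdq  xmm, xmm         both qwords = the long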
  2222 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
  2223 instruct Repl2L_imm(vecX dst, immL con) %{
  2224   predicate(n->as_Vector()->length() == 2);
  2225   match(Set dst (ReplicateL con));
  2226   format %{ "movq    $dst,[$constantaddress]\n\t"
  2227             "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
  2228   ins_encode %{
  2229     __ movq($dst$$XMMRegister, $constantaddress($con));
  2230     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  2231   %}
  2232   ins_pipe( pipe_slow );
  2233 %}
  2235 instruct Repl4L_imm(vecY dst, immL con) %{
  2236   predicate(n->as_Vector()->length() == 4);
  2237   match(Set dst (ReplicateL con));
  2238   format %{ "movq    $dst,[$constantaddress]\n\t"
  2239             "punpcklqdq $dst,$dst\n\t"
  2240             "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %}
  2241   ins_encode %{
  2242     __ movq($dst$$XMMRegister, $constantaddress($con));
  2243     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  2244     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  2245   %}
  2246   ins_pipe( pipe_slow );
  2247 %}
  2249 // A long can be loaded into an xmm register directly from memory.
  2250 instruct Repl2L_mem(vecX dst, memory mem) %{
  2251   predicate(n->as_Vector()->length() == 2);
  2252   match(Set dst (ReplicateL (LoadL mem)));
  2253   format %{ "movq    $dst,$mem\n\t"
  2254             "punpcklqdq $dst,$dst\t! replicate2L" %}
  2255   ins_encode %{
  2256     __ movq($dst$$XMMRegister, $mem$$Address);
  2257     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  2258   %}
  2259   ins_pipe( pipe_slow );
  2260 %}
  2262 instruct Repl4L_mem(vecY dst, memory mem) %{
  2263   predicate(n->as_Vector()->length() == 4);
  2264   match(Set dst (ReplicateL (LoadL mem)));
  2265   format %{ "movq    $dst,$mem\n\t"
  2266             "punpcklqdq $dst,$dst\n\t"
  2267             "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
  2268   ins_encode %{
  2269     __ movq($dst$$XMMRegister, $mem$$Address);
  2270     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  2271     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  2272   %}
  2273   ins_pipe( pipe_slow );
  2274 %}
  2276 // Replicate long (8 byte) scalar zero to be vector
  2277 instruct Repl2L_zero(vecX dst, immL0 zero) %{
  2278   predicate(n->as_Vector()->length() == 2);
  2279   match(Set dst (ReplicateL zero));
  2280   format %{ "pxor    $dst,$dst\t! replicate2L zero" %}
  2281   ins_encode %{
  2282     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  2283   %}
  2284   ins_pipe( fpu_reg_reg );
  2285 %}
  2287 instruct Repl4L_zero(vecY dst, immL0 zero) %{
  2288   predicate(n->as_Vector()->length() == 4);
  2289   match(Set dst (ReplicateL zero));
  2290   format %{ "vpxor   $dst,$dst,$dst\t! replicate4L zero" %}
  2291   ins_encode %{
  2292     // 256-bit vpxor requires AVX2; 32-byte integer vectors are only generated when AVX2 is available.
  2293     bool vector256 = true;
  2294     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  2295   %}
  2296   ins_pipe( fpu_reg_reg );
  2297 %}
  2299 // Replicate float (4 byte) scalar to be vector
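       // Float scalars already live in XMM registers, so a single shuffle
       // broadcasts them: pshufd with selector 0x00 (all four 2-bit fields pick
       // dword 0) copies lane 0 into every lane; for 8 floats the 128-bit result
       // is then mirrored into the upper half with vinsertf128h.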
  2300 instruct Repl2F(vecD dst, regF src) %{
  2301   predicate(n->as_Vector()->length() == 2);
  2302   match(Set dst (ReplicateF src));
  2303   format %{ "pshufd  $dst,$src,0x00\t! replicate2F" %}
  2304   ins_encode %{
  2305     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  2306   %}
  2307   ins_pipe( fpu_reg_reg );
  2308 %}
  2310 instruct Repl4F(vecX dst, regF src) %{
  2311   predicate(n->as_Vector()->length() == 4);
  2312   match(Set dst (ReplicateF src));
  2313   format %{ "pshufd  $dst,$src,0x00\t! replicate4F" %}
  2314   ins_encode %{
  2315     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  2316   %}
  2317   ins_pipe( pipe_slow );
  2318 %}
  2320 instruct Repl8F(vecY dst, regF src) %{
  2321   predicate(n->as_Vector()->length() == 8);
  2322   match(Set dst (ReplicateF src));
  2323   format %{ "pshufd  $dst,$src,0x00\n\t"
  2324             "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
  2325   ins_encode %{
  2326     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  2327     __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  2328   %}
  2329   ins_pipe( pipe_slow );
  2330 %}
  2332 // Replicate float (4 byte) scalar zero to be vector
  2333 instruct Repl2F_zero(vecD dst, immF0 zero) %{
  2334   predicate(n->as_Vector()->length() == 2);
  2335   match(Set dst (ReplicateF zero));
  2336   format %{ "xorps   $dst,$dst\t! replicate2F zero" %}
  2337   ins_encode %{
  2338     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  2339   %}
  2340   ins_pipe( fpu_reg_reg );
  2341 %}
  2343 instruct Repl4F_zero(vecX dst, immF0 zero) %{
  2344   predicate(n->as_Vector()->length() == 4);
  2345   match(Set dst (ReplicateF zero));
  2346   format %{ "xorps   $dst,$dst\t! replicate4F zero" %}
  2347   ins_encode %{
  2348     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  2349   %}
  2350   ins_pipe( fpu_reg_reg );
  2351 %}
  2353 instruct Repl8F_zero(vecY dst, immF0 zero) %{
  2354   predicate(n->as_Vector()->length() == 8);
  2355   match(Set dst (ReplicateF zero));
  2356   format %{ "vxorps  $dst,$dst,$dst\t! replicate8F zero" %}
  2357   ins_encode %{
  2358     bool vector256 = true;
  2359     __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  2360   %}
  2361   ins_pipe( fpu_reg_reg );
  2362 %}
  2364 // Replicate double (8 byte) scalar to be vector
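       // For doubles the pshufd selector is 0x44 (binary 01 00 01 00, i.e. dwords
       // {0,1,0,1}): the low 64-bit lane is duplicated into both halves of the XMM
       // register, and vinsertf128h mirrors that into the upper 128 bits for
       // 4 doubles.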
  2365 instruct Repl2D(vecX dst, regD src) %{
  2366   predicate(n->as_Vector()->length() == 2);
  2367   match(Set dst (ReplicateD src));
  2368   format %{ "pshufd  $dst,$src,0x44\t! replicate2D" %}
  2369   ins_encode %{
  2370     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  2371   %}
  2372   ins_pipe( pipe_slow );
  2373 %}
  2375 instruct Repl4D(vecY dst, regD src) %{
  2376   predicate(n->as_Vector()->length() == 4);
  2377   match(Set dst (ReplicateD src));
  2378   format %{ "pshufd  $dst,$src,0x44\n\t"
  2379             "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
  2380   ins_encode %{
  2381     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  2382     __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  2383   %}
  2384   ins_pipe( pipe_slow );
  2385 %}
  2387 // Replicate double (8 byte) scalar zero to be vector
  2388 instruct Repl2D_zero(vecX dst, immD0 zero) %{
  2389   predicate(n->as_Vector()->length() == 2);
  2390   match(Set dst (ReplicateD zero));
  2391   format %{ "xorpd   $dst,$dst\t! replicate2D zero" %}
  2392   ins_encode %{
  2393     __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
  2394   %}
  2395   ins_pipe( fpu_reg_reg );
  2396 %}
  2398 instruct Repl4D_zero(vecY dst, immD0 zero) %{
  2399   predicate(n->as_Vector()->length() == 4);
  2400   match(Set dst (ReplicateD zero));
  2401   format %{ "vxorpd  $dst,$dst,$dst\t! replicate4D zero" %}
  2402   ins_encode %{
  2403     bool vector256 = true;
  2404     __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  2405   %}
  2406   ins_pipe( fpu_reg_reg );
  2407 %}
  2409 // ====================VECTOR ARITHMETIC=======================================
  2411 // --------------------------------- ADD --------------------------------------
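       // Each element type/width follows the same shape throughout the arithmetic
       // sections: a destructive SSE form matching (Set dst (AddVB dst src)), a
       // non-destructive three-operand AVX register form, and an AVX form with the
       // second operand folded from memory (LoadVector).  The vector256 flag passed
       // to the macro assembler selects the 256-bit (ymm) encoding; the 256-bit
       // integer forms additionally require UseAVX > 1 (AVX2).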
  2413 // Bytes vector add
  2414 instruct vadd4B(vecS dst, vecS src) %{
  2415   predicate(n->as_Vector()->length() == 4);
  2416   match(Set dst (AddVB dst src));
  2417   format %{ "paddb   $dst,$src\t! add packed4B" %}
  2418   ins_encode %{
  2419     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  2420   %}
  2421   ins_pipe( pipe_slow );
  2422 %}
  2424 instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
  2425   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  2426   match(Set dst (AddVB src1 src2));
  2427   format %{ "vpaddb  $dst,$src1,$src2\t! add packed4B" %}
  2428   ins_encode %{
  2429     bool vector256 = false;
  2430     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2431   %}
  2432   ins_pipe( pipe_slow );
  2433 %}
  2435 instruct vadd8B(vecD dst, vecD src) %{
  2436   predicate(n->as_Vector()->length() == 8);
  2437   match(Set dst (AddVB dst src));
  2438   format %{ "paddb   $dst,$src\t! add packed8B" %}
  2439   ins_encode %{
  2440     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  2441   %}
  2442   ins_pipe( pipe_slow );
  2443 %}
  2445 instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
  2446   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  2447   match(Set dst (AddVB src1 src2));
  2448   format %{ "vpaddb  $dst,$src1,$src2\t! add packed8B" %}
  2449   ins_encode %{
  2450     bool vector256 = false;
  2451     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2452   %}
  2453   ins_pipe( pipe_slow );
  2454 %}
  2456 instruct vadd16B(vecX dst, vecX src) %{
  2457   predicate(n->as_Vector()->length() == 16);
  2458   match(Set dst (AddVB dst src));
  2459   format %{ "paddb   $dst,$src\t! add packed16B" %}
  2460   ins_encode %{
  2461     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  2462   %}
  2463   ins_pipe( pipe_slow );
  2464 %}
  2466 instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
  2467   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  2468   match(Set dst (AddVB src1 src2));
  2469   format %{ "vpaddb  $dst,$src1,$src2\t! add packed16B" %}
  2470   ins_encode %{
  2471     bool vector256 = false;
  2472     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2473   %}
  2474   ins_pipe( pipe_slow );
  2475 %}
  2477 instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
  2478   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  2479   match(Set dst (AddVB src (LoadVector mem)));
  2480   format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
  2481   ins_encode %{
  2482     bool vector256 = false;
  2483     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2484   %}
  2485   ins_pipe( pipe_slow );
  2486 %}
  2488 instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
  2489   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  2490   match(Set dst (AddVB src1 src2));
  2491   format %{ "vpaddb  $dst,$src1,$src2\t! add packed32B" %}
  2492   ins_encode %{
  2493     bool vector256 = true;
  2494     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2495   %}
  2496   ins_pipe( pipe_slow );
  2497 %}
  2499 instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
  2500   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  2501   match(Set dst (AddVB src (LoadVector mem)));
  2502   format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
  2503   ins_encode %{
  2504     bool vector256 = true;
  2505     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2506   %}
  2507   ins_pipe( pipe_slow );
  2508 %}
  2510 // Shorts/Chars vector add
  2511 instruct vadd2S(vecS dst, vecS src) %{
  2512   predicate(n->as_Vector()->length() == 2);
  2513   match(Set dst (AddVS dst src));
  2514   format %{ "paddw   $dst,$src\t! add packed2S" %}
  2515   ins_encode %{
  2516     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  2517   %}
  2518   ins_pipe( pipe_slow );
  2519 %}
  2521 instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
  2522   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  2523   match(Set dst (AddVS src1 src2));
  2524   format %{ "vpaddw  $dst,$src1,$src2\t! add packed2S" %}
  2525   ins_encode %{
  2526     bool vector256 = false;
  2527     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2528   %}
  2529   ins_pipe( pipe_slow );
  2530 %}
  2532 instruct vadd4S(vecD dst, vecD src) %{
  2533   predicate(n->as_Vector()->length() == 4);
  2534   match(Set dst (AddVS dst src));
  2535   format %{ "paddw   $dst,$src\t! add packed4S" %}
  2536   ins_encode %{
  2537     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  2538   %}
  2539   ins_pipe( pipe_slow );
  2540 %}
  2542 instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
  2543   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  2544   match(Set dst (AddVS src1 src2));
  2545   format %{ "vpaddw  $dst,$src1,$src2\t! add packed4S" %}
  2546   ins_encode %{
  2547     bool vector256 = false;
  2548     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2549   %}
  2550   ins_pipe( pipe_slow );
  2551 %}
  2553 instruct vadd8S(vecX dst, vecX src) %{
  2554   predicate(n->as_Vector()->length() == 8);
  2555   match(Set dst (AddVS dst src));
  2556   format %{ "paddw   $dst,$src\t! add packed8S" %}
  2557   ins_encode %{
  2558     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  2559   %}
  2560   ins_pipe( pipe_slow );
  2561 %}
  2563 instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
  2564   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  2565   match(Set dst (AddVS src1 src2));
  2566   format %{ "vpaddw  $dst,$src1,$src2\t! add packed8S" %}
  2567   ins_encode %{
  2568     bool vector256 = false;
  2569     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2570   %}
  2571   ins_pipe( pipe_slow );
  2572 %}
  2574 instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
  2575   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  2576   match(Set dst (AddVS src (LoadVector mem)));
  2577   format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
  2578   ins_encode %{
  2579     bool vector256 = false;
  2580     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2581   %}
  2582   ins_pipe( pipe_slow );
  2583 %}
  2585 instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
  2586   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  2587   match(Set dst (AddVS src1 src2));
  2588   format %{ "vpaddw  $dst,$src1,$src2\t! add packed16S" %}
  2589   ins_encode %{
  2590     bool vector256 = true;
  2591     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2592   %}
  2593   ins_pipe( pipe_slow );
  2594 %}
  2596 instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
  2597   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  2598   match(Set dst (AddVS src (LoadVector mem)));
  2599   format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
  2600   ins_encode %{
  2601     bool vector256 = true;
  2602     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2603   %}
  2604   ins_pipe( pipe_slow );
  2605 %}
  2607 // Integers vector add
  2608 instruct vadd2I(vecD dst, vecD src) %{
  2609   predicate(n->as_Vector()->length() == 2);
  2610   match(Set dst (AddVI dst src));
  2611   format %{ "paddd   $dst,$src\t! add packed2I" %}
  2612   ins_encode %{
  2613     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  2614   %}
  2615   ins_pipe( pipe_slow );
  2616 %}
  2618 instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  2619   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  2620   match(Set dst (AddVI src1 src2));
  2621   format %{ "vpaddd  $dst,$src1,$src2\t! add packed2I" %}
  2622   ins_encode %{
  2623     bool vector256 = false;
  2624     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2625   %}
  2626   ins_pipe( pipe_slow );
  2627 %}
  2629 instruct vadd4I(vecX dst, vecX src) %{
  2630   predicate(n->as_Vector()->length() == 4);
  2631   match(Set dst (AddVI dst src));
  2632   format %{ "paddd   $dst,$src\t! add packed4I" %}
  2633   ins_encode %{
  2634     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  2635   %}
  2636   ins_pipe( pipe_slow );
  2637 %}
  2639 instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  2640   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  2641   match(Set dst (AddVI src1 src2));
  2642   format %{ "vpaddd  $dst,$src1,$src2\t! add packed4I" %}
  2643   ins_encode %{
  2644     bool vector256 = false;
  2645     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2646   %}
  2647   ins_pipe( pipe_slow );
  2648 %}
  2650 instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
  2651   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  2652   match(Set dst (AddVI src (LoadVector mem)));
  2653   format %{ "vpaddd  $dst,$src,$mem\t! add packed4I" %}
  2654   ins_encode %{
  2655     bool vector256 = false;
  2656     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2657   %}
  2658   ins_pipe( pipe_slow );
  2659 %}
  2661 instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
  2662   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  2663   match(Set dst (AddVI src1 src2));
  2664   format %{ "vpaddd  $dst,$src1,$src2\t! add packed8I" %}
  2665   ins_encode %{
  2666     bool vector256 = true;
  2667     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2668   %}
  2669   ins_pipe( pipe_slow );
  2670 %}
  2672 instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
  2673   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  2674   match(Set dst (AddVI src (LoadVector mem)));
  2675   format %{ "vpaddd  $dst,$src,$mem\t! add packed8I" %}
  2676   ins_encode %{
  2677     bool vector256 = true;
  2678     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2679   %}
  2680   ins_pipe( pipe_slow );
  2681 %}
  2683 // Longs vector add
  2684 instruct vadd2L(vecX dst, vecX src) %{
  2685   predicate(n->as_Vector()->length() == 2);
  2686   match(Set dst (AddVL dst src));
  2687   format %{ "paddq   $dst,$src\t! add packed2L" %}
  2688   ins_encode %{
  2689     __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  2690   %}
  2691   ins_pipe( pipe_slow );
  2692 %}
  2694 instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  2695   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  2696   match(Set dst (AddVL src1 src2));
  2697   format %{ "vpaddq  $dst,$src1,$src2\t! add packed2L" %}
  2698   ins_encode %{
  2699     bool vector256 = false;
  2700     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2701   %}
  2702   ins_pipe( pipe_slow );
  2703 %}
  2705 instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
  2706   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  2707   match(Set dst (AddVL src (LoadVector mem)));
  2708   format %{ "vpaddq  $dst,$src,$mem\t! add packed2L" %}
  2709   ins_encode %{
  2710     bool vector256 = false;
  2711     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2712   %}
  2713   ins_pipe( pipe_slow );
  2714 %}
  2716 instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
  2717   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  2718   match(Set dst (AddVL src1 src2));
  2719   format %{ "vpaddq  $dst,$src1,$src2\t! add packed4L" %}
  2720   ins_encode %{
  2721     bool vector256 = true;
  2722     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2723   %}
  2724   ins_pipe( pipe_slow );
  2725 %}
  2727 instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
  2728   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  2729   match(Set dst (AddVL src (LoadVector mem)));
  2730   format %{ "vpaddq  $dst,$src,$mem\t! add packed4L" %}
  2731   ins_encode %{
  2732     bool vector256 = true;
  2733     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2734   %}
  2735   ins_pipe( pipe_slow );
  2736 %}
  2738 // Floats vector add
  2739 instruct vadd2F(vecD dst, vecD src) %{
  2740   predicate(n->as_Vector()->length() == 2);
  2741   match(Set dst (AddVF dst src));
  2742   format %{ "addps   $dst,$src\t! add packed2F" %}
  2743   ins_encode %{
  2744     __ addps($dst$$XMMRegister, $src$$XMMRegister);
  2745   %}
  2746   ins_pipe( pipe_slow );
  2747 %}
  2749 instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  2750   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  2751   match(Set dst (AddVF src1 src2));
  2752   format %{ "vaddps  $dst,$src1,$src2\t! add packed2F" %}
  2753   ins_encode %{
  2754     bool vector256 = false;
  2755     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2756   %}
  2757   ins_pipe( pipe_slow );
  2758 %}
  2760 instruct vadd4F(vecX dst, vecX src) %{
  2761   predicate(n->as_Vector()->length() == 4);
  2762   match(Set dst (AddVF dst src));
  2763   format %{ "addps   $dst,$src\t! add packed4F" %}
  2764   ins_encode %{
  2765     __ addps($dst$$XMMRegister, $src$$XMMRegister);
  2766   %}
  2767   ins_pipe( pipe_slow );
  2768 %}
  2770 instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  2771   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  2772   match(Set dst (AddVF src1 src2));
  2773   format %{ "vaddps  $dst,$src1,$src2\t! add packed4F" %}
  2774   ins_encode %{
  2775     bool vector256 = false;
  2776     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2777   %}
  2778   ins_pipe( pipe_slow );
  2779 %}
  2781 instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  2782   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  2783   match(Set dst (AddVF src (LoadVector mem)));
  2784   format %{ "vaddps  $dst,$src,$mem\t! add packed4F" %}
  2785   ins_encode %{
  2786     bool vector256 = false;
  2787     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2788   %}
  2789   ins_pipe( pipe_slow );
  2790 %}
  2792 instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
  2793   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  2794   match(Set dst (AddVF src1 src2));
  2795   format %{ "vaddps  $dst,$src1,$src2\t! add packed8F" %}
  2796   ins_encode %{
  2797     bool vector256 = true;
  2798     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2799   %}
  2800   ins_pipe( pipe_slow );
  2801 %}
  2803 instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
  2804   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  2805   match(Set dst (AddVF src (LoadVector mem)));
  2806   format %{ "vaddps  $dst,$src,$mem\t! add packed8F" %}
  2807   ins_encode %{
  2808     bool vector256 = true;
  2809     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2810   %}
  2811   ins_pipe( pipe_slow );
  2812 %}
  2814 // Doubles vector add
  2815 instruct vadd2D(vecX dst, vecX src) %{
  2816   predicate(n->as_Vector()->length() == 2);
  2817   match(Set dst (AddVD dst src));
  2818   format %{ "addpd   $dst,$src\t! add packed2D" %}
  2819   ins_encode %{
  2820     __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  2821   %}
  2822   ins_pipe( pipe_slow );
  2823 %}
  2825 instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
  2826   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  2827   match(Set dst (AddVD src1 src2));
  2828   format %{ "vaddpd  $dst,$src1,$src2\t! add packed2D" %}
  2829   ins_encode %{
  2830     bool vector256 = false;
  2831     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2832   %}
  2833   ins_pipe( pipe_slow );
  2834 %}
  2836 instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
  2837   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  2838   match(Set dst (AddVD src (LoadVector mem)));
  2839   format %{ "vaddpd  $dst,$src,$mem\t! add packed2D" %}
  2840   ins_encode %{
  2841     bool vector256 = false;
  2842     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2843   %}
  2844   ins_pipe( pipe_slow );
  2845 %}
  2847 instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
  2848   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  2849   match(Set dst (AddVD src1 src2));
  2850   format %{ "vaddpd  $dst,$src1,$src2\t! add packed4D" %}
  2851   ins_encode %{
  2852     bool vector256 = true;
  2853     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2854   %}
  2855   ins_pipe( pipe_slow );
  2856 %}
  2858 instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
  2859   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  2860   match(Set dst (AddVD src (LoadVector mem)));
  2861   format %{ "vaddpd  $dst,$src,$mem\t! add packed4D" %}
  2862   ins_encode %{
  2863     bool vector256 = true;
  2864     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2865   %}
  2866   ins_pipe( pipe_slow );
  2867 %}
  2869 // --------------------------------- SUB --------------------------------------
  2871 // Bytes vector sub
  2872 instruct vsub4B(vecS dst, vecS src) %{
  2873   predicate(n->as_Vector()->length() == 4);
  2874   match(Set dst (SubVB dst src));
  2875   format %{ "psubb   $dst,$src\t! sub packed4B" %}
  2876   ins_encode %{
  2877     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  2878   %}
  2879   ins_pipe( pipe_slow );
  2880 %}
  2882 instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
  2883   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  2884   match(Set dst (SubVB src1 src2));
  2885   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed4B" %}
  2886   ins_encode %{
  2887     bool vector256 = false;
  2888     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2889   %}
  2890   ins_pipe( pipe_slow );
  2891 %}
  2893 instruct vsub8B(vecD dst, vecD src) %{
  2894   predicate(n->as_Vector()->length() == 8);
  2895   match(Set dst (SubVB dst src));
  2896   format %{ "psubb   $dst,$src\t! sub packed8B" %}
  2897   ins_encode %{
  2898     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  2899   %}
  2900   ins_pipe( pipe_slow );
  2901 %}
  2903 instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
  2904   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  2905   match(Set dst (SubVB src1 src2));
  2906   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed8B" %}
  2907   ins_encode %{
  2908     bool vector256 = false;
  2909     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2910   %}
  2911   ins_pipe( pipe_slow );
  2912 %}
  2914 instruct vsub16B(vecX dst, vecX src) %{
  2915   predicate(n->as_Vector()->length() == 16);
  2916   match(Set dst (SubVB dst src));
  2917   format %{ "psubb   $dst,$src\t! sub packed16B" %}
  2918   ins_encode %{
  2919     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  2920   %}
  2921   ins_pipe( pipe_slow );
  2922 %}
  2924 instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
  2925   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  2926   match(Set dst (SubVB src1 src2));
  2927   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed16B" %}
  2928   ins_encode %{
  2929     bool vector256 = false;
  2930     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2931   %}
  2932   ins_pipe( pipe_slow );
  2933 %}
  2935 instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
  2936   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  2937   match(Set dst (SubVB src (LoadVector mem)));
  2938   format %{ "vpsubb  $dst,$src,$mem\t! sub packed16B" %}
  2939   ins_encode %{
  2940     bool vector256 = false;
  2941     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2942   %}
  2943   ins_pipe( pipe_slow );
  2944 %}
  2946 instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
  2947   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  2948   match(Set dst (SubVB src1 src2));
  2949   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed32B" %}
  2950   ins_encode %{
  2951     bool vector256 = true;
  2952     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2953   %}
  2954   ins_pipe( pipe_slow );
  2955 %}
  2957 instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
  2958   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  2959   match(Set dst (SubVB src (LoadVector mem)));
  2960   format %{ "vpsubb  $dst,$src,$mem\t! sub packed32B" %}
  2961   ins_encode %{
  2962     bool vector256 = true;
  2963     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2964   %}
  2965   ins_pipe( pipe_slow );
  2966 %}
  2968 // Shorts/Chars vector sub
  2969 instruct vsub2S(vecS dst, vecS src) %{
  2970   predicate(n->as_Vector()->length() == 2);
  2971   match(Set dst (SubVS dst src));
  2972   format %{ "psubw   $dst,$src\t! sub packed2S" %}
  2973   ins_encode %{
  2974     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  2975   %}
  2976   ins_pipe( pipe_slow );
  2977 %}
  2979 instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
  2980   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  2981   match(Set dst (SubVS src1 src2));
  2982   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed2S" %}
  2983   ins_encode %{
  2984     bool vector256 = false;
  2985     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2986   %}
  2987   ins_pipe( pipe_slow );
  2988 %}
  2990 instruct vsub4S(vecD dst, vecD src) %{
  2991   predicate(n->as_Vector()->length() == 4);
  2992   match(Set dst (SubVS dst src));
  2993   format %{ "psubw   $dst,$src\t! sub packed4S" %}
  2994   ins_encode %{
  2995     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  2996   %}
  2997   ins_pipe( pipe_slow );
  2998 %}
  3000 instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
  3001   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3002   match(Set dst (SubVS src1 src2));
  3003   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed4S" %}
  3004   ins_encode %{
  3005     bool vector256 = false;
  3006     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3007   %}
  3008   ins_pipe( pipe_slow );
  3009 %}
  3011 instruct vsub8S(vecX dst, vecX src) %{
  3012   predicate(n->as_Vector()->length() == 8);
  3013   match(Set dst (SubVS dst src));
  3014   format %{ "psubw   $dst,$src\t! sub packed8S" %}
  3015   ins_encode %{
  3016     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  3017   %}
  3018   ins_pipe( pipe_slow );
  3019 %}
  3021 instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
  3022   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  3023   match(Set dst (SubVS src1 src2));
  3024   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed8S" %}
  3025   ins_encode %{
  3026     bool vector256 = false;
  3027     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3028   %}
  3029   ins_pipe( pipe_slow );
  3030 %}
  3032 instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
  3033   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  3034   match(Set dst (SubVS src (LoadVector mem)));
  3035   format %{ "vpsubw  $dst,$src,$mem\t! sub packed8S" %}
  3036   ins_encode %{
  3037     bool vector256 = false;
  3038     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3039   %}
  3040   ins_pipe( pipe_slow );
  3041 %}
  3043 instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
  3044   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  3045   match(Set dst (SubVS src1 src2));
  3046   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed16S" %}
  3047   ins_encode %{
  3048     bool vector256 = true;
  3049     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3050   %}
  3051   ins_pipe( pipe_slow );
  3052 %}
  3054 instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
  3055   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  3056   match(Set dst (SubVS src (LoadVector mem)));
  3057   format %{ "vpsubw  $dst,$src,$mem\t! sub packed16S" %}
  3058   ins_encode %{
  3059     bool vector256 = true;
  3060     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3061   %}
  3062   ins_pipe( pipe_slow );
  3063 %}
  3065 // Integers vector sub
  3066 instruct vsub2I(vecD dst, vecD src) %{
  3067   predicate(n->as_Vector()->length() == 2);
  3068   match(Set dst (SubVI dst src));
  3069   format %{ "psubd   $dst,$src\t! sub packed2I" %}
  3070   ins_encode %{
  3071     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  3072   %}
  3073   ins_pipe( pipe_slow );
  3074 %}
  3076 instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
  3077   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3078   match(Set dst (SubVI src1 src2));
  3079   format %{ "vpsubd  $dst,$src1,$src2\t! sub packed2I" %}
  3080   ins_encode %{
  3081     bool vector256 = false;
  3082     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3083   %}
  3084   ins_pipe( pipe_slow );
  3085 %}
  3087 instruct vsub4I(vecX dst, vecX src) %{
  3088   predicate(n->as_Vector()->length() == 4);
  3089   match(Set dst (SubVI dst src));
  3090   format %{ "psubd   $dst,$src\t! sub packed4I" %}
  3091   ins_encode %{
  3092     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  3093   %}
  3094   ins_pipe( pipe_slow );
  3095 %}
  3097 instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
  3098   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3099   match(Set dst (SubVI src1 src2));
  3100   format %{ "vpsubd  $dst,$src1,$src2\t! sub packed4I" %}
  3101   ins_encode %{
  3102     bool vector256 = false;
  3103     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3104   %}
  3105   ins_pipe( pipe_slow );
  3106 %}
  3108 instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
  3109   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3110   match(Set dst (SubVI src (LoadVector mem)));
  3111   format %{ "vpsubd  $dst,$src,$mem\t! sub packed4I" %}
  3112   ins_encode %{
  3113     bool vector256 = false;
  3114     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3115   %}
  3116   ins_pipe( pipe_slow );
  3117 %}
  3119 instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
  3120   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  3121   match(Set dst (SubVI src1 src2));
  3122   format %{ "vpsubd  $dst,$src1,$src2\t! sub packed8I" %}
  3123   ins_encode %{
  3124     bool vector256 = true;
  3125     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3126   %}
  3127   ins_pipe( pipe_slow );
  3128 %}
  3130 instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
  3131   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  3132   match(Set dst (SubVI src (LoadVector mem)));
  3133   format %{ "vpsubd  $dst,$src,$mem\t! sub packed8I" %}
  3134   ins_encode %{
  3135     bool vector256 = true;
  3136     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3137   %}
  3138   ins_pipe( pipe_slow );
  3139 %}
  3141 // Longs vector sub
  3142 instruct vsub2L(vecX dst, vecX src) %{
  3143   predicate(n->as_Vector()->length() == 2);
  3144   match(Set dst (SubVL dst src));
  3145   format %{ "psubq   $dst,$src\t! sub packed2L" %}
  3146   ins_encode %{
  3147     __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  3148   %}
  3149   ins_pipe( pipe_slow );
  3150 %}
  3152 instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
  3153   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3154   match(Set dst (SubVL src1 src2));
  3155   format %{ "vpsubq  $dst,$src1,$src2\t! sub packed2L" %}
  3156   ins_encode %{
  3157     bool vector256 = false;
  3158     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3159   %}
  3160   ins_pipe( pipe_slow );
  3161 %}
  3163 instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
  3164   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3165   match(Set dst (SubVL src (LoadVector mem)));
  3166   format %{ "vpsubq  $dst,$src,$mem\t! sub packed2L" %}
  3167   ins_encode %{
  3168     bool vector256 = false;
  3169     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3170   %}
  3171   ins_pipe( pipe_slow );
  3172 %}
  3174 instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
  3175   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  3176   match(Set dst (SubVL src1 src2));
  3177   format %{ "vpsubq  $dst,$src1,$src2\t! sub packed4L" %}
  3178   ins_encode %{
  3179     bool vector256 = true;
  3180     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3181   %}
  3182   ins_pipe( pipe_slow );
  3183 %}
  3185 instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
  3186   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  3187   match(Set dst (SubVL src (LoadVector mem)));
  3188   format %{ "vpsubq  $dst,$src,$mem\t! sub packed4L" %}
  3189   ins_encode %{
  3190     bool vector256 = true;
  3191     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3192   %}
  3193   ins_pipe( pipe_slow );
  3194 %}
  3196 // Floats vector sub
  3197 instruct vsub2F(vecD dst, vecD src) %{
  3198   predicate(n->as_Vector()->length() == 2);
  3199   match(Set dst (SubVF dst src));
  3200   format %{ "subps   $dst,$src\t! sub packed2F" %}
  3201   ins_encode %{
  3202     __ subps($dst$$XMMRegister, $src$$XMMRegister);
  3203   %}
  3204   ins_pipe( pipe_slow );
  3205 %}
  3207 instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
  3208   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3209   match(Set dst (SubVF src1 src2));
  3210   format %{ "vsubps  $dst,$src1,$src2\t! sub packed2F" %}
  3211   ins_encode %{
  3212     bool vector256 = false;
  3213     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3214   %}
  3215   ins_pipe( pipe_slow );
  3216 %}
  3218 instruct vsub4F(vecX dst, vecX src) %{
  3219   predicate(n->as_Vector()->length() == 4);
  3220   match(Set dst (SubVF dst src));
  3221   format %{ "subps   $dst,$src\t! sub packed4F" %}
  3222   ins_encode %{
  3223     __ subps($dst$$XMMRegister, $src$$XMMRegister);
  3224   %}
  3225   ins_pipe( pipe_slow );
  3226 %}
  3228 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
  3229   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3230   match(Set dst (SubVF src1 src2));
  3231   format %{ "vsubps  $dst,$src1,$src2\t! sub packed4F" %}
  3232   ins_encode %{
  3233     bool vector256 = false;
  3234     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3235   %}
  3236   ins_pipe( pipe_slow );
  3237 %}
  3239 instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
  3240   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3241   match(Set dst (SubVF src (LoadVector mem)));
  3242   format %{ "vsubps  $dst,$src,$mem\t! sub packed4F" %}
  3243   ins_encode %{
  3244     bool vector256 = false;
  3245     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3246   %}
  3247   ins_pipe( pipe_slow );
  3248 %}
  3250 instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
  3251   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  3252   match(Set dst (SubVF src1 src2));
  3253   format %{ "vsubps  $dst,$src1,$src2\t! sub packed8F" %}
  3254   ins_encode %{
  3255     bool vector256 = true;
  3256     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3257   %}
  3258   ins_pipe( pipe_slow );
  3259 %}
  3261 instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
  3262   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  3263   match(Set dst (SubVF src (LoadVector mem)));
  3264   format %{ "vsubps  $dst,$src,$mem\t! sub packed8F" %}
  3265   ins_encode %{
  3266     bool vector256 = true;
  3267     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3268   %}
  3269   ins_pipe( pipe_slow );
  3270 %}
  3272 // Doubles vector sub
  3273 instruct vsub2D(vecX dst, vecX src) %{
  3274   predicate(n->as_Vector()->length() == 2);
  3275   match(Set dst (SubVD dst src));
  3276   format %{ "subpd   $dst,$src\t! sub packed2D" %}
  3277   ins_encode %{
  3278     __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  3279   %}
  3280   ins_pipe( pipe_slow );
  3281 %}
  3283 instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
  3284   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3285   match(Set dst (SubVD src1 src2));
  3286   format %{ "vsubpd  $dst,$src1,$src2\t! sub packed2D" %}
  3287   ins_encode %{
  3288     bool vector256 = false;
  3289     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3290   %}
  3291   ins_pipe( pipe_slow );
  3292 %}
  3294 instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
  3295   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3296   match(Set dst (SubVD src (LoadVector mem)));
  3297   format %{ "vsubpd  $dst,$src,$mem\t! sub packed2D" %}
  3298   ins_encode %{
  3299     bool vector256 = false;
  3300     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3301   %}
  3302   ins_pipe( pipe_slow );
  3303 %}
  3305 instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
  3306   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3307   match(Set dst (SubVD src1 src2));
  3308   format %{ "vsubpd  $dst,$src1,$src2\t! sub packed4D" %}
  3309   ins_encode %{
  3310     bool vector256 = true;
  3311     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3312   %}
  3313   ins_pipe( pipe_slow );
  3314 %}
  3316 instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
  3317   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3318   match(Set dst (SubVD src (LoadVector mem)));
  3319   format %{ "vsubpd  $dst,$src,$mem\t! sub packed4D" %}
  3320   ins_encode %{
  3321     bool vector256 = true;
  3322     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3323   %}
  3324   ins_pipe( pipe_slow );
  3325 %}
  3327 // --------------------------------- MUL --------------------------------------
  3329 // Shorts/Chars vector mul
  3330 instruct vmul2S(vecS dst, vecS src) %{
  3331   predicate(n->as_Vector()->length() == 2);
  3332   match(Set dst (MulVS dst src));
  3333   format %{ "pmullw  $dst,$src\t! mul packed2S" %}
  3334   ins_encode %{
  3335     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  3336   %}
  3337   ins_pipe( pipe_slow );
  3338 %}
  3340 instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
  3341   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3342   match(Set dst (MulVS src1 src2));
  3343   format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  3344   ins_encode %{
  3345     bool vector256 = false;
  3346     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3347   %}
  3348   ins_pipe( pipe_slow );
  3349 %}
  3351 instruct vmul4S(vecD dst, vecD src) %{
  3352   predicate(n->as_Vector()->length() == 4);
  3353   match(Set dst (MulVS dst src));
  3354   format %{ "pmullw  $dst,$src\t! mul packed4S" %}
  3355   ins_encode %{
  3356     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  3357   %}
  3358   ins_pipe( pipe_slow );
  3359 %}
  3361 instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
  3362   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3363   match(Set dst (MulVS src1 src2));
  3364   format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  3365   ins_encode %{
  3366     bool vector256 = false;
  3367     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3368   %}
  3369   ins_pipe( pipe_slow );
  3370 %}
  3372 instruct vmul8S(vecX dst, vecX src) %{
  3373   predicate(n->as_Vector()->length() == 8);
  3374   match(Set dst (MulVS dst src));
  3375   format %{ "pmullw  $dst,$src\t! mul packed8S" %}
  3376   ins_encode %{
  3377     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  3378   %}
  3379   ins_pipe( pipe_slow );
  3380 %}
  3382 instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
  3383   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  3384   match(Set dst (MulVS src1 src2));
  3385   format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  3386   ins_encode %{
  3387     bool vector256 = false;
  3388     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3389   %}
  3390   ins_pipe( pipe_slow );
  3391 %}
  3393 instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
  3394   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  3395   match(Set dst (MulVS src (LoadVector mem)));
  3396   format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  3397   ins_encode %{
  3398     bool vector256 = false;
  3399     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3400   %}
  3401   ins_pipe( pipe_slow );
  3402 %}
  3404 instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
  3405   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  3406   match(Set dst (MulVS src1 src2));
  3407   format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  3408   ins_encode %{
  3409     bool vector256 = true;
  3410     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3411   %}
  3412   ins_pipe( pipe_slow );
  3413 %}
  3415 instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
  3416   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  3417   match(Set dst (MulVS src (LoadVector mem)));
  3418   format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  3419   ins_encode %{
  3420     bool vector256 = true;
  3421     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3422   %}
  3423   ins_pipe( pipe_slow );
  3424 %}
  3426 // Integers vector mul (sse4_1)
  3427 instruct vmul2I(vecD dst, vecD src) %{
  3428   predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
  3429   match(Set dst (MulVI dst src));
  3430   format %{ "pmulld  $dst,$src\t! mul packed2I" %}
  3431   ins_encode %{
  3432     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  3433   %}
  3434   ins_pipe( pipe_slow );
  3435 %}
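// Illustration only (not an ADLC rule; array names are hypothetical): pmulld is an
// SSE4.1 instruction, which is why the non-AVX MulVI rules in this block require the
// UseSSE > 3 predicate. A Java loop of the following shape is the kind of code these
// rules are intended to vectorize:
//
//   for (int i = 0; i < a.length; i++) {
//     a[i] = a[i] * b[i];            // packed int multiply needs pmulld (SSE4.1+)
//   }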
  3437 instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
  3438   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3439   match(Set dst (MulVI src1 src2));
  3440   format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
  3441   ins_encode %{
  3442     bool vector256 = false;
  3443     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3444   %}
  3445   ins_pipe( pipe_slow );
  3446 %}
  3448 instruct vmul4I(vecX dst, vecX src) %{
  3449   predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
  3450   match(Set dst (MulVI dst src));
  3451   format %{ "pmulld  $dst,$src\t! mul packed4I" %}
  3452   ins_encode %{
  3453     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  3454   %}
  3455   ins_pipe( pipe_slow );
  3456 %}
  3458 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
  3459   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3460   match(Set dst (MulVI src1 src2));
  3461   format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
  3462   ins_encode %{
  3463     bool vector256 = false;
  3464     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3465   %}
  3466   ins_pipe( pipe_slow );
  3467 %}
  3469 instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
  3470   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3471   match(Set dst (MulVI src (LoadVector mem)));
  3472   format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
  3473   ins_encode %{
  3474     bool vector256 = false;
  3475     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3476   %}
  3477   ins_pipe( pipe_slow );
  3478 %}
  3480 instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
  3481   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  3482   match(Set dst (MulVI src1 src2));
  3483   format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
  3484   ins_encode %{
  3485     bool vector256 = true;
  3486     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3487   %}
  3488   ins_pipe( pipe_slow );
  3489 %}
  3491 instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
  3492   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  3493   match(Set dst (MulVI src (LoadVector mem)));
  3494   format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
  3495   ins_encode %{
  3496     bool vector256 = true;
  3497     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3498   %}
  3499   ins_pipe( pipe_slow );
  3500 %}
  3502 // Floats vector mul
  3503 instruct vmul2F(vecD dst, vecD src) %{
  3504   predicate(n->as_Vector()->length() == 2);
  3505   match(Set dst (MulVF dst src));
  3506   format %{ "mulps   $dst,$src\t! mul packed2F" %}
  3507   ins_encode %{
  3508     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  3509   %}
  3510   ins_pipe( pipe_slow );
  3511 %}
  3513 instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
  3514   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3515   match(Set dst (MulVF src1 src2));
  3516   format %{ "vmulps  $dst,$src1,$src2\t! mul packed2F" %}
  3517   ins_encode %{
  3518     bool vector256 = false;
  3519     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3520   %}
  3521   ins_pipe( pipe_slow );
  3522 %}
  3524 instruct vmul4F(vecX dst, vecX src) %{
  3525   predicate(n->as_Vector()->length() == 4);
  3526   match(Set dst (MulVF dst src));
  3527   format %{ "mulps   $dst,$src\t! mul packed4F" %}
  3528   ins_encode %{
  3529     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  3530   %}
  3531   ins_pipe( pipe_slow );
  3532 %}
  3534 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
  3535   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3536   match(Set dst (MulVF src1 src2));
  3537   format %{ "vmulps  $dst,$src1,$src2\t! mul packed4F" %}
  3538   ins_encode %{
  3539     bool vector256 = false;
  3540     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3541   %}
  3542   ins_pipe( pipe_slow );
  3543 %}
  3545 instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
  3546   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3547   match(Set dst (MulVF src (LoadVector mem)));
  3548   format %{ "vmulps  $dst,$src,$mem\t! mul packed4F" %}
  3549   ins_encode %{
  3550     bool vector256 = false;
  3551     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3552   %}
  3553   ins_pipe( pipe_slow );
  3554 %}
  3556 instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
  3557   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  3558   match(Set dst (MulVF src1 src2));
  3559   format %{ "vmulps  $dst,$src1,$src2\t! mul packed8F" %}
  3560   ins_encode %{
  3561     bool vector256 = true;
  3562     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3563   %}
  3564   ins_pipe( pipe_slow );
  3565 %}
  3567 instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
  3568   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  3569   match(Set dst (MulVF src (LoadVector mem)));
  3570   format %{ "vmulps  $dst,$src,$mem\t! mul packed8F" %}
  3571   ins_encode %{
  3572     bool vector256 = true;
  3573     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3574   %}
  3575   ins_pipe( pipe_slow );
  3576 %}
  3578 // Doubles vector mul
  3579 instruct vmul2D(vecX dst, vecX src) %{
  3580   predicate(n->as_Vector()->length() == 2);
  3581   match(Set dst (MulVD dst src));
  3582   format %{ "mulpd   $dst,$src\t! mul packed2D" %}
  3583   ins_encode %{
  3584     __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  3585   %}
  3586   ins_pipe( pipe_slow );
  3587 %}
  3589 instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
  3590   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3591   match(Set dst (MulVD src1 src2));
  3592   format %{ "vmulpd  $dst,$src1,$src2\t! mul packed2D" %}
  3593   ins_encode %{
  3594     bool vector256 = false;
  3595     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3596   %}
  3597   ins_pipe( pipe_slow );
  3598 %}
  3600 instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
  3601   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3602   match(Set dst (MulVD src (LoadVector mem)));
  3603   format %{ "vmulpd  $dst,$src,$mem\t! mul packed2D" %}
  3604   ins_encode %{
  3605     bool vector256 = false;
  3606     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3607   %}
  3608   ins_pipe( pipe_slow );
  3609 %}
  3611 instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
  3612   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3613   match(Set dst (MulVD src1 src2));
  3614   format %{ "vmulpd  $dst,$src1,$src2\t! mul packed4D" %}
  3615   ins_encode %{
  3616     bool vector256 = true;
  3617     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3618   %}
  3619   ins_pipe( pipe_slow );
  3620 %}
  3622 instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
  3623   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3624   match(Set dst (MulVD src (LoadVector mem)));
  3625   format %{ "vmulpd  $dst,$src,$mem\t! mul packed4D" %}
  3626   ins_encode %{
  3627     bool vector256 = true;
  3628     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3629   %}
  3630   ins_pipe( pipe_slow );
  3631 %}
  3633 // --------------------------------- DIV --------------------------------------
  3635 // Floats vector div
  3636 instruct vdiv2F(vecD dst, vecD src) %{
  3637   predicate(n->as_Vector()->length() == 2);
  3638   match(Set dst (DivVF dst src));
  3639   format %{ "divps   $dst,$src\t! div packed2F" %}
  3640   ins_encode %{
  3641     __ divps($dst$$XMMRegister, $src$$XMMRegister);
  3642   %}
  3643   ins_pipe( pipe_slow );
  3644 %}
  3646 instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
  3647   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3648   match(Set dst (DivVF src1 src2));
  3649   format %{ "vdivps  $dst,$src1,$src2\t! div packed2F" %}
  3650   ins_encode %{
  3651     bool vector256 = false;
  3652     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3653   %}
  3654   ins_pipe( pipe_slow );
  3655 %}
  3657 instruct vdiv4F(vecX dst, vecX src) %{
  3658   predicate(n->as_Vector()->length() == 4);
  3659   match(Set dst (DivVF dst src));
  3660   format %{ "divps   $dst,$src\t! div packed4F" %}
  3661   ins_encode %{
  3662     __ divps($dst$$XMMRegister, $src$$XMMRegister);
  3663   %}
  3664   ins_pipe( pipe_slow );
  3665 %}
  3667 instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
  3668   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3669   match(Set dst (DivVF src1 src2));
  3670   format %{ "vdivps  $dst,$src1,$src2\t! div packed4F" %}
  3671   ins_encode %{
  3672     bool vector256 = false;
  3673     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3674   %}
  3675   ins_pipe( pipe_slow );
  3676 %}
  3678 instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
  3679   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3680   match(Set dst (DivVF src (LoadVector mem)));
  3681   format %{ "vdivps  $dst,$src,$mem\t! div packed4F" %}
  3682   ins_encode %{
  3683     bool vector256 = false;
  3684     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3685   %}
  3686   ins_pipe( pipe_slow );
  3687 %}
  3689 instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
  3690   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  3691   match(Set dst (DivVF src1 src2));
  3692   format %{ "vdivps  $dst,$src1,$src2\t! div packed8F" %}
  3693   ins_encode %{
  3694     bool vector256 = true;
  3695     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3696   %}
  3697   ins_pipe( pipe_slow );
  3698 %}
  3700 instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
  3701   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  3702   match(Set dst (DivVF src (LoadVector mem)));
  3703   format %{ "vdivps  $dst,$src,$mem\t! div packed8F" %}
  3704   ins_encode %{
  3705     bool vector256 = true;
  3706     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3707   %}
  3708   ins_pipe( pipe_slow );
  3709 %}
  3711 // Doubles vector div
  3712 instruct vdiv2D(vecX dst, vecX src) %{
  3713   predicate(n->as_Vector()->length() == 2);
  3714   match(Set dst (DivVD dst src));
  3715   format %{ "divpd   $dst,$src\t! div packed2D" %}
  3716   ins_encode %{
  3717     __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  3718   %}
  3719   ins_pipe( pipe_slow );
  3720 %}
  3722 instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
  3723   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3724   match(Set dst (DivVD src1 src2));
  3725   format %{ "vdivpd  $dst,$src1,$src2\t! div packed2D" %}
  3726   ins_encode %{
  3727     bool vector256 = false;
  3728     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3729   %}
  3730   ins_pipe( pipe_slow );
  3731 %}
  3733 instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
  3734   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3735   match(Set dst (DivVD src (LoadVector mem)));
  3736   format %{ "vdivpd  $dst,$src,$mem\t! div packed2D" %}
  3737   ins_encode %{
  3738     bool vector256 = false;
  3739     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3740   %}
  3741   ins_pipe( pipe_slow );
  3742 %}
  3744 instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
  3745   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3746   match(Set dst (DivVD src1 src2));
  3747   format %{ "vdivpd  $dst,$src1,$src2\t! div packed4D" %}
  3748   ins_encode %{
  3749     bool vector256 = true;
  3750     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3751   %}
  3752   ins_pipe( pipe_slow );
  3753 %}
  3755 instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
  3756   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3757   match(Set dst (DivVD src (LoadVector mem)));
  3758   format %{ "vdivpd  $dst,$src,$mem\t! div packed4D" %}
  3759   ins_encode %{
  3760     bool vector256 = true;
  3761     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3762   %}
  3763   ins_pipe( pipe_slow );
  3764 %}
  3766 // ------------------------------ Shift ---------------------------------------
  3768 // Left and right shift count vectors are the same on x86
  3769 // (only the lowest bits of the xmm register are used for the count).
  3770 instruct vshiftcnt(vecS dst, rRegI cnt) %{
  3771   match(Set dst (LShiftCntV cnt));
  3772   match(Set dst (RShiftCntV cnt));
  3773   format %{ "movd    $dst,$cnt\t! load shift count" %}
  3774   ins_encode %{
  3775     __ movdl($dst$$XMMRegister, $cnt$$Register);
  3776   %}
  3777   ins_pipe( pipe_slow );
  3778 %}
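// Illustration only (not an ADLC rule; names are hypothetical): the vshiftcnt rule
// above matches both LShiftCntV and RShiftCntV, so a single movd of a loop-invariant
// count serves left, logical-right and arithmetic-right vector shifts. A Java sketch
// of such a loop:
//
//   static void shiftLeft(int[] a, int cnt) {
//     for (int i = 0; i < a.length; i++) {
//       a[i] = a[i] << cnt;          // loop-invariant count loaded once into an xmm reg
//     }
//   }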
  3780 // ------------------------------ LeftShift -----------------------------------
  3782 // Shorts/Chars vector left shift
  3783 instruct vsll2S(vecS dst, vecS shift) %{
  3784   predicate(n->as_Vector()->length() == 2);
  3785   match(Set dst (LShiftVS dst shift));
  3786   format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
  3787   ins_encode %{
  3788     __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  3789   %}
  3790   ins_pipe( pipe_slow );
  3791 %}
  3793 instruct vsll2S_imm(vecS dst, immI8 shift) %{
  3794   predicate(n->as_Vector()->length() == 2);
  3795   match(Set dst (LShiftVS dst shift));
  3796   format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
  3797   ins_encode %{
  3798     __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  3799   %}
  3800   ins_pipe( pipe_slow );
  3801 %}
  3803 instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{
  3804   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3805   match(Set dst (LShiftVS src shift));
  3806   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
  3807   ins_encode %{
  3808     bool vector256 = false;
  3809     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  3810   %}
  3811   ins_pipe( pipe_slow );
  3812 %}
  3814 instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  3815   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3816   match(Set dst (LShiftVS src shift));
  3817   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
  3818   ins_encode %{
  3819     bool vector256 = false;
  3820     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  3821   %}
  3822   ins_pipe( pipe_slow );
  3823 %}
  3825 instruct vsll4S(vecD dst, vecS shift) %{
  3826   predicate(n->as_Vector()->length() == 4);
  3827   match(Set dst (LShiftVS dst shift));
  3828   format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
  3829   ins_encode %{
  3830     __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  3831   %}
  3832   ins_pipe( pipe_slow );
  3833 %}
  3835 instruct vsll4S_imm(vecD dst, immI8 shift) %{
  3836   predicate(n->as_Vector()->length() == 4);
  3837   match(Set dst (LShiftVS dst shift));
  3838   format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
  3839   ins_encode %{
  3840     __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  3841   %}
  3842   ins_pipe( pipe_slow );
  3843 %}
  3845 instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{
  3846   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3847   match(Set dst (LShiftVS src shift));
  3848   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
  3849   ins_encode %{
  3850     bool vector256 = false;
  3851     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  3852   %}
  3853   ins_pipe( pipe_slow );
  3854 %}
  3856 instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  3857   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3858   match(Set dst (LShiftVS src shift));
  3859   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
  3860   ins_encode %{
  3861     bool vector256 = false;
  3862     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  3863   %}
  3864   ins_pipe( pipe_slow );
  3865 %}
  3867 instruct vsll8S(vecX dst, vecS shift) %{
  3868   predicate(n->as_Vector()->length() == 8);
  3869   match(Set dst (LShiftVS dst shift));
  3870   format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
  3871   ins_encode %{
  3872     __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  3873   %}
  3874   ins_pipe( pipe_slow );
  3875 %}
  3877 instruct vsll8S_imm(vecX dst, immI8 shift) %{
  3878   predicate(n->as_Vector()->length() == 8);
  3879   match(Set dst (LShiftVS dst shift));
  3880   format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
  3881   ins_encode %{
  3882     __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  3883   %}
  3884   ins_pipe( pipe_slow );
  3885 %}
  3887 instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{
  3888   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  3889   match(Set dst (LShiftVS src shift));
  3890   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
  3891   ins_encode %{
  3892     bool vector256 = false;
  3893     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  3894   %}
  3895   ins_pipe( pipe_slow );
  3896 %}
  3898 instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  3899   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  3900   match(Set dst (LShiftVS src shift));
  3901   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
  3902   ins_encode %{
  3903     bool vector256 = false;
  3904     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  3905   %}
  3906   ins_pipe( pipe_slow );
  3907 %}
  3909 instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{
  3910   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  3911   match(Set dst (LShiftVS src shift));
  3912   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
  3913   ins_encode %{
  3914     bool vector256 = true;
  3915     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  3916   %}
  3917   ins_pipe( pipe_slow );
  3918 %}
  3920 instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  3921   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  3922   match(Set dst (LShiftVS src shift));
  3923   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
  3924   ins_encode %{
  3925     bool vector256 = true;
  3926     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  3927   %}
  3928   ins_pipe( pipe_slow );
  3929 %}
  3931 // Integers vector left shift
  3932 instruct vsll2I(vecD dst, vecS shift) %{
  3933   predicate(n->as_Vector()->length() == 2);
  3934   match(Set dst (LShiftVI dst shift));
  3935   format %{ "pslld   $dst,$shift\t! left shift packed2I" %}
  3936   ins_encode %{
  3937     __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  3938   %}
  3939   ins_pipe( pipe_slow );
  3940 %}
  3942 instruct vsll2I_imm(vecD dst, immI8 shift) %{
  3943   predicate(n->as_Vector()->length() == 2);
  3944   match(Set dst (LShiftVI dst shift));
  3945   format %{ "pslld   $dst,$shift\t! left shift packed2I" %}
  3946   ins_encode %{
  3947     __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  3948   %}
  3949   ins_pipe( pipe_slow );
  3950 %}
  3952 instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
  3953   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3954   match(Set dst (LShiftVI src shift));
  3955   format %{ "vpslld  $dst,$src,$shift\t! left shift packed2I" %}
  3956   ins_encode %{
  3957     bool vector256 = false;
  3958     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  3959   %}
  3960   ins_pipe( pipe_slow );
  3961 %}
  3963 instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  3964   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3965   match(Set dst (LShiftVI src shift));
  3966   format %{ "vpslld  $dst,$src,$shift\t! left shift packed2I" %}
  3967   ins_encode %{
  3968     bool vector256 = false;
  3969     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  3970   %}
  3971   ins_pipe( pipe_slow );
  3972 %}
  3974 instruct vsll4I(vecX dst, vecS shift) %{
  3975   predicate(n->as_Vector()->length() == 4);
  3976   match(Set dst (LShiftVI dst shift));
  3977   format %{ "pslld   $dst,$shift\t! left shift packed4I" %}
  3978   ins_encode %{
  3979     __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  3980   %}
  3981   ins_pipe( pipe_slow );
  3982 %}
  3984 instruct vsll4I_imm(vecX dst, immI8 shift) %{
  3985   predicate(n->as_Vector()->length() == 4);
  3986   match(Set dst (LShiftVI dst shift));
  3987   format %{ "pslld   $dst,$shift\t! left shift packed4I" %}
  3988   ins_encode %{
  3989     __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  3990   %}
  3991   ins_pipe( pipe_slow );
  3992 %}
  3994 instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
  3995   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3996   match(Set dst (LShiftVI src shift));
  3997   format %{ "vpslld  $dst,$src,$shift\t! left shift packed4I" %}
  3998   ins_encode %{
  3999     bool vector256 = false;
  4000     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4001   %}
  4002   ins_pipe( pipe_slow );
  4003 %}
  4005 instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  4006   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  4007   match(Set dst (LShiftVI src shift));
  4008   format %{ "vpslld  $dst,$src,$shift\t! left shift packed4I" %}
  4009   ins_encode %{
  4010     bool vector256 = false;
  4011     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4012   %}
  4013   ins_pipe( pipe_slow );
  4014 %}
  4016 instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
  4017   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  4018   match(Set dst (LShiftVI src shift));
  4019   format %{ "vpslld  $dst,$src,$shift\t! left shift packed8I" %}
  4020   ins_encode %{
  4021     bool vector256 = true;
  4022     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4023   %}
  4024   ins_pipe( pipe_slow );
  4025 %}
  4027 instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  4028   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  4029   match(Set dst (LShiftVI src shift));
  4030   format %{ "vpslld  $dst,$src,$shift\t! left shift packed8I" %}
  4031   ins_encode %{
  4032     bool vector256 = true;
  4033     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4034   %}
  4035   ins_pipe( pipe_slow );
  4036 %}
  4038 // Longs vector left shift
  4039 instruct vsll2L(vecX dst, vecS shift) %{
  4040   predicate(n->as_Vector()->length() == 2);
  4041   match(Set dst (LShiftVL dst shift));
  4042   format %{ "psllq   $dst,$shift\t! left shift packed2L" %}
  4043   ins_encode %{
  4044     __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
  4045   %}
  4046   ins_pipe( pipe_slow );
  4047 %}
  4049 instruct vsll2L_imm(vecX dst, immI8 shift) %{
  4050   predicate(n->as_Vector()->length() == 2);
  4051   match(Set dst (LShiftVL dst shift));
  4052   format %{ "psllq   $dst,$shift\t! left shift packed2L" %}
  4053   ins_encode %{
  4054     __ psllq($dst$$XMMRegister, (int)$shift$$constant);
  4055   %}
  4056   ins_pipe( pipe_slow );
  4057 %}
  4059 instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
  4060   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  4061   match(Set dst (LShiftVL src shift));
  4062   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed2L" %}
  4063   ins_encode %{
  4064     bool vector256 = false;
  4065     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4066   %}
  4067   ins_pipe( pipe_slow );
  4068 %}
  4070 instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  4071   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  4072   match(Set dst (LShiftVL src shift));
  4073   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed2L" %}
  4074   ins_encode %{
  4075     bool vector256 = false;
  4076     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4077   %}
  4078   ins_pipe( pipe_slow );
  4079 %}
  4081 instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
  4082   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  4083   match(Set dst (LShiftVL src shift));
  4084   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed4L" %}
  4085   ins_encode %{
  4086     bool vector256 = true;
  4087     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4088   %}
  4089   ins_pipe( pipe_slow );
  4090 %}
  4092 instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  4093   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  4094   match(Set dst (LShiftVL src shift));
  4095   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed4L" %}
  4096   ins_encode %{
  4097     bool vector256 = true;
  4098     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4099   %}
  4100   ins_pipe( pipe_slow );
  4101 %}
  4103 // ----------------------- LogicalRightShift -----------------------------------
  4105 // Shorts vector logical right shift produces an incorrect Java result
  4106 // for negative data because Java code converts a short value into an int with
  4107 // sign extension before the shift. But char vectors are fine since chars are
  4108 // unsigned values.
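// Worked illustration (not an ADLC rule; variable names are hypothetical) of why the
// two disagree for a negative short:
//
//   short s = (short) -4;            // bit pattern 0xFFFC
//   short r = (short) (s >>> 2);     // Java sign-extends s to 0xFFFFFFFC, shifts to
//                                    // 0x3FFFFFFF, then narrows to 0xFFFF, i.e. -1
//
// A packed 16-bit lane shift (psrlw) would instead compute 0xFFFC >>> 2 = 0x3FFF
// (16383), so the URShiftVS rules below match the Java result for zero-extended char
// data but not for negative short values, as noted above.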
  4110 instruct vsrl2S(vecS dst, vecS shift) %{
  4111   predicate(n->as_Vector()->length() == 2);
  4112   match(Set dst (URShiftVS dst shift));
  4113   format %{ "psrlw   $dst,$shift\t! logical right shift packed2S" %}
  4114   ins_encode %{
  4115     __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  4116   %}
  4117   ins_pipe( pipe_slow );
  4118 %}
  4120 instruct vsrl2S_imm(vecS dst, immI8 shift) %{
  4121   predicate(n->as_Vector()->length() == 2);
  4122   match(Set dst (URShiftVS dst shift));
  4123   format %{ "psrlw   $dst,$shift\t! logical right shift packed2S" %}
  4124   ins_encode %{
  4125     __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  4126   %}
  4127   ins_pipe( pipe_slow );
  4128 %}
  4130 instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
  4131   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  4132   match(Set dst (URShiftVS src shift));
  4133   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
  4134   ins_encode %{
  4135     bool vector256 = false;
  4136     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4137   %}
  4138   ins_pipe( pipe_slow );
  4139 %}
  4141 instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  4142   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  4143   match(Set dst (URShiftVS src shift));
  4144   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
  4145   ins_encode %{
  4146     bool vector256 = false;
  4147     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4148   %}
  4149   ins_pipe( pipe_slow );
  4150 %}
  4152 instruct vsrl4S(vecD dst, vecS shift) %{
  4153   predicate(n->as_Vector()->length() == 4);
  4154   match(Set dst (URShiftVS dst shift));
  4155   format %{ "psrlw   $dst,$shift\t! logical right shift packed4S" %}
  4156   ins_encode %{
  4157     __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  4158   %}
  4159   ins_pipe( pipe_slow );
  4160 %}
  4162 instruct vsrl4S_imm(vecD dst, immI8 shift) %{
  4163   predicate(n->as_Vector()->length() == 4);
  4164   match(Set dst (URShiftVS dst shift));
  4165   format %{ "psrlw   $dst,$shift\t! logical right shift packed4S" %}
  4166   ins_encode %{
  4167     __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  4168   %}
  4169   ins_pipe( pipe_slow );
  4170 %}
  4172 instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
  4173   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  4174   match(Set dst (URShiftVS src shift));
  4175   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
  4176   ins_encode %{
  4177     bool vector256 = false;
  4178     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4179   %}
  4180   ins_pipe( pipe_slow );
  4181 %}
  4183 instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  4184   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  4185   match(Set dst (URShiftVS src shift));
  4186   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
  4187   ins_encode %{
  4188     bool vector256 = false;
  4189     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4190   %}
  4191   ins_pipe( pipe_slow );
  4192 %}
  4194 instruct vsrl8S(vecX dst, vecS shift) %{
  4195   predicate(n->as_Vector()->length() == 8);
  4196   match(Set dst (URShiftVS dst shift));
  4197   format %{ "psrlw   $dst,$shift\t! logical right shift packed8S" %}
  4198   ins_encode %{
  4199     __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  4200   %}
  4201   ins_pipe( pipe_slow );
  4202 %}
  4204 instruct vsrl8S_imm(vecX dst, immI8 shift) %{
  4205   predicate(n->as_Vector()->length() == 8);
  4206   match(Set dst (URShiftVS dst shift));
  4207   format %{ "psrlw   $dst,$shift\t! logical right shift packed8S" %}
  4208   ins_encode %{
  4209     __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  4210   %}
  4211   ins_pipe( pipe_slow );
  4212 %}
  4214 instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
  4215   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  4216   match(Set dst (URShiftVS src shift));
  4217   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
  4218   ins_encode %{
  4219     bool vector256 = false;
  4220     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4221   %}
  4222   ins_pipe( pipe_slow );
  4223 %}
  4225 instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  4226   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  4227   match(Set dst (URShiftVS src shift));
  4228   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
  4229   ins_encode %{
  4230     bool vector256 = false;
  4231     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4232   %}
  4233   ins_pipe( pipe_slow );
  4234 %}
  4236 instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
  4237   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  4238   match(Set dst (URShiftVS src shift));
  4239   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
  4240   ins_encode %{
  4241     bool vector256 = true;
  4242     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4243   %}
  4244   ins_pipe( pipe_slow );
  4245 %}
  4247 instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  4248   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  4249   match(Set dst (URShiftVS src shift));
  4250   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
  4251   ins_encode %{
  4252     bool vector256 = true;
  4253     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4254   %}
  4255   ins_pipe( pipe_slow );
  4256 %}
  4258 // Integers vector logical right shift
  4259 instruct vsrl2I(vecD dst, vecS shift) %{
  4260   predicate(n->as_Vector()->length() == 2);
  4261   match(Set dst (URShiftVI dst shift));
  4262   format %{ "psrld   $dst,$shift\t! logical right shift packed2I" %}
  4263   ins_encode %{
  4264     __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  4265   %}
  4266   ins_pipe( pipe_slow );
  4267 %}
  4269 instruct vsrl2I_imm(vecD dst, immI8 shift) %{
  4270   predicate(n->as_Vector()->length() == 2);
  4271   match(Set dst (URShiftVI dst shift));
  4272   format %{ "psrld   $dst,$shift\t! logical right shift packed2I" %}
  4273   ins_encode %{
  4274     __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  4275   %}
  4276   ins_pipe( pipe_slow );
  4277 %}
  4279 instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{
  4280   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  4281   match(Set dst (URShiftVI src shift));
  4282   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed2I" %}
  4283   ins_encode %{
  4284     bool vector256 = false;
  4285     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4286   %}
  4287   ins_pipe( pipe_slow );
  4288 %}
  4290 instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  4291   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  4292   match(Set dst (URShiftVI src shift));
  4293   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed2I" %}
  4294   ins_encode %{
  4295     bool vector256 = false;
  4296     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4297   %}
  4298   ins_pipe( pipe_slow );
  4299 %}
  4301 instruct vsrl4I(vecX dst, vecS shift) %{
  4302   predicate(n->as_Vector()->length() == 4);
  4303   match(Set dst (URShiftVI dst shift));
  4304   format %{ "psrld   $dst,$shift\t! logical right shift packed4I" %}
  4305   ins_encode %{
  4306     __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  4307   %}
  4308   ins_pipe( pipe_slow );
  4309 %}
  4311 instruct vsrl4I_imm(vecX dst, immI8 shift) %{
  4312   predicate(n->as_Vector()->length() == 4);
  4313   match(Set dst (URShiftVI dst shift));
  4314   format %{ "psrld   $dst,$shift\t! logical right shift packed4I" %}
  4315   ins_encode %{
  4316     __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  4317   %}
  4318   ins_pipe( pipe_slow );
  4319 %}
  4321 instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
  4322   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  4323   match(Set dst (URShiftVI src shift));
  4324   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed4I" %}
  4325   ins_encode %{
  4326     bool vector256 = false;
  4327     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4328   %}
  4329   ins_pipe( pipe_slow );
  4330 %}
  4332 instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  4333   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  4334   match(Set dst (URShiftVI src shift));
  4335   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed4I" %}
  4336   ins_encode %{
  4337     bool vector256 = false;
  4338     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4339   %}
  4340   ins_pipe( pipe_slow );
  4341 %}
  4343 instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
  4344   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  4345   match(Set dst (URShiftVI src shift));
  4346   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed8I" %}
  4347   ins_encode %{
  4348     bool vector256 = true;
  4349     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4350   %}
  4351   ins_pipe( pipe_slow );
  4352 %}
  4354 instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  4355   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  4356   match(Set dst (URShiftVI src shift));
  4357   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed8I" %}
  4358   ins_encode %{
  4359     bool vector256 = true;
  4360     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4361   %}
  4362   ins_pipe( pipe_slow );
  4363 %}
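// Illustrative note (assumption about typical SuperWord output, not from the
// original source): an int loop like
//   static void srl(int[] a, int[] b) {
//     for (int i = 0; i < a.length; i++) a[i] = b[i] >>> 5;
//   }
// yields URShiftVI nodes matched by the vsrl*I rules above; the SSE forms use
// psrld, the AVX forms vpsrld, with vector256 = true selecting the 256-bit (YMM)
// encoding that requires AVX2 (UseAVX > 1).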
  4365 // Longs vector logical right shift
  4366 instruct vsrl2L(vecX dst, vecS shift) %{
  4367   predicate(n->as_Vector()->length() == 2);
  4368   match(Set dst (URShiftVL dst shift));
  4369   format %{ "psrlq   $dst,$shift\t! logical right shift packed2L" %}
  4370   ins_encode %{
  4371     __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
  4372   %}
  4373   ins_pipe( pipe_slow );
  4374 %}
  4376 instruct vsrl2L_imm(vecX dst, immI8 shift) %{
  4377   predicate(n->as_Vector()->length() == 2);
  4378   match(Set dst (URShiftVL dst shift));
  4379   format %{ "psrlq   $dst,$shift\t! logical right shift packed2L" %}
  4380   ins_encode %{
  4381     __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
  4382   %}
  4383   ins_pipe( pipe_slow );
  4384 %}
  4386 instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
  4387   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  4388   match(Set dst (URShiftVL src shift));
  4389   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed2L" %}
  4390   ins_encode %{
  4391     bool vector256 = false;
  4392     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4393   %}
  4394   ins_pipe( pipe_slow );
  4395 %}
  4397 instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  4398   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  4399   match(Set dst (URShiftVL src shift));
  4400   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed2L" %}
  4401   ins_encode %{
  4402     bool vector256 = false;
  4403     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4404   %}
  4405   ins_pipe( pipe_slow );
  4406 %}
  4408 instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
  4409   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  4410   match(Set dst (URShiftVL src shift));
  4411   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed4L" %}
  4412   ins_encode %{
  4413     bool vector256 = true;
  4414     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4415   %}
  4416   ins_pipe( pipe_slow );
  4417 %}
  4419 instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  4420   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  4421   match(Set dst (URShiftVL src shift));
  4422   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed4L" %}
  4423   ins_encode %{
  4424     bool vector256 = true;
  4425     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4426   %}
  4427   ins_pipe( pipe_slow );
  4428 %}
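// Illustrative note (assumption, not from the original source): a long loop such as
//   for (int i = 0; i < a.length; i++) a[i] = b[i] >>> 7;  // long[] a, b
// produces URShiftVL nodes; the 128-bit forms use psrlq/vpsrlq and the 4-element
// 256-bit form requires AVX2.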
  4430 // ------------------- ArithmeticRightShift -----------------------------------
  4432 // Shorts/Chars vector arithmetic right shift
  4433 instruct vsra2S(vecS dst, vecS shift) %{
  4434   predicate(n->as_Vector()->length() == 2);
  4435   match(Set dst (RShiftVS dst shift));
  4436   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed2S" %}
  4437   ins_encode %{
  4438     __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  4439   %}
  4440   ins_pipe( pipe_slow );
  4441 %}
  4443 instruct vsra2S_imm(vecS dst, immI8 shift) %{
  4444   predicate(n->as_Vector()->length() == 2);
  4445   match(Set dst (RShiftVS dst shift));
  4446   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed2S" %}
  4447   ins_encode %{
  4448     __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  4449   %}
  4450   ins_pipe( pipe_slow );
  4451 %}
  4453 instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{
  4454   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  4455   match(Set dst (RShiftVS src shift));
  4456   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  4457   ins_encode %{
  4458     bool vector256 = false;
  4459     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4460   %}
  4461   ins_pipe( pipe_slow );
  4462 %}
  4464 instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  4465   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  4466   match(Set dst (RShiftVS src shift));
  4467   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  4468   ins_encode %{
  4469     bool vector256 = false;
  4470     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4471   %}
  4472   ins_pipe( pipe_slow );
  4473 %}
  4475 instruct vsra4S(vecD dst, vecS shift) %{
  4476   predicate(n->as_Vector()->length() == 4);
  4477   match(Set dst (RShiftVS dst shift));
  4478   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
  4479   ins_encode %{
  4480     __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  4481   %}
  4482   ins_pipe( pipe_slow );
  4483 %}
  4485 instruct vsra4S_imm(vecD dst, immI8 shift) %{
  4486   predicate(n->as_Vector()->length() == 4);
  4487   match(Set dst (RShiftVS dst shift));
  4488   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
  4489   ins_encode %{
  4490     __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  4491   %}
  4492   ins_pipe( pipe_slow );
  4493 %}
  4495 instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{
  4496   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  4497   match(Set dst (RShiftVS src shift));
  4498   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  4499   ins_encode %{
  4500     bool vector256 = false;
  4501     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4502   %}
  4503   ins_pipe( pipe_slow );
  4504 %}
  4506 instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  4507   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  4508   match(Set dst (RShiftVS src shift));
  4509   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  4510   ins_encode %{
  4511     bool vector256 = false;
  4512     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4513   %}
  4514   ins_pipe( pipe_slow );
  4515 %}
  4517 instruct vsra8S(vecX dst, vecS shift) %{
  4518   predicate(n->as_Vector()->length() == 8);
  4519   match(Set dst (RShiftVS dst shift));
  4520   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
  4521   ins_encode %{
  4522     __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  4523   %}
  4524   ins_pipe( pipe_slow );
  4525 %}
  4527 instruct vsra8S_imm(vecX dst, immI8 shift) %{
  4528   predicate(n->as_Vector()->length() == 8);
  4529   match(Set dst (RShiftVS dst shift));
  4530   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
  4531   ins_encode %{
  4532     __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  4533   %}
  4534   ins_pipe( pipe_slow );
  4535 %}
  4537 instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{
  4538   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  4539   match(Set dst (RShiftVS src shift));
  4540   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  4541   ins_encode %{
  4542     bool vector256 = false;
  4543     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4544   %}
  4545   ins_pipe( pipe_slow );
  4546 %}
  4548 instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  4549   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  4550   match(Set dst (RShiftVS src shift));
  4551   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  4552   ins_encode %{
  4553     bool vector256 = false;
  4554     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4555   %}
  4556   ins_pipe( pipe_slow );
  4557 %}
  4559 instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{
  4560   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  4561   match(Set dst (RShiftVS src shift));
  4562   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  4563   ins_encode %{
  4564     bool vector256 = true;
  4565     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4566   %}
  4567   ins_pipe( pipe_slow );
  4568 %}
  4570 instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  4571   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  4572   match(Set dst (RShiftVS src shift));
  4573   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  4574   ins_encode %{
  4575     bool vector256 = true;
  4576     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4577   %}
  4578   ins_pipe( pipe_slow );
  4579 %}
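// Illustrative note (assumption about typical SuperWord output): a signed shift
// of shorts, e.g.
//   for (int i = 0; i < a.length; i++) a[i] = (short)(b[i] >> 3);  // short[] a, b
// can become RShiftVS nodes, which the rules above encode as psraw/vpsraw.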
  4581 // Integers vector arithmetic right shift
  4582 instruct vsra2I(vecD dst, vecS shift) %{
  4583   predicate(n->as_Vector()->length() == 2);
  4584   match(Set dst (RShiftVI dst shift));
  4585   format %{ "psrad   $dst,$shift\t! arithmetic right shift packed2I" %}
  4586   ins_encode %{
  4587     __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  4588   %}
  4589   ins_pipe( pipe_slow );
  4590 %}
  4592 instruct vsra2I_imm(vecD dst, immI8 shift) %{
  4593   predicate(n->as_Vector()->length() == 2);
  4594   match(Set dst (RShiftVI dst shift));
  4595   format %{ "psrad   $dst,$shift\t! arithmetic right shift packed2I" %}
  4596   ins_encode %{
  4597     __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  4598   %}
  4599   ins_pipe( pipe_slow );
  4600 %}
  4602 instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
  4603   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  4604   match(Set dst (RShiftVI src shift));
  4605   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  4606   ins_encode %{
  4607     bool vector256 = false;
  4608     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4609   %}
  4610   ins_pipe( pipe_slow );
  4611 %}
  4613 instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  4614   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  4615   match(Set dst (RShiftVI src shift));
  4616   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  4617   ins_encode %{
  4618     bool vector256 = false;
  4619     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4620   %}
  4621   ins_pipe( pipe_slow );
  4622 %}
  4624 instruct vsra4I(vecX dst, vecS shift) %{
  4625   predicate(n->as_Vector()->length() == 4);
  4626   match(Set dst (RShiftVI dst shift));
  4627   format %{ "psrad   $dst,$shift\t! arithmetic right shift packed4I" %}
  4628   ins_encode %{
  4629     __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  4630   %}
  4631   ins_pipe( pipe_slow );
  4632 %}
  4634 instruct vsra4I_imm(vecX dst, immI8 shift) %{
  4635   predicate(n->as_Vector()->length() == 4);
  4636   match(Set dst (RShiftVI dst shift));
  4637   format %{ "psrad   $dst,$shift\t! arithmetic right shift packed4I" %}
  4638   ins_encode %{
  4639     __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  4640   %}
  4641   ins_pipe( pipe_slow );
  4642 %}
  4644 instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
  4645   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  4646   match(Set dst (RShiftVI src shift));
  4647   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  4648   ins_encode %{
  4649     bool vector256 = false;
  4650     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4651   %}
  4652   ins_pipe( pipe_slow );
  4653 %}
  4655 instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  4656   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  4657   match(Set dst (RShiftVI src shift));
  4658   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  4659   ins_encode %{
  4660     bool vector256 = false;
  4661     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4662   %}
  4663   ins_pipe( pipe_slow );
  4664 %}
  4666 instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
  4667   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  4668   match(Set dst (RShiftVI src shift));
  4669   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  4670   ins_encode %{
  4671     bool vector256 = true;
  4672     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4673   %}
  4674   ins_pipe( pipe_slow );
  4675 %}
  4677 instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  4678   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  4679   match(Set dst (RShiftVI src shift));
  4680   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  4681   ins_encode %{
  4682     bool vector256 = true;
  4683     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4684   %}
  4685   ins_pipe( pipe_slow );
  4686 %}
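// Illustrative note (assumption, not from the original source): an int loop such as
//   for (int i = 0; i < a.length; i++) a[i] = b[i] >> 1;  // int[] a, b
// produces RShiftVI nodes, matched above with psrad/vpsrad.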
  4688 // There are no vector arithmetic right shift instructions for longs.
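// (SSE2/AVX2 provide psraw and psrad but no packed arithmetic right shift for
// 64-bit elements, so there is no rule for RShiftVL here.)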
  4691 // --------------------------------- AND --------------------------------------
  4693 instruct vand4B(vecS dst, vecS src) %{
  4694   predicate(n->as_Vector()->length_in_bytes() == 4);
  4695   match(Set dst (AndV dst src));
  4696   format %{ "pand    $dst,$src\t! and vectors (4 bytes)" %}
  4697   ins_encode %{
  4698     __ pand($dst$$XMMRegister, $src$$XMMRegister);
  4699   %}
  4700   ins_pipe( pipe_slow );
  4701 %}
  4703 instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
  4704   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  4705   match(Set dst (AndV src1 src2));
  4706   format %{ "vpand   $dst,$src1,$src2\t! and vectors (4 bytes)" %}
  4707   ins_encode %{
  4708     bool vector256 = false;
  4709     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  4710   %}
  4711   ins_pipe( pipe_slow );
  4712 %}
  4714 instruct vand8B(vecD dst, vecD src) %{
  4715   predicate(n->as_Vector()->length_in_bytes() == 8);
  4716   match(Set dst (AndV dst src));
  4717   format %{ "pand    $dst,$src\t! and vectors (8 bytes)" %}
  4718   ins_encode %{
  4719     __ pand($dst$$XMMRegister, $src$$XMMRegister);
  4720   %}
  4721   ins_pipe( pipe_slow );
  4722 %}
  4724 instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
  4725   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  4726   match(Set dst (AndV src1 src2));
  4727   format %{ "vpand   $dst,$src1,$src2\t! and vectors (8 bytes)" %}
  4728   ins_encode %{
  4729     bool vector256 = false;
  4730     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  4731   %}
  4732   ins_pipe( pipe_slow );
  4733 %}
  4735 instruct vand16B(vecX dst, vecX src) %{
  4736   predicate(n->as_Vector()->length_in_bytes() == 16);
  4737   match(Set dst (AndV dst src));
  4738   format %{ "pand    $dst,$src\t! and vectors (16 bytes)" %}
  4739   ins_encode %{
  4740     __ pand($dst$$XMMRegister, $src$$XMMRegister);
  4741   %}
  4742   ins_pipe( pipe_slow );
  4743 %}
  4745 instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
  4746   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  4747   match(Set dst (AndV src1 src2));
  4748   format %{ "vpand   $dst,$src1,$src2\t! and vectors (16 bytes)" %}
  4749   ins_encode %{
  4750     bool vector256 = false;
  4751     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  4752   %}
  4753   ins_pipe( pipe_slow );
  4754 %}
  4756 instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
  4757   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  4758   match(Set dst (AndV src (LoadVector mem)));
  4759   format %{ "vpand   $dst,$src,$mem\t! and vectors (16 bytes)" %}
  4760   ins_encode %{
  4761     bool vector256 = false;
  4762     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  4763   %}
  4764   ins_pipe( pipe_slow );
  4765 %}
  4767 instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
  4768   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  4769   match(Set dst (AndV src1 src2));
  4770   format %{ "vpand   $dst,$src1,$src2\t! and vectors (32 bytes)" %}
  4771   ins_encode %{
  4772     bool vector256 = true;
  4773     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  4774   %}
  4775   ins_pipe( pipe_slow );
  4776 %}
  4778 instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
  4779   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  4780   match(Set dst (AndV src (LoadVector mem)));
  4781   format %{ "vpand   $dst,$src,$mem\t! and vectors (32 bytes)" %}
  4782   ins_encode %{
  4783     bool vector256 = true;
  4784     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  4785   %}
  4786   ins_pipe( pipe_slow );
  4787 %}
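// Illustrative note (not part of the original rules): AndV nodes come from
// element-wise '&' over arrays, e.g. a hypothetical loop
//   for (int i = 0; i < a.length; i++) a[i] = b[i] & c[i];  // int[] a, b, c
// The rules key on length_in_bytes rather than element count because pand/vpand
// are bitwise and element-type agnostic; the *_mem forms fold a LoadVector into
// the memory operand of vpand.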
  4789 // --------------------------------- OR ---------------------------------------
  4791 instruct vor4B(vecS dst, vecS src) %{
  4792   predicate(n->as_Vector()->length_in_bytes() == 4);
  4793   match(Set dst (OrV dst src));
  4794   format %{ "por     $dst,$src\t! or vectors (4 bytes)" %}
  4795   ins_encode %{
  4796     __ por($dst$$XMMRegister, $src$$XMMRegister);
  4797   %}
  4798   ins_pipe( pipe_slow );
  4799 %}
  4801 instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  4802   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  4803   match(Set dst (OrV src1 src2));
  4804   format %{ "vpor    $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  4805   ins_encode %{
  4806     bool vector256 = false;
  4807     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  4808   %}
  4809   ins_pipe( pipe_slow );
  4810 %}
  4812 instruct vor8B(vecD dst, vecD src) %{
  4813   predicate(n->as_Vector()->length_in_bytes() == 8);
  4814   match(Set dst (OrV dst src));
  4815   format %{ "por     $dst,$src\t! or vectors (8 bytes)" %}
  4816   ins_encode %{
  4817     __ por($dst$$XMMRegister, $src$$XMMRegister);
  4818   %}
  4819   ins_pipe( pipe_slow );
  4820 %}
  4822 instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  4823   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  4824   match(Set dst (OrV src1 src2));
  4825   format %{ "vpor    $dst,$src1,$src2\t! or vectors (8 bytes)" %}
  4826   ins_encode %{
  4827     bool vector256 = false;
  4828     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  4829   %}
  4830   ins_pipe( pipe_slow );
  4831 %}
  4833 instruct vor16B(vecX dst, vecX src) %{
  4834   predicate(n->as_Vector()->length_in_bytes() == 16);
  4835   match(Set dst (OrV dst src));
  4836   format %{ "por     $dst,$src\t! or vectors (16 bytes)" %}
  4837   ins_encode %{
  4838     __ por($dst$$XMMRegister, $src$$XMMRegister);
  4839   %}
  4840   ins_pipe( pipe_slow );
  4841 %}
  4843 instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  4844   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  4845   match(Set dst (OrV src1 src2));
  4846   format %{ "vpor    $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  4847   ins_encode %{
  4848     bool vector256 = false;
  4849     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  4850   %}
  4851   ins_pipe( pipe_slow );
  4852 %}
  4854 instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  4855   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  4856   match(Set dst (OrV src (LoadVector mem)));
  4857   format %{ "vpor    $dst,$src,$mem\t! or vectors (16 bytes)" %}
  4858   ins_encode %{
  4859     bool vector256 = false;
  4860     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  4861   %}
  4862   ins_pipe( pipe_slow );
  4863 %}
  4865 instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  4866   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  4867   match(Set dst (OrV src1 src2));
  4868   format %{ "vpor    $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  4869   ins_encode %{
  4870     bool vector256 = true;
  4871     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  4872   %}
  4873   ins_pipe( pipe_slow );
  4874 %}
  4876 instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  4877   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  4878   match(Set dst (OrV src (LoadVector mem)));
  4879   format %{ "vpor    $dst,$src,$mem\t! or vectors (32 bytes)" %}
  4880   ins_encode %{
  4881     bool vector256 = true;
  4882     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  4883   %}
  4884   ins_pipe( pipe_slow );
  4885 %}
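// Illustrative note (assumption about typical usage): OrV nodes arise from
// element-wise '|', e.g. a[i] = b[i] | c[i]; the same size-in-bytes scheme and
// memory-operand folding as the AND rules applies, using por/vpor.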
  4887 // --------------------------------- XOR --------------------------------------
  4889 instruct vxor4B(vecS dst, vecS src) %{
  4890   predicate(n->as_Vector()->length_in_bytes() == 4);
  4891   match(Set dst (XorV dst src));
  4892   format %{ "pxor    $dst,$src\t! xor vectors (4 bytes)" %}
  4893   ins_encode %{
  4894     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  4895   %}
  4896   ins_pipe( pipe_slow );
  4897 %}
  4899 instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  4900   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  4901   match(Set dst (XorV src1 src2));
  4902   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  4903   ins_encode %{
  4904     bool vector256 = false;
  4905     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  4906   %}
  4907   ins_pipe( pipe_slow );
  4908 %}
  4910 instruct vxor8B(vecD dst, vecD src) %{
  4911   predicate(n->as_Vector()->length_in_bytes() == 8);
  4912   match(Set dst (XorV dst src));
  4913   format %{ "pxor    $dst,$src\t! xor vectors (8 bytes)" %}
  4914   ins_encode %{
  4915     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  4916   %}
  4917   ins_pipe( pipe_slow );
  4918 %}
  4920 instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  4921   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  4922   match(Set dst (XorV src1 src2));
  4923   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  4924   ins_encode %{
  4925     bool vector256 = false;
  4926     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  4927   %}
  4928   ins_pipe( pipe_slow );
  4929 %}
  4931 instruct vxor16B(vecX dst, vecX src) %{
  4932   predicate(n->as_Vector()->length_in_bytes() == 16);
  4933   match(Set dst (XorV dst src));
  4934   format %{ "pxor    $dst,$src\t! xor vectors (16 bytes)" %}
  4935   ins_encode %{
  4936     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  4937   %}
  4938   ins_pipe( pipe_slow );
  4939 %}
  4941 instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  4942   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  4943   match(Set dst (XorV src1 src2));
  4944   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  4945   ins_encode %{
  4946     bool vector256 = false;
  4947     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  4948   %}
  4949   ins_pipe( pipe_slow );
  4950 %}
  4952 instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  4953   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  4954   match(Set dst (XorV src (LoadVector mem)));
  4955   format %{ "vpxor   $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  4956   ins_encode %{
  4957     bool vector256 = false;
  4958     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  4959   %}
  4960   ins_pipe( pipe_slow );
  4961 %}
  4963 instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  4964   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  4965   match(Set dst (XorV src1 src2));
  4966   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  4967   ins_encode %{
  4968     bool vector256 = true;
  4969     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  4970   %}
  4971   ins_pipe( pipe_slow );
  4972 %}
  4974 instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  4975   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  4976   match(Set dst (XorV src (LoadVector mem)));
  4977   format %{ "vpxor   $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  4978   ins_encode %{
  4979     bool vector256 = true;
  4980     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  4981   %}
  4982   ins_pipe( pipe_slow );
  4983 %}
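// Illustrative note (assumption about typical usage): XorV nodes arise from
// element-wise '^', e.g. a[i] = b[i] ^ c[i]; encoded with pxor/vpxor, again
// selected by vector size in bytes and UseAVX level.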
