src/cpu/x86/vm/x86.ad

author:      kvn
date:        Tue, 14 Jan 2014 17:46:48 -0800
changeset:   6312:04d32e7fad07
parent:      4204:b2c669fd8114
child:       6517:a433eb716ce1
permissions: -rw-r--r--

8002074: Support for AES on SPARC
Summary: Add intrinsics/stub routines support for single-block and multi-block (as used by Cipher Block Chaining mode) AES encryption and decryption operations on the SPARC platform.
Reviewed-by: kvn, roland
Contributed-by: shrinivas.joshi@oracle.com

     1 //
     2 // Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
     3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4 //
     5 // This code is free software; you can redistribute it and/or modify it
     6 // under the terms of the GNU General Public License version 2 only, as
     7 // published by the Free Software Foundation.
     8 //
     9 // This code is distributed in the hope that it will be useful, but WITHOUT
    10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12 // version 2 for more details (a copy is included in the LICENSE file that
    13 // accompanied this code).
    14 //
    15 // You should have received a copy of the GNU General Public License version
    16 // 2 along with this work; if not, write to the Free Software Foundation,
    17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18 //
    19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    20 // or visit www.oracle.com if you need additional information or have any
    21 // questions.
    22 //
    23 //
    25 // X86 Common Architecture Description File
    27 //----------REGISTER DEFINITION BLOCK------------------------------------------
    28 // This information is used by the matcher and the register allocator to
    29 // describe individual registers and classes of registers within the target
     30 // architecture.
    32 register %{
    33 //----------Architecture Description Register Definitions----------------------
    34 // General Registers
    35 // "reg_def"  name ( register save type, C convention save type,
    36 //                   ideal register type, encoding );
    37 // Register Save Types:
    38 //
    39 // NS  = No-Save:       The register allocator assumes that these registers
    40 //                      can be used without saving upon entry to the method, &
    41 //                      that they do not need to be saved at call sites.
    42 //
    43 // SOC = Save-On-Call:  The register allocator assumes that these registers
    44 //                      can be used without saving upon entry to the method,
    45 //                      but that they must be saved at call sites.
    46 //
    47 // SOE = Save-On-Entry: The register allocator assumes that these registers
    48 //                      must be saved before using them upon entry to the
    49 //                      method, but they do not need to be saved at call
    50 //                      sites.
    51 //
    52 // AS  = Always-Save:   The register allocator assumes that these registers
    53 //                      must be saved before using them upon entry to the
    54 //                      method, & that they must be saved at call sites.
    55 //
    56 // Ideal Register Type is used to determine how to save & restore a
    57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
    58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
    59 //
    60 // The encoding number is the actual bit-pattern placed into the opcodes.
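        //
        // As an illustrative reading of this format (using the first definition
        // below): reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()) declares
        // XMM0 as Save-On-Call for both the register allocator's save type and the
        // C calling convention, spilled/restored as a float (Op_RegF), with
        // hardware encoding 0, backed by the VM register xmm0.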
     62 // XMM registers.  256-bit registers of 8 words each, labeled (a)-h.
    63 // Word a in each register holds a Float, words ab hold a Double.
    64 // The whole registers are used in SSE4.2 version intrinsics,
    65 // array copy stubs and superword operations (see UseSSE42Intrinsics,
     66 // UseXMMForArrayCopy and UseSuperWord flags).
    67 // XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
    68 // Linux ABI:   No register preserved across function calls
    69 //              XMM0-XMM7 might hold parameters
    70 // Windows ABI: XMM6-XMM15 preserved across function calls
    71 //              XMM0-XMM3 might hold parameters
    73 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
    74 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
    75 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
    76 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
    77 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
    78 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
    79 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
    80 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
    82 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
    83 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
    84 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
    85 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
    86 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
    87 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
    88 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
    89 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
    91 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
    92 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
    93 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
    94 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
    95 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
    96 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
    97 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
    98 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
   100 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
   101 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
   102 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
   103 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
   104 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
   105 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
   106 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
   107 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
   109 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
   110 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
   111 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
   112 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
   113 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
   114 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
   115 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
   116 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
   118 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
   119 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
   120 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
   121 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
   122 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
   123 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
   124 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
   125 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
   127 #ifdef _WIN64
   129 reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
   130 reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
   131 reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
   132 reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
   133 reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
   134 reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
   135 reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
   136 reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));
   138 reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
   139 reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
   140 reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
   141 reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
   142 reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
   143 reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
   144 reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
   145 reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));
   147 reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
   148 reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
   149 reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
   150 reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
   151 reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
   152 reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
   153 reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
   154 reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));
   156 reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
   157 reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
   158 reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
   159 reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
   160 reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
   161 reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
   162 reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
   163 reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));
   165 reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
   166 reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
   167 reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
   168 reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
   169 reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
   170 reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
   171 reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
   172 reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));
   174 reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
   175 reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
   176 reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
   177 reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
   178 reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
   179 reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
   180 reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
   181 reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));
   183 reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
   184 reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
   185 reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
   186 reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
   187 reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
   188 reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
   189 reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
   190 reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));
   192 reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
   193 reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
   194 reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
   195 reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
   196 reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
   197 reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
   198 reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
   199 reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));
   201 reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
   202 reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
   203 reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
   204 reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
   205 reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
   206 reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
   207 reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
   208 reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));
   210 reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
   211 reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
   212 reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
   213 reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
   214 reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
   215 reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
   216 reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
   217 reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));
   219 #else // _WIN64
   221 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
   222 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
   223 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
   224 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
   225 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
   226 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
   227 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
   228 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
   230 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
   231 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
   232 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
   233 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
   234 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
   235 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
   236 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
   237 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
   239 #ifdef _LP64
   241 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
   242 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
   243 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
   244 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
   245 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
   246 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
   247 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
   248 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
   250 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
   251 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
   252 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
   253 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
   254 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
   255 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
   256 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
   257 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
   259 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
   260 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
   261 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
   262 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
   263 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
   264 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
   265 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
   266 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
   268 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
   269 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
   270 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
   271 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
   272 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
   273 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
   274 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
   275 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
   277 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
   278 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
   279 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
   280 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
   281 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
   282 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
   283 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
   284 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
   286 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
   287 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
   288 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
   289 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
   290 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
   291 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
   292 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
   293 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
   295 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
   296 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
   297 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
   298 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
   299 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
   300 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
   301 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
   302 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
   304 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
   305 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
   306 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
   307 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
   308 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
   309 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
   310 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
   311 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
   313 #endif // _LP64
   315 #endif // _WIN64
   317 #ifdef _LP64
   318 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
   319 #else
   320 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
   321 #endif // _LP64
   323 alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
   324                    XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
   325                    XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
   326                    XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
   327                    XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
   328                    XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
   329                    XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
   330                    XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
   331 #ifdef _LP64
   332                   ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
   333                    XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
   334                    XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
   335                    XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
   336                    XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
   337                    XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
   338                    XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
   339                    XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
   340 #endif
   341                    );
   343 // flags allocation class should be last.
   344 alloc_class chunk2(RFLAGS);
   346 // Singleton class for condition codes
   347 reg_class int_flags(RFLAGS);
   349 // Class for all float registers
   350 reg_class float_reg(XMM0,
   351                     XMM1,
   352                     XMM2,
   353                     XMM3,
   354                     XMM4,
   355                     XMM5,
   356                     XMM6,
   357                     XMM7
   358 #ifdef _LP64
   359                    ,XMM8,
   360                     XMM9,
   361                     XMM10,
   362                     XMM11,
   363                     XMM12,
   364                     XMM13,
   365                     XMM14,
   366                     XMM15
   367 #endif
   368                     );
   370 // Class for all double registers
   371 reg_class double_reg(XMM0,  XMM0b,
   372                      XMM1,  XMM1b,
   373                      XMM2,  XMM2b,
   374                      XMM3,  XMM3b,
   375                      XMM4,  XMM4b,
   376                      XMM5,  XMM5b,
   377                      XMM6,  XMM6b,
   378                      XMM7,  XMM7b
   379 #ifdef _LP64
   380                     ,XMM8,  XMM8b,
   381                      XMM9,  XMM9b,
   382                      XMM10, XMM10b,
   383                      XMM11, XMM11b,
   384                      XMM12, XMM12b,
   385                      XMM13, XMM13b,
   386                      XMM14, XMM14b,
   387                      XMM15, XMM15b
   388 #endif
   389                      );
   391 // Class for all 32bit vector registers
   392 reg_class vectors_reg(XMM0,
   393                       XMM1,
   394                       XMM2,
   395                       XMM3,
   396                       XMM4,
   397                       XMM5,
   398                       XMM6,
   399                       XMM7
   400 #ifdef _LP64
   401                      ,XMM8,
   402                       XMM9,
   403                       XMM10,
   404                       XMM11,
   405                       XMM12,
   406                       XMM13,
   407                       XMM14,
   408                       XMM15
   409 #endif
   410                       );
   412 // Class for all 64bit vector registers
   413 reg_class vectord_reg(XMM0,  XMM0b,
   414                       XMM1,  XMM1b,
   415                       XMM2,  XMM2b,
   416                       XMM3,  XMM3b,
   417                       XMM4,  XMM4b,
   418                       XMM5,  XMM5b,
   419                       XMM6,  XMM6b,
   420                       XMM7,  XMM7b
   421 #ifdef _LP64
   422                      ,XMM8,  XMM8b,
   423                       XMM9,  XMM9b,
   424                       XMM10, XMM10b,
   425                       XMM11, XMM11b,
   426                       XMM12, XMM12b,
   427                       XMM13, XMM13b,
   428                       XMM14, XMM14b,
   429                       XMM15, XMM15b
   430 #endif
   431                       );
   433 // Class for all 128bit vector registers
   434 reg_class vectorx_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,
   435                       XMM1,  XMM1b,  XMM1c,  XMM1d,
   436                       XMM2,  XMM2b,  XMM2c,  XMM2d,
   437                       XMM3,  XMM3b,  XMM3c,  XMM3d,
   438                       XMM4,  XMM4b,  XMM4c,  XMM4d,
   439                       XMM5,  XMM5b,  XMM5c,  XMM5d,
   440                       XMM6,  XMM6b,  XMM6c,  XMM6d,
   441                       XMM7,  XMM7b,  XMM7c,  XMM7d
   442 #ifdef _LP64
   443                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,
   444                       XMM9,  XMM9b,  XMM9c,  XMM9d,
   445                       XMM10, XMM10b, XMM10c, XMM10d,
   446                       XMM11, XMM11b, XMM11c, XMM11d,
   447                       XMM12, XMM12b, XMM12c, XMM12d,
   448                       XMM13, XMM13b, XMM13c, XMM13d,
   449                       XMM14, XMM14b, XMM14c, XMM14d,
   450                       XMM15, XMM15b, XMM15c, XMM15d
   451 #endif
   452                       );
   454 // Class for all 256bit vector registers
   455 reg_class vectory_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
   456                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
   457                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
   458                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
   459                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
   460                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
   461                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
   462                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
   463 #ifdef _LP64
   464                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
   465                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
   466                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
   467                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
   468                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
   469                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
   470                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
   471                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
   472 #endif
   473                       );
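        // Note: these register classes back the VecS/VecD/VecX/VecY operands and
        // ideal registers used below (see Matcher::vector_ideal_reg()), covering
        // 4, 8, 16 and 32 bytes respectively; each listed name is one 32-bit slot
        // of the underlying XMM register.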
   475 %}
   477 source %{
   478   // Float masks come from different places depending on platform.
   479 #ifdef _LP64
   480   static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
   481   static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
   482   static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
   483   static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
   484 #else
   485   static address float_signmask()  { return (address)float_signmask_pool; }
   486   static address float_signflip()  { return (address)float_signflip_pool; }
   487   static address double_signmask() { return (address)double_signmask_pool; }
   488   static address double_signflip() { return (address)double_signflip_pool; }
   489 #endif
   492 const bool Matcher::match_rule_supported(int opcode) {
   493   if (!has_match_rule(opcode))
   494     return false;
   496   switch (opcode) {
   497     case Op_PopCountI:
   498     case Op_PopCountL:
   499       if (!UsePopCountInstruction)
   500         return false;
   501     break;
   502     case Op_MulVI:
   503       if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
   504         return false;
   505     break;
   506     case Op_CompareAndSwapL:
   507 #ifdef _LP64
   508     case Op_CompareAndSwapP:
   509 #endif
   510       if (!VM_Version::supports_cx8())
   511         return false;
   512     break;
   513   }
    515   return true;  // By default, match rules are supported.
   516 }
   518 // Max vector size in bytes. 0 if not supported.
   519 const int Matcher::vector_width_in_bytes(BasicType bt) {
   520   assert(is_java_primitive(bt), "only primitive type vectors");
   521   if (UseSSE < 2) return 0;
   522   // SSE2 supports 128bit vectors for all types.
   523   // AVX2 supports 256bit vectors for all types.
   524   int size = (UseAVX > 1) ? 32 : 16;
   525   // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
   526   if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
   527     size = 32;
   528   // Use flag to limit vector size.
   529   size = MIN2(size,(int)MaxVectorSize);
   530   // Minimum 2 values in vector (or 4 for bytes).
   531   switch (bt) {
   532   case T_DOUBLE:
   533   case T_LONG:
   534     if (size < 16) return 0;
   535   case T_FLOAT:
   536   case T_INT:
   537     if (size < 8) return 0;
   538   case T_BOOLEAN:
   539   case T_BYTE:
   540   case T_CHAR:
   541   case T_SHORT:
   542     if (size < 4) return 0;
   543     break;
   544   default:
   545     ShouldNotReachHere();
   546   }
   547   return size;
   548 }
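        // Worked example (illustrative): with UseAVX == 1 and MaxVectorSize >= 32,
        // T_FLOAT/T_DOUBLE report 32 bytes while T_INT stays at 16, since AVX1 only
        // widens FLOAT and DOUBLE vectors to 256 bits.  The switch above falls
        // through deliberately: DOUBLE/LONG need at least 16 bytes, FLOAT/INT at
        // least 8, and sub-word types at least 4, otherwise 0 (not vectorizable)
        // is returned.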
   550 // Limits on vector size (number of elements) loaded into vector.
   551 const int Matcher::max_vector_size(const BasicType bt) {
   552   return vector_width_in_bytes(bt)/type2aelembytes(bt);
   553 }
   554 const int Matcher::min_vector_size(const BasicType bt) {
   555   int max_size = max_vector_size(bt);
   556   // Min size which can be loaded into vector is 4 bytes.
   557   int size = (type2aelembytes(bt) == 1) ? 4 : 2;
   558   return MIN2(size,max_size);
   559 }
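        // For instance (illustrative): T_BYTE elements are 1 byte, so at least 4
        // of them must fit (4 bytes, Op_VecS), while T_INT needs only 2 elements
        // (8 bytes, Op_VecD); the MIN2 keeps this minimum from exceeding the
        // maximum the current flags allow.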
    561 // Vector ideal reg corresponding to specified size in bytes
   562 const int Matcher::vector_ideal_reg(int size) {
   563   assert(MaxVectorSize >= size, "");
   564   switch(size) {
   565     case  4: return Op_VecS;
   566     case  8: return Op_VecD;
   567     case 16: return Op_VecX;
   568     case 32: return Op_VecY;
   569   }
   570   ShouldNotReachHere();
   571   return 0;
   572 }
   574 // Only lowest bits of xmm reg are used for vector shift count.
   575 const int Matcher::vector_shift_count_ideal_reg(int size) {
   576   return Op_VecS;
   577 }
    579 // x86 supports misaligned vector stores/loads.
   580 const bool Matcher::misaligned_vectors_ok() {
   581   return !AlignVector; // can be changed by flag
   582 }
   584 // x86 AES instructions are compatible with SunJCE expanded
   585 // keys, hence we do not need to pass the original key to stubs
   586 const bool Matcher::pass_original_key_for_aes() {
   587   return false;
   588 }
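        // This hook exists because of the change described in the header above:
        // on a platform whose AES instructions are not compatible with SunJCE
        // expanded keys (SPARC, per the changeset summary), the corresponding ad
        // file would return true here so the stubs receive the original key.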
   590 // Helper methods for MachSpillCopyNode::implementation().
   591 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
   592                           int src_hi, int dst_hi, uint ireg, outputStream* st) {
    593   // In the 64-bit VM the size calculation is complex, so the size is
    594   // obtained by emitting the instructions into a scratch buffer.
   595   LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
   596   assert(ireg == Op_VecS || // 32bit vector
   597          (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
   598          (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
   599          "no non-adjacent vector moves" );
   600   if (cbuf) {
   601     MacroAssembler _masm(cbuf);
   602     int offset = __ offset();
   603     switch (ireg) {
   604     case Op_VecS: // copy whole register
   605     case Op_VecD:
   606     case Op_VecX:
   607       __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
   608       break;
   609     case Op_VecY:
   610       __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
   611       break;
   612     default:
   613       ShouldNotReachHere();
   614     }
   615     int size = __ offset() - offset;
   616 #ifdef ASSERT
   617     // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    618     assert(!do_size || size == 4, "incorrect size calculation");
   619 #endif
   620     return size;
   621 #ifndef PRODUCT
   622   } else if (!do_size) {
   623     switch (ireg) {
   624     case Op_VecS:
   625     case Op_VecD:
   626     case Op_VecX:
   627       st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
   628       break;
   629     case Op_VecY:
   630       st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
   631       break;
   632     default:
   633       ShouldNotReachHere();
   634     }
   635 #endif
   636   }
   637   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
   638   return 4;
   639 }
   641 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
   642                             int stack_offset, int reg, uint ireg, outputStream* st) {
    643   // In the 64-bit VM the size calculation is complex, so the size is
    644   // obtained by emitting the instructions into a scratch buffer.
   645   LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
   646   if (cbuf) {
   647     MacroAssembler _masm(cbuf);
   648     int offset = __ offset();
   649     if (is_load) {
   650       switch (ireg) {
   651       case Op_VecS:
   652         __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
   653         break;
   654       case Op_VecD:
   655         __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
   656         break;
   657       case Op_VecX:
   658         __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
   659         break;
   660       case Op_VecY:
   661         __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
   662         break;
   663       default:
   664         ShouldNotReachHere();
   665       }
   666     } else { // store
   667       switch (ireg) {
   668       case Op_VecS:
   669         __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
   670         break;
   671       case Op_VecD:
   672         __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
   673         break;
   674       case Op_VecX:
   675         __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
   676         break;
   677       case Op_VecY:
   678         __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
   679         break;
   680       default:
   681         ShouldNotReachHere();
   682       }
   683     }
   684     int size = __ offset() - offset;
   685 #ifdef ASSERT
   686     int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
   687     // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    688     assert(!do_size || size == (5+offset_size), "incorrect size calculation");
   689 #endif
   690     return size;
   691 #ifndef PRODUCT
   692   } else if (!do_size) {
   693     if (is_load) {
   694       switch (ireg) {
   695       case Op_VecS:
   696         st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
   697         break;
   698       case Op_VecD:
   699         st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
   700         break;
   701        case Op_VecX:
   702         st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
   703         break;
   704       case Op_VecY:
   705         st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
   706         break;
   707       default:
   708         ShouldNotReachHere();
   709       }
   710     } else { // store
   711       switch (ireg) {
   712       case Op_VecS:
   713         st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
   714         break;
   715       case Op_VecD:
   716         st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
   717         break;
   718        case Op_VecX:
   719         st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
   720         break;
   721       case Op_VecY:
   722         st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
   723         break;
   724       default:
   725         ShouldNotReachHere();
   726       }
   727     }
   728 #endif
   729   }
   730   int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
   731   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
   732   return 5+offset_size;
   733 }
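        // Size sketch for the 32-bit VM (illustrative): the SSE forms of movd,
        // movq and movdqu used above encode in 5 bytes for an rsp-based address
        // (prefix, two opcode bytes, ModRM, SIB), and the 2-byte VEX prefix of
        // vmovdqu takes the same space as the SSE prefix, so the total is
        // 5 plus 0/1/4 displacement bytes -- the 5+offset_size returned above.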
   735 static inline jfloat replicate4_imm(int con, int width) {
   736   // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
   737   assert(width == 1 || width == 2, "only byte or short types here");
   738   int bit_width = width * 8;
   739   jint val = con;
   740   val &= (1 << bit_width) - 1;  // mask off sign bits
   741   while(bit_width < 32) {
   742     val |= (val << bit_width);
   743     bit_width <<= 1;
   744   }
   745   jfloat fval = *((jfloat*) &val);  // coerce to float type
   746   return fval;
   747 }
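        // Worked example (illustrative): replicate4_imm(0x12, 1) masks the
        // constant to the byte 0x12, doubles the pattern to 0x1212 and then
        // 0x12121212, and returns those 32 bits reinterpreted as a jfloat for
        // the constant table.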
   749 static inline jdouble replicate8_imm(int con, int width) {
   750   // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
   751   assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
   752   int bit_width = width * 8;
   753   jlong val = con;
   754   val &= (((jlong) 1) << bit_width) - 1;  // mask off sign bits
   755   while(bit_width < 64) {
   756     val |= (val << bit_width);
   757     bit_width <<= 1;
   758   }
   759   jdouble dval = *((jdouble*) &val);  // coerce to double type
   760   return dval;
   761 }
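        // Likewise (illustrative): replicate8_imm(0x1234, 2) yields the 64-bit
        // pattern 0x1234123412341234 reinterpreted as a jdouble; with width == 4
        // the 32-bit constant is simply copied into both halves.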
   763 #ifndef PRODUCT
   764   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
   765     st->print("nop \t# %d bytes pad for loops and calls", _count);
   766   }
   767 #endif
   769   void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
   770     MacroAssembler _masm(&cbuf);
   771     __ nop(_count);
   772   }
   774   uint MachNopNode::size(PhaseRegAlloc*) const {
   775     return _count;
   776   }
   778 #ifndef PRODUCT
   779   void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
   780     st->print("# breakpoint");
   781   }
   782 #endif
   784   void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
   785     MacroAssembler _masm(&cbuf);
   786     __ int3();
   787   }
   789   uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
   790     return MachNode::size(ra_);
   791   }
   793 %}
   795 encode %{
   797   enc_class preserve_SP %{
   798     debug_only(int off0 = cbuf.insts_size());
   799     MacroAssembler _masm(&cbuf);
   800     // RBP is preserved across all calls, even compiled calls.
   801     // Use it to preserve RSP in places where the callee might change the SP.
   802     __ movptr(rbp_mh_SP_save, rsp);
   803     debug_only(int off1 = cbuf.insts_size());
   804     assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
   805   %}
   807   enc_class restore_SP %{
   808     MacroAssembler _masm(&cbuf);
   809     __ movptr(rsp, rbp_mh_SP_save);
   810   %}
   812   enc_class call_epilog %{
   813     if (VerifyStackAtCalls) {
   814       // Check that stack depth is unchanged: find majik cookie on stack
   815       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
   816       MacroAssembler _masm(&cbuf);
   817       Label L;
   818       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
   819       __ jccb(Assembler::equal, L);
   820       // Die if stack mismatch
   821       __ int3();
   822       __ bind(L);
   823     }
   824   %}
   826 %}
   829 //----------OPERANDS-----------------------------------------------------------
   830 // Operand definitions must precede instruction definitions for correct parsing
   831 // in the ADLC because operands constitute user defined types which are used in
   832 // instruction definitions.
   834 // Vectors
   835 operand vecS() %{
   836   constraint(ALLOC_IN_RC(vectors_reg));
   837   match(VecS);
   839   format %{ %}
   840   interface(REG_INTER);
   841 %}
   843 operand vecD() %{
   844   constraint(ALLOC_IN_RC(vectord_reg));
   845   match(VecD);
   847   format %{ %}
   848   interface(REG_INTER);
   849 %}
   851 operand vecX() %{
   852   constraint(ALLOC_IN_RC(vectorx_reg));
   853   match(VecX);
   855   format %{ %}
   856   interface(REG_INTER);
   857 %}
   859 operand vecY() %{
   860   constraint(ALLOC_IN_RC(vectory_reg));
   861   match(VecY);
   863   format %{ %}
   864   interface(REG_INTER);
   865 %}
   868 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)
   870 // ============================================================================
   872 instruct ShouldNotReachHere() %{
   873   match(Halt);
   874   format %{ "int3\t# ShouldNotReachHere" %}
   875   ins_encode %{
   876     __ int3();
   877   %}
   878   ins_pipe(pipe_slow);
   879 %}
   881 // ============================================================================
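        // Note (illustrative summary): each scalar FP operation below comes in two
        // predicate families -- two-operand SSE forms (UseSSE >= 1 or 2, UseAVX == 0)
        // that overwrite dst, and three-operand AVX forms (UseAVX > 0) with separate
        // src1/src2 -- each with register, memory and constant-table operand variants.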
   883 instruct addF_reg(regF dst, regF src) %{
   884   predicate((UseSSE>=1) && (UseAVX == 0));
   885   match(Set dst (AddF dst src));
   887   format %{ "addss   $dst, $src" %}
   888   ins_cost(150);
   889   ins_encode %{
   890     __ addss($dst$$XMMRegister, $src$$XMMRegister);
   891   %}
   892   ins_pipe(pipe_slow);
   893 %}
   895 instruct addF_mem(regF dst, memory src) %{
   896   predicate((UseSSE>=1) && (UseAVX == 0));
   897   match(Set dst (AddF dst (LoadF src)));
   899   format %{ "addss   $dst, $src" %}
   900   ins_cost(150);
   901   ins_encode %{
   902     __ addss($dst$$XMMRegister, $src$$Address);
   903   %}
   904   ins_pipe(pipe_slow);
   905 %}
   907 instruct addF_imm(regF dst, immF con) %{
   908   predicate((UseSSE>=1) && (UseAVX == 0));
   909   match(Set dst (AddF dst con));
   910   format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
   911   ins_cost(150);
   912   ins_encode %{
   913     __ addss($dst$$XMMRegister, $constantaddress($con));
   914   %}
   915   ins_pipe(pipe_slow);
   916 %}
   918 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
   919   predicate(UseAVX > 0);
   920   match(Set dst (AddF src1 src2));
   922   format %{ "vaddss  $dst, $src1, $src2" %}
   923   ins_cost(150);
   924   ins_encode %{
   925     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
   926   %}
   927   ins_pipe(pipe_slow);
   928 %}
   930 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
   931   predicate(UseAVX > 0);
   932   match(Set dst (AddF src1 (LoadF src2)));
   934   format %{ "vaddss  $dst, $src1, $src2" %}
   935   ins_cost(150);
   936   ins_encode %{
   937     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
   938   %}
   939   ins_pipe(pipe_slow);
   940 %}
   942 instruct addF_reg_imm(regF dst, regF src, immF con) %{
   943   predicate(UseAVX > 0);
   944   match(Set dst (AddF src con));
   946   format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
   947   ins_cost(150);
   948   ins_encode %{
   949     __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
   950   %}
   951   ins_pipe(pipe_slow);
   952 %}
   954 instruct addD_reg(regD dst, regD src) %{
   955   predicate((UseSSE>=2) && (UseAVX == 0));
   956   match(Set dst (AddD dst src));
   958   format %{ "addsd   $dst, $src" %}
   959   ins_cost(150);
   960   ins_encode %{
   961     __ addsd($dst$$XMMRegister, $src$$XMMRegister);
   962   %}
   963   ins_pipe(pipe_slow);
   964 %}
   966 instruct addD_mem(regD dst, memory src) %{
   967   predicate((UseSSE>=2) && (UseAVX == 0));
   968   match(Set dst (AddD dst (LoadD src)));
   970   format %{ "addsd   $dst, $src" %}
   971   ins_cost(150);
   972   ins_encode %{
   973     __ addsd($dst$$XMMRegister, $src$$Address);
   974   %}
   975   ins_pipe(pipe_slow);
   976 %}
   978 instruct addD_imm(regD dst, immD con) %{
   979   predicate((UseSSE>=2) && (UseAVX == 0));
   980   match(Set dst (AddD dst con));
   981   format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
   982   ins_cost(150);
   983   ins_encode %{
   984     __ addsd($dst$$XMMRegister, $constantaddress($con));
   985   %}
   986   ins_pipe(pipe_slow);
   987 %}
   989 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
   990   predicate(UseAVX > 0);
   991   match(Set dst (AddD src1 src2));
   993   format %{ "vaddsd  $dst, $src1, $src2" %}
   994   ins_cost(150);
   995   ins_encode %{
   996     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
   997   %}
   998   ins_pipe(pipe_slow);
   999 %}
  1001 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
  1002   predicate(UseAVX > 0);
  1003   match(Set dst (AddD src1 (LoadD src2)));
  1005   format %{ "vaddsd  $dst, $src1, $src2" %}
  1006   ins_cost(150);
  1007   ins_encode %{
  1008     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  1009   %}
  1010   ins_pipe(pipe_slow);
  1011 %}
  1013 instruct addD_reg_imm(regD dst, regD src, immD con) %{
  1014   predicate(UseAVX > 0);
  1015   match(Set dst (AddD src con));
  1017   format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  1018   ins_cost(150);
  1019   ins_encode %{
  1020     __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  1021   %}
  1022   ins_pipe(pipe_slow);
  1023 %}
  1025 instruct subF_reg(regF dst, regF src) %{
  1026   predicate((UseSSE>=1) && (UseAVX == 0));
  1027   match(Set dst (SubF dst src));
  1029   format %{ "subss   $dst, $src" %}
  1030   ins_cost(150);
  1031   ins_encode %{
  1032     __ subss($dst$$XMMRegister, $src$$XMMRegister);
  1033   %}
  1034   ins_pipe(pipe_slow);
  1035 %}
  1037 instruct subF_mem(regF dst, memory src) %{
  1038   predicate((UseSSE>=1) && (UseAVX == 0));
  1039   match(Set dst (SubF dst (LoadF src)));
  1041   format %{ "subss   $dst, $src" %}
  1042   ins_cost(150);
  1043   ins_encode %{
  1044     __ subss($dst$$XMMRegister, $src$$Address);
  1045   %}
  1046   ins_pipe(pipe_slow);
  1047 %}
  1049 instruct subF_imm(regF dst, immF con) %{
  1050   predicate((UseSSE>=1) && (UseAVX == 0));
  1051   match(Set dst (SubF dst con));
  1052   format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  1053   ins_cost(150);
  1054   ins_encode %{
  1055     __ subss($dst$$XMMRegister, $constantaddress($con));
  1056   %}
  1057   ins_pipe(pipe_slow);
  1058 %}
  1060 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
  1061   predicate(UseAVX > 0);
  1062   match(Set dst (SubF src1 src2));
  1064   format %{ "vsubss  $dst, $src1, $src2" %}
  1065   ins_cost(150);
  1066   ins_encode %{
  1067     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  1068   %}
  1069   ins_pipe(pipe_slow);
  1070 %}
  1072 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
  1073   predicate(UseAVX > 0);
  1074   match(Set dst (SubF src1 (LoadF src2)));
  1076   format %{ "vsubss  $dst, $src1, $src2" %}
  1077   ins_cost(150);
  1078   ins_encode %{
  1079     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  1080   %}
  1081   ins_pipe(pipe_slow);
  1082 %}
  1084 instruct subF_reg_imm(regF dst, regF src, immF con) %{
  1085   predicate(UseAVX > 0);
  1086   match(Set dst (SubF src con));
  1088   format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  1089   ins_cost(150);
  1090   ins_encode %{
  1091     __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  1092   %}
  1093   ins_pipe(pipe_slow);
  1094 %}
  1096 instruct subD_reg(regD dst, regD src) %{
  1097   predicate((UseSSE>=2) && (UseAVX == 0));
  1098   match(Set dst (SubD dst src));
  1100   format %{ "subsd   $dst, $src" %}
  1101   ins_cost(150);
  1102   ins_encode %{
  1103     __ subsd($dst$$XMMRegister, $src$$XMMRegister);
  1104   %}
  1105   ins_pipe(pipe_slow);
  1106 %}
  1108 instruct subD_mem(regD dst, memory src) %{
  1109   predicate((UseSSE>=2) && (UseAVX == 0));
  1110   match(Set dst (SubD dst (LoadD src)));
  1112   format %{ "subsd   $dst, $src" %}
  1113   ins_cost(150);
  1114   ins_encode %{
  1115     __ subsd($dst$$XMMRegister, $src$$Address);
  1116   %}
  1117   ins_pipe(pipe_slow);
  1118 %}
  1120 instruct subD_imm(regD dst, immD con) %{
  1121   predicate((UseSSE>=2) && (UseAVX == 0));
  1122   match(Set dst (SubD dst con));
  1123   format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  1124   ins_cost(150);
  1125   ins_encode %{
  1126     __ subsd($dst$$XMMRegister, $constantaddress($con));
  1127   %}
  1128   ins_pipe(pipe_slow);
  1129 %}
  1131 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
  1132   predicate(UseAVX > 0);
  1133   match(Set dst (SubD src1 src2));
  1135   format %{ "vsubsd  $dst, $src1, $src2" %}
  1136   ins_cost(150);
  1137   ins_encode %{
  1138     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  1139   %}
  1140   ins_pipe(pipe_slow);
  1141 %}
  1143 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
  1144   predicate(UseAVX > 0);
  1145   match(Set dst (SubD src1 (LoadD src2)));
  1147   format %{ "vsubsd  $dst, $src1, $src2" %}
  1148   ins_cost(150);
  1149   ins_encode %{
  1150     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  1151   %}
  1152   ins_pipe(pipe_slow);
  1153 %}
  1155 instruct subD_reg_imm(regD dst, regD src, immD con) %{
  1156   predicate(UseAVX > 0);
  1157   match(Set dst (SubD src con));
  1159   format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  1160   ins_cost(150);
  1161   ins_encode %{
  1162     __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  1163   %}
  1164   ins_pipe(pipe_slow);
  1165 %}
  1167 instruct mulF_reg(regF dst, regF src) %{
  1168   predicate((UseSSE>=1) && (UseAVX == 0));
  1169   match(Set dst (MulF dst src));
  1171   format %{ "mulss   $dst, $src" %}
  1172   ins_cost(150);
  1173   ins_encode %{
  1174     __ mulss($dst$$XMMRegister, $src$$XMMRegister);
  1175   %}
  1176   ins_pipe(pipe_slow);
  1177 %}
  1179 instruct mulF_mem(regF dst, memory src) %{
  1180   predicate((UseSSE>=1) && (UseAVX == 0));
  1181   match(Set dst (MulF dst (LoadF src)));
  1183   format %{ "mulss   $dst, $src" %}
  1184   ins_cost(150);
  1185   ins_encode %{
  1186     __ mulss($dst$$XMMRegister, $src$$Address);
  1187   %}
  1188   ins_pipe(pipe_slow);
  1189 %}
  1191 instruct mulF_imm(regF dst, immF con) %{
  1192   predicate((UseSSE>=1) && (UseAVX == 0));
  1193   match(Set dst (MulF dst con));
  1194   format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  1195   ins_cost(150);
  1196   ins_encode %{
  1197     __ mulss($dst$$XMMRegister, $constantaddress($con));
  1198   %}
  1199   ins_pipe(pipe_slow);
  1200 %}
  1202 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
  1203   predicate(UseAVX > 0);
  1204   match(Set dst (MulF src1 src2));
  1206   format %{ "vmulss  $dst, $src1, $src2" %}
  1207   ins_cost(150);
  1208   ins_encode %{
  1209     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  1210   %}
  1211   ins_pipe(pipe_slow);
  1212 %}
  1214 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
  1215   predicate(UseAVX > 0);
  1216   match(Set dst (MulF src1 (LoadF src2)));
  1218   format %{ "vmulss  $dst, $src1, $src2" %}
  1219   ins_cost(150);
  1220   ins_encode %{
  1221     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  1222   %}
  1223   ins_pipe(pipe_slow);
  1224 %}
  1226 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
  1227   predicate(UseAVX > 0);
  1228   match(Set dst (MulF src con));
  1230   format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  1231   ins_cost(150);
  1232   ins_encode %{
  1233     __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  1234   %}
  1235   ins_pipe(pipe_slow);
  1236 %}
  1238 instruct mulD_reg(regD dst, regD src) %{
  1239   predicate((UseSSE>=2) && (UseAVX == 0));
  1240   match(Set dst (MulD dst src));
  1242   format %{ "mulsd   $dst, $src" %}
  1243   ins_cost(150);
  1244   ins_encode %{
  1245     __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
  1246   %}
  1247   ins_pipe(pipe_slow);
  1248 %}
  1250 instruct mulD_mem(regD dst, memory src) %{
  1251   predicate((UseSSE>=2) && (UseAVX == 0));
  1252   match(Set dst (MulD dst (LoadD src)));
  1254   format %{ "mulsd   $dst, $src" %}
  1255   ins_cost(150);
  1256   ins_encode %{
  1257     __ mulsd($dst$$XMMRegister, $src$$Address);
  1258   %}
  1259   ins_pipe(pipe_slow);
  1260 %}
  1262 instruct mulD_imm(regD dst, immD con) %{
  1263   predicate((UseSSE>=2) && (UseAVX == 0));
  1264   match(Set dst (MulD dst con));
  1265   format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  1266   ins_cost(150);
  1267   ins_encode %{
  1268     __ mulsd($dst$$XMMRegister, $constantaddress($con));
  1269   %}
  1270   ins_pipe(pipe_slow);
  1271 %}
  1273 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
  1274   predicate(UseAVX > 0);
  1275   match(Set dst (MulD src1 src2));
  1277   format %{ "vmulsd  $dst, $src1, $src2" %}
  1278   ins_cost(150);
  1279   ins_encode %{
  1280     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  1281   %}
  1282   ins_pipe(pipe_slow);
  1283 %}
  1285 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
  1286   predicate(UseAVX > 0);
  1287   match(Set dst (MulD src1 (LoadD src2)));
  1289   format %{ "vmulsd  $dst, $src1, $src2" %}
  1290   ins_cost(150);
  1291   ins_encode %{
  1292     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  1293   %}
  1294   ins_pipe(pipe_slow);
  1295 %}
  1297 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
  1298   predicate(UseAVX > 0);
  1299   match(Set dst (MulD src con));
  1301   format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  1302   ins_cost(150);
  1303   ins_encode %{
  1304     __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  1305   %}
  1306   ins_pipe(pipe_slow);
  1307 %}
  1309 instruct divF_reg(regF dst, regF src) %{
  1310   predicate((UseSSE>=1) && (UseAVX == 0));
  1311   match(Set dst (DivF dst src));
  1313   format %{ "divss   $dst, $src" %}
  1314   ins_cost(150);
  1315   ins_encode %{
  1316     __ divss($dst$$XMMRegister, $src$$XMMRegister);
  1317   %}
  1318   ins_pipe(pipe_slow);
  1319 %}
  1321 instruct divF_mem(regF dst, memory src) %{
  1322   predicate((UseSSE>=1) && (UseAVX == 0));
  1323   match(Set dst (DivF dst (LoadF src)));
  1325   format %{ "divss   $dst, $src" %}
  1326   ins_cost(150);
  1327   ins_encode %{
  1328     __ divss($dst$$XMMRegister, $src$$Address);
  1329   %}
  1330   ins_pipe(pipe_slow);
  1331 %}
  1333 instruct divF_imm(regF dst, immF con) %{
  1334   predicate((UseSSE>=1) && (UseAVX == 0));
  1335   match(Set dst (DivF dst con));
  1336   format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  1337   ins_cost(150);
  1338   ins_encode %{
  1339     __ divss($dst$$XMMRegister, $constantaddress($con));
  1340   %}
  1341   ins_pipe(pipe_slow);
  1342 %}
  1344 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
  1345   predicate(UseAVX > 0);
  1346   match(Set dst (DivF src1 src2));
  1348   format %{ "vdivss  $dst, $src1, $src2" %}
  1349   ins_cost(150);
  1350   ins_encode %{
  1351     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  1352   %}
  1353   ins_pipe(pipe_slow);
  1354 %}
  1356 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
  1357   predicate(UseAVX > 0);
  1358   match(Set dst (DivF src1 (LoadF src2)));
  1360   format %{ "vdivss  $dst, $src1, $src2" %}
  1361   ins_cost(150);
  1362   ins_encode %{
  1363     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  1364   %}
  1365   ins_pipe(pipe_slow);
  1366 %}
  1368 instruct divF_reg_imm(regF dst, regF src, immF con) %{
  1369   predicate(UseAVX > 0);
  1370   match(Set dst (DivF src con));
  1372   format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  1373   ins_cost(150);
  1374   ins_encode %{
  1375     __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  1376   %}
  1377   ins_pipe(pipe_slow);
  1378 %}
  1380 instruct divD_reg(regD dst, regD src) %{
  1381   predicate((UseSSE>=2) && (UseAVX == 0));
  1382   match(Set dst (DivD dst src));
  1384   format %{ "divsd   $dst, $src" %}
  1385   ins_cost(150);
  1386   ins_encode %{
  1387     __ divsd($dst$$XMMRegister, $src$$XMMRegister);
  1388   %}
  1389   ins_pipe(pipe_slow);
  1390 %}
  1392 instruct divD_mem(regD dst, memory src) %{
  1393   predicate((UseSSE>=2) && (UseAVX == 0));
  1394   match(Set dst (DivD dst (LoadD src)));
  1396   format %{ "divsd   $dst, $src" %}
  1397   ins_cost(150);
  1398   ins_encode %{
  1399     __ divsd($dst$$XMMRegister, $src$$Address);
  1400   %}
  1401   ins_pipe(pipe_slow);
  1402 %}
  1404 instruct divD_imm(regD dst, immD con) %{
  1405   predicate((UseSSE>=2) && (UseAVX == 0));
  1406   match(Set dst (DivD dst con));
  1407   format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  1408   ins_cost(150);
  1409   ins_encode %{
  1410     __ divsd($dst$$XMMRegister, $constantaddress($con));
  1411   %}
  1412   ins_pipe(pipe_slow);
  1413 %}
  1415 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
  1416   predicate(UseAVX > 0);
  1417   match(Set dst (DivD src1 src2));
  1419   format %{ "vdivsd  $dst, $src1, $src2" %}
  1420   ins_cost(150);
  1421   ins_encode %{
  1422     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  1423   %}
  1424   ins_pipe(pipe_slow);
  1425 %}
  1427 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
  1428   predicate(UseAVX > 0);
  1429   match(Set dst (DivD src1 (LoadD src2)));
  1431   format %{ "vdivsd  $dst, $src1, $src2" %}
  1432   ins_cost(150);
  1433   ins_encode %{
  1434     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  1435   %}
  1436   ins_pipe(pipe_slow);
  1437 %}
  1439 instruct divD_reg_imm(regD dst, regD src, immD con) %{
  1440   predicate(UseAVX > 0);
  1441   match(Set dst (DivD src con));
  1443   format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  1444   ins_cost(150);
  1445   ins_encode %{
  1446     __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  1447   %}
  1448   ins_pipe(pipe_slow);
  1449 %}
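        // The Abs/Neg instructs below work bitwise on the IEEE representation:
        // abs clears the sign bit by ANDing with 0x7fffffff (float) or
        // 0x7fffffffffffffff (double), and neg flips it by XORing with the
        // corresponding sign-bit mask. The masks are loaded from fixed tables via
        // ExternalAddress(float_signmask()) and friends, as the encodings show.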
  1451 instruct absF_reg(regF dst) %{
  1452   predicate((UseSSE>=1) && (UseAVX == 0));
  1453   match(Set dst (AbsF dst));
  1454   ins_cost(150);
  1455   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
  1456   ins_encode %{
  1457     __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
  1458   %}
  1459   ins_pipe(pipe_slow);
  1460 %}
  1462 instruct absF_reg_reg(regF dst, regF src) %{
  1463   predicate(UseAVX > 0);
  1464   match(Set dst (AbsF src));
  1465   ins_cost(150);
  1466   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  1467   ins_encode %{
  1468     bool vector256 = false;
  1469     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
  1470               ExternalAddress(float_signmask()), vector256);
  1471   %}
  1472   ins_pipe(pipe_slow);
  1473 %}
  1475 instruct absD_reg(regD dst) %{
  1476   predicate((UseSSE>=2) && (UseAVX == 0));
  1477   match(Set dst (AbsD dst));
  1478   ins_cost(150);
  1479   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
  1480             "# abs double by sign masking" %}
  1481   ins_encode %{
  1482     __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  1483   %}
  1484   ins_pipe(pipe_slow);
  1485 %}
  1487 instruct absD_reg_reg(regD dst, regD src) %{
  1488   predicate(UseAVX > 0);
  1489   match(Set dst (AbsD src));
  1490   ins_cost(150);
  1491   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
  1492             "# abs double by sign masking" %}
  1493   ins_encode %{
  1494     bool vector256 = false;
  1495     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
  1496               ExternalAddress(double_signmask()), vector256);
  1497   %}
  1498   ins_pipe(pipe_slow);
  1499 %}
  1501 instruct negF_reg(regF dst) %{
  1502   predicate((UseSSE>=1) && (UseAVX == 0));
  1503   match(Set dst (NegF dst));
  1504   ins_cost(150);
  1505   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
  1506   ins_encode %{
  1507     __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  1508   %}
  1509   ins_pipe(pipe_slow);
  1510 %}
  1512 instruct negF_reg_reg(regF dst, regF src) %{
  1513   predicate(UseAVX > 0);
  1514   match(Set dst (NegF src));
  1515   ins_cost(150);
  1516   format %{ "vxorps  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  1517   ins_encode %{
  1518     bool vector256 = false;
  1519     __ vxorps($dst$$XMMRegister, $src$$XMMRegister,
  1520               ExternalAddress(float_signflip()), vector256);
  1521   %}
  1522   ins_pipe(pipe_slow);
  1523 %}
  1525 instruct negD_reg(regD dst) %{
  1526   predicate((UseSSE>=2) && (UseAVX == 0));
  1527   match(Set dst (NegD dst));
  1528   ins_cost(150);
  1529   format %{ "xorpd   $dst, [0x8000000000000000]\t"
  1530             "# neg double by sign flipping" %}
  1531   ins_encode %{
  1532     __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  1533   %}
  1534   ins_pipe(pipe_slow);
  1535 %}
  1537 instruct negD_reg_reg(regD dst, regD src) %{
  1538   predicate(UseAVX > 0);
  1539   match(Set dst (NegD src));
  1540   ins_cost(150);
  1541   format %{ "vxorpd  $dst, $src, [0x8000000000000000]\t"
  1542             "# neg double by sign flipping" %}
  1543   ins_encode %{
  1544     bool vector256 = false;
  1545     __ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
  1546               ExternalAddress(double_signflip()), vector256);
  1547   %}
  1548   ins_pipe(pipe_slow);
  1549 %}
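        // Single-precision sqrt reaches the matcher as ConvD2F(SqrtD(ConvF2D src)),
        // presumably because this ideal graph has no dedicated float sqrt node.
        // Rounding the double sqrt of a widened float back to float yields the
        // correctly rounded float result, so the whole pattern collapses to one sqrtss.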
  1551 instruct sqrtF_reg(regF dst, regF src) %{
  1552   predicate(UseSSE>=1);
  1553   match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
  1555   format %{ "sqrtss  $dst, $src" %}
  1556   ins_cost(150);
  1557   ins_encode %{
  1558     __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
  1559   %}
  1560   ins_pipe(pipe_slow);
  1561 %}
  1563 instruct sqrtF_mem(regF dst, memory src) %{
  1564   predicate(UseSSE>=1);
  1565   match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
  1567   format %{ "sqrtss  $dst, $src" %}
  1568   ins_cost(150);
  1569   ins_encode %{
  1570     __ sqrtss($dst$$XMMRegister, $src$$Address);
  1571   %}
  1572   ins_pipe(pipe_slow);
  1573 %}
  1575 instruct sqrtF_imm(regF dst, immF con) %{
  1576   predicate(UseSSE>=1);
  1577   match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
  1578   format %{ "sqrtss  $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  1579   ins_cost(150);
  1580   ins_encode %{
  1581     __ sqrtss($dst$$XMMRegister, $constantaddress($con));
  1582   %}
  1583   ins_pipe(pipe_slow);
  1584 %}
  1586 instruct sqrtD_reg(regD dst, regD src) %{
  1587   predicate(UseSSE>=2);
  1588   match(Set dst (SqrtD src));
  1590   format %{ "sqrtsd  $dst, $src" %}
  1591   ins_cost(150);
  1592   ins_encode %{
  1593     __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
  1594   %}
  1595   ins_pipe(pipe_slow);
  1596 %}
  1598 instruct sqrtD_mem(regD dst, memory src) %{
  1599   predicate(UseSSE>=2);
  1600   match(Set dst (SqrtD (LoadD src)));
  1602   format %{ "sqrtsd  $dst, $src" %}
  1603   ins_cost(150);
  1604   ins_encode %{
  1605     __ sqrtsd($dst$$XMMRegister, $src$$Address);
  1606   %}
  1607   ins_pipe(pipe_slow);
  1608 %}
  1610 instruct sqrtD_imm(regD dst, immD con) %{
  1611   predicate(UseSSE>=2);
  1612   match(Set dst (SqrtD con));
  1613   format %{ "sqrtsd  $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  1614   ins_cost(150);
  1615   ins_encode %{
  1616     __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
  1617   %}
  1618   ins_pipe(pipe_slow);
  1619 %}
  1622 // ====================VECTOR INSTRUCTIONS=====================================
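        // Vector operand classes correspond to vector widths: vecS = 4 bytes,
        // vecD = 8 bytes, vecX = 16 bytes and vecY = 32 bytes, all held in xmm/ymm
        // registers. Each instruct's predicate keys off the size recorded on the
        // ideal node (memory_size() for loads and stores, length() for Replicate and
        // the arithmetic nodes) so that only variants of the right width can match.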
  1624 // Load vectors (4 bytes long)
  1625 instruct loadV4(vecS dst, memory mem) %{
  1626   predicate(n->as_LoadVector()->memory_size() == 4);
  1627   match(Set dst (LoadVector mem));
  1628   ins_cost(125);
  1629   format %{ "movd    $dst,$mem\t! load vector (4 bytes)" %}
  1630   ins_encode %{
  1631     __ movdl($dst$$XMMRegister, $mem$$Address);
  1632   %}
  1633   ins_pipe( pipe_slow );
  1634 %}
  1636 // Load vectors (8 bytes long)
  1637 instruct loadV8(vecD dst, memory mem) %{
  1638   predicate(n->as_LoadVector()->memory_size() == 8);
  1639   match(Set dst (LoadVector mem));
  1640   ins_cost(125);
  1641   format %{ "movq    $dst,$mem\t! load vector (8 bytes)" %}
  1642   ins_encode %{
  1643     __ movq($dst$$XMMRegister, $mem$$Address);
  1644   %}
  1645   ins_pipe( pipe_slow );
  1646 %}
  1648 // Load vectors (16 bytes long)
  1649 instruct loadV16(vecX dst, memory mem) %{
  1650   predicate(n->as_LoadVector()->memory_size() == 16);
  1651   match(Set dst (LoadVector mem));
  1652   ins_cost(125);
  1653   format %{ "movdqu  $dst,$mem\t! load vector (16 bytes)" %}
  1654   ins_encode %{
  1655     __ movdqu($dst$$XMMRegister, $mem$$Address);
  1656   %}
  1657   ins_pipe( pipe_slow );
  1658 %}
  1660 // Load vectors (32 bytes long)
  1661 instruct loadV32(vecY dst, memory mem) %{
  1662   predicate(n->as_LoadVector()->memory_size() == 32);
  1663   match(Set dst (LoadVector mem));
  1664   ins_cost(125);
  1665   format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
  1666   ins_encode %{
  1667     __ vmovdqu($dst$$XMMRegister, $mem$$Address);
  1668   %}
  1669   ins_pipe( pipe_slow );
  1670 %}
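        // The 16- and 32-byte loads (and the stores below) use the unaligned forms
        // movdqu/vmovdqu, so the memory operand carries no alignment requirement.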
  1672 // Store vectors
  1673 instruct storeV4(memory mem, vecS src) %{
  1674   predicate(n->as_StoreVector()->memory_size() == 4);
  1675   match(Set mem (StoreVector mem src));
  1676   ins_cost(145);
  1677   format %{ "movd    $mem,$src\t! store vector (4 bytes)" %}
  1678   ins_encode %{
  1679     __ movdl($mem$$Address, $src$$XMMRegister);
  1680   %}
  1681   ins_pipe( pipe_slow );
  1682 %}
  1684 instruct storeV8(memory mem, vecD src) %{
  1685   predicate(n->as_StoreVector()->memory_size() == 8);
  1686   match(Set mem (StoreVector mem src));
  1687   ins_cost(145);
  1688   format %{ "movq    $mem,$src\t! store vector (8 bytes)" %}
  1689   ins_encode %{
  1690     __ movq($mem$$Address, $src$$XMMRegister);
  1691   %}
  1692   ins_pipe( pipe_slow );
  1693 %}
  1695 instruct storeV16(memory mem, vecX src) %{
  1696   predicate(n->as_StoreVector()->memory_size() == 16);
  1697   match(Set mem (StoreVector mem src));
  1698   ins_cost(145);
  1699   format %{ "movdqu  $mem,$src\t! store vector (16 bytes)" %}
  1700   ins_encode %{
  1701     __ movdqu($mem$$Address, $src$$XMMRegister);
  1702   %}
  1703   ins_pipe( pipe_slow );
  1704 %}
  1706 instruct storeV32(memory mem, vecY src) %{
  1707   predicate(n->as_StoreVector()->memory_size() == 32);
  1708   match(Set mem (StoreVector mem src));
  1709   ins_cost(145);
  1710   format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
  1711   ins_encode %{
  1712     __ vmovdqu($mem$$Address, $src$$XMMRegister);
  1713   %}
  1714   ins_pipe( pipe_slow );
  1715 %}
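        // The Repl* instructs broadcast a scalar into every lane. The common idiom:
        // move the scalar into the low lanes of an xmm register (movdl/movdq), spread
        // it across the low 64 bits (punpcklbw/pshuflw for bytes and shorts, pshufd
        // for ints), duplicate into the upper 64 bits with punpcklqdq, and for 256-bit
        // vectors copy the low 128 bits into the high half with
        // vinserti128h/vinsertf128h.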
  1717 // Replicate byte scalar to be vector
  1718 instruct Repl4B(vecS dst, rRegI src) %{
  1719   predicate(n->as_Vector()->length() == 4);
  1720   match(Set dst (ReplicateB src));
  1721   format %{ "movd    $dst,$src\n\t"
  1722             "punpcklbw $dst,$dst\n\t"
  1723             "pshuflw $dst,$dst,0x00\t! replicate4B" %}
  1724   ins_encode %{
  1725     __ movdl($dst$$XMMRegister, $src$$Register);
  1726     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
  1727     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  1728   %}
  1729   ins_pipe( pipe_slow );
  1730 %}
  1732 instruct Repl8B(vecD dst, rRegI src) %{
  1733   predicate(n->as_Vector()->length() == 8);
  1734   match(Set dst (ReplicateB src));
  1735   format %{ "movd    $dst,$src\n\t"
  1736             "punpcklbw $dst,$dst\n\t"
  1737             "pshuflw $dst,$dst,0x00\t! replicate8B" %}
  1738   ins_encode %{
  1739     __ movdl($dst$$XMMRegister, $src$$Register);
  1740     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
  1741     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  1742   %}
  1743   ins_pipe( pipe_slow );
  1744 %}
  1746 instruct Repl16B(vecX dst, rRegI src) %{
  1747   predicate(n->as_Vector()->length() == 16);
  1748   match(Set dst (ReplicateB src));
  1749   format %{ "movd    $dst,$src\n\t"
  1750             "punpcklbw $dst,$dst\n\t"
  1751             "pshuflw $dst,$dst,0x00\n\t"
  1752             "punpcklqdq $dst,$dst\t! replicate16B" %}
  1753   ins_encode %{
  1754     __ movdl($dst$$XMMRegister, $src$$Register);
  1755     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
  1756     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  1757     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  1758   %}
  1759   ins_pipe( pipe_slow );
  1760 %}
  1762 instruct Repl32B(vecY dst, rRegI src) %{
  1763   predicate(n->as_Vector()->length() == 32);
  1764   match(Set dst (ReplicateB src));
  1765   format %{ "movd    $dst,$src\n\t"
  1766             "punpcklbw $dst,$dst\n\t"
  1767             "pshuflw $dst,$dst,0x00\n\t"
  1768             "punpcklqdq $dst,$dst\n\t"
  1769             "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
  1770   ins_encode %{
  1771     __ movdl($dst$$XMMRegister, $src$$Register);
  1772     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
  1773     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  1774     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  1775     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  1776   %}
  1777   ins_pipe( pipe_slow );
  1778 %}
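        // For immediate operands the broadcast is done at compile time:
        // replicate4_imm/replicate8_imm repeat the immediate at the given element
        // size (1, 2 or 4 bytes) into a 4- or 8-byte constant-table entry, which the
        // instruct then loads with movdl/movq and widens as above.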
  1780 // Replicate byte scalar immediate to be vector by loading from const table.
  1781 instruct Repl4B_imm(vecS dst, immI con) %{
  1782   predicate(n->as_Vector()->length() == 4);
  1783   match(Set dst (ReplicateB con));
  1784   format %{ "movdl   $dst,[$constantaddress]\t! replicate4B($con)" %}
  1785   ins_encode %{
  1786     __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
  1787   %}
  1788   ins_pipe( pipe_slow );
  1789 %}
  1791 instruct Repl8B_imm(vecD dst, immI con) %{
  1792   predicate(n->as_Vector()->length() == 8);
  1793   match(Set dst (ReplicateB con));
  1794   format %{ "movq    $dst,[$constantaddress]\t! replicate8B($con)" %}
  1795   ins_encode %{
  1796     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
  1797   %}
  1798   ins_pipe( pipe_slow );
  1799 %}
  1801 instruct Repl16B_imm(vecX dst, immI con) %{
  1802   predicate(n->as_Vector()->length() == 16);
  1803   match(Set dst (ReplicateB con));
  1804   format %{ "movq    $dst,[$constantaddress]\n\t"
  1805             "punpcklqdq $dst,$dst\t! replicate16B($con)" %}
  1806   ins_encode %{
  1807     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
  1808     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  1809   %}
  1810   ins_pipe( pipe_slow );
  1811 %}
  1813 instruct Repl32B_imm(vecY dst, immI con) %{
  1814   predicate(n->as_Vector()->length() == 32);
  1815   match(Set dst (ReplicateB con));
  1816   format %{ "movq    $dst,[$constantaddress]\n\t"
  1817             "punpcklqdq $dst,$dst\n\t"
  1818             "vinserti128h $dst,$dst,$dst\t! replicate32B($con)" %}
  1819   ins_encode %{
  1820     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
  1821     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  1822     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  1823   %}
  1824   ins_pipe( pipe_slow );
  1825 %}
  1827 // Replicate byte scalar zero to be vector
  1828 instruct Repl4B_zero(vecS dst, immI0 zero) %{
  1829   predicate(n->as_Vector()->length() == 4);
  1830   match(Set dst (ReplicateB zero));
  1831   format %{ "pxor    $dst,$dst\t! replicate4B zero" %}
  1832   ins_encode %{
  1833     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  1834   %}
  1835   ins_pipe( fpu_reg_reg );
  1836 %}
  1838 instruct Repl8B_zero(vecD dst, immI0 zero) %{
  1839   predicate(n->as_Vector()->length() == 8);
  1840   match(Set dst (ReplicateB zero));
  1841   format %{ "pxor    $dst,$dst\t! replicate8B zero" %}
  1842   ins_encode %{
  1843     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  1844   %}
  1845   ins_pipe( fpu_reg_reg );
  1846 %}
  1848 instruct Repl16B_zero(vecX dst, immI0 zero) %{
  1849   predicate(n->as_Vector()->length() == 16);
  1850   match(Set dst (ReplicateB zero));
  1851   format %{ "pxor    $dst,$dst\t! replicate16B zero" %}
  1852   ins_encode %{
  1853     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  1854   %}
  1855   ins_pipe( fpu_reg_reg );
  1856 %}
  1858 instruct Repl32B_zero(vecY dst, immI0 zero) %{
  1859   predicate(n->as_Vector()->length() == 32);
  1860   match(Set dst (ReplicateB zero));
  1861   format %{ "vpxor   $dst,$dst,$dst\t! replicate32B zero" %}
  1862   ins_encode %{
  1863     // vpxor falls back to vxorpd here: plain AVX has no 256-bit vpxor (AVX2 adds it).
  1864     bool vector256 = true;
  1865     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  1866   %}
  1867   ins_pipe( fpu_reg_reg );
  1868 %}
  1870 // Replicate char/short (2 byte) scalar to be vector
  1871 instruct Repl2S(vecS dst, rRegI src) %{
  1872   predicate(n->as_Vector()->length() == 2);
  1873   match(Set dst (ReplicateS src));
  1874   format %{ "movd    $dst,$src\n\t"
  1875             "pshuflw $dst,$dst,0x00\t! replicate2S" %}
  1876   ins_encode %{
  1877     __ movdl($dst$$XMMRegister, $src$$Register);
  1878     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  1879   %}
  1880   ins_pipe( fpu_reg_reg );
  1881 %}
  1883 instruct Repl4S(vecD dst, rRegI src) %{
  1884   predicate(n->as_Vector()->length() == 4);
  1885   match(Set dst (ReplicateS src));
  1886   format %{ "movd    $dst,$src\n\t"
  1887             "pshuflw $dst,$dst,0x00\t! replicate4S" %}
  1888   ins_encode %{
  1889     __ movdl($dst$$XMMRegister, $src$$Register);
  1890     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  1891   %}
  1892   ins_pipe( fpu_reg_reg );
  1893 %}
  1895 instruct Repl8S(vecX dst, rRegI src) %{
  1896   predicate(n->as_Vector()->length() == 8);
  1897   match(Set dst (ReplicateS src));
  1898   format %{ "movd    $dst,$src\n\t"
  1899             "pshuflw $dst,$dst,0x00\n\t"
  1900             "punpcklqdq $dst,$dst\t! replicate8S" %}
  1901   ins_encode %{
  1902     __ movdl($dst$$XMMRegister, $src$$Register);
  1903     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  1904     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  1905   %}
  1906   ins_pipe( pipe_slow );
  1907 %}
  1909 instruct Repl16S(vecY dst, rRegI src) %{
  1910   predicate(n->as_Vector()->length() == 16);
  1911   match(Set dst (ReplicateS src));
  1912   format %{ "movd    $dst,$src\n\t"
  1913             "pshuflw $dst,$dst,0x00\n\t"
  1914             "punpcklqdq $dst,$dst\n\t"
  1915             "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
  1916   ins_encode %{
  1917     __ movdl($dst$$XMMRegister, $src$$Register);
  1918     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  1919     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  1920     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  1921   %}
  1922   ins_pipe( pipe_slow );
  1923 %}
  1925 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
  1926 instruct Repl2S_imm(vecS dst, immI con) %{
  1927   predicate(n->as_Vector()->length() == 2);
  1928   match(Set dst (ReplicateS con));
  1929   format %{ "movdl   $dst,[$constantaddress]\t! replicate2S($con)" %}
  1930   ins_encode %{
  1931     __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
  1932   %}
  1933   ins_pipe( fpu_reg_reg );
  1934 %}
  1936 instruct Repl4S_imm(vecD dst, immI con) %{
  1937   predicate(n->as_Vector()->length() == 4);
  1938   match(Set dst (ReplicateS con));
  1939   format %{ "movq    $dst,[$constantaddress]\t! replicate4S($con)" %}
  1940   ins_encode %{
  1941     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
  1942   %}
  1943   ins_pipe( fpu_reg_reg );
  1944 %}
  1946 instruct Repl8S_imm(vecX dst, immI con) %{
  1947   predicate(n->as_Vector()->length() == 8);
  1948   match(Set dst (ReplicateS con));
  1949   format %{ "movq    $dst,[$constantaddress]\n\t"
  1950             "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
  1951   ins_encode %{
  1952     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
  1953     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  1954   %}
  1955   ins_pipe( pipe_slow );
  1956 %}
  1958 instruct Repl16S_imm(vecY dst, immI con) %{
  1959   predicate(n->as_Vector()->length() == 16);
  1960   match(Set dst (ReplicateS con));
  1961   format %{ "movq    $dst,[$constantaddress]\n\t"
  1962             "punpcklqdq $dst,$dst\n\t"
  1963             "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %}
  1964   ins_encode %{
  1965     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
  1966     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  1967     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  1968   %}
  1969   ins_pipe( pipe_slow );
  1970 %}
  1972 // Replicate char/short (2 byte) scalar zero to be vector
  1973 instruct Repl2S_zero(vecS dst, immI0 zero) %{
  1974   predicate(n->as_Vector()->length() == 2);
  1975   match(Set dst (ReplicateS zero));
  1976   format %{ "pxor    $dst,$dst\t! replicate2S zero" %}
  1977   ins_encode %{
  1978     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  1979   %}
  1980   ins_pipe( fpu_reg_reg );
  1981 %}
  1983 instruct Repl4S_zero(vecD dst, immI0 zero) %{
  1984   predicate(n->as_Vector()->length() == 4);
  1985   match(Set dst (ReplicateS zero));
  1986   format %{ "pxor    $dst,$dst\t! replicate4S zero" %}
  1987   ins_encode %{
  1988     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  1989   %}
  1990   ins_pipe( fpu_reg_reg );
  1991 %}
  1993 instruct Repl8S_zero(vecX dst, immI0 zero) %{
  1994   predicate(n->as_Vector()->length() == 8);
  1995   match(Set dst (ReplicateS zero));
  1996   format %{ "pxor    $dst,$dst\t! replicate8S zero" %}
  1997   ins_encode %{
  1998     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  1999   %}
  2000   ins_pipe( fpu_reg_reg );
  2001 %}
  2003 instruct Repl16S_zero(vecY dst, immI0 zero) %{
  2004   predicate(n->as_Vector()->length() == 16);
  2005   match(Set dst (ReplicateS zero));
  2006   format %{ "vpxor   $dst,$dst,$dst\t! replicate16S zero" %}
  2007   ins_encode %{
  2008     // vpxor falls back to vxorpd here: plain AVX has no 256-bit vpxor (AVX2 adds it).
  2009     bool vector256 = true;
  2010     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  2011   %}
  2012   ins_pipe( fpu_reg_reg );
  2013 %}
  2015 // Replicate integer (4 byte) scalar to be vector
  2016 instruct Repl2I(vecD dst, rRegI src) %{
  2017   predicate(n->as_Vector()->length() == 2);
  2018   match(Set dst (ReplicateI src));
  2019   format %{ "movd    $dst,$src\n\t"
  2020             "pshufd  $dst,$dst,0x00\t! replicate2I" %}
  2021   ins_encode %{
  2022     __ movdl($dst$$XMMRegister, $src$$Register);
  2023     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  2024   %}
  2025   ins_pipe( fpu_reg_reg );
  2026 %}
  2028 instruct Repl4I(vecX dst, rRegI src) %{
  2029   predicate(n->as_Vector()->length() == 4);
  2030   match(Set dst (ReplicateI src));
  2031   format %{ "movd    $dst,$src\n\t"
  2032             "pshufd  $dst,$dst,0x00\t! replicate4I" %}
  2033   ins_encode %{
  2034     __ movdl($dst$$XMMRegister, $src$$Register);
  2035     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  2036   %}
  2037   ins_pipe( pipe_slow );
  2038 %}
  2040 instruct Repl8I(vecY dst, rRegI src) %{
  2041   predicate(n->as_Vector()->length() == 8);
  2042   match(Set dst (ReplicateI src));
  2043   format %{ "movd    $dst,$src\n\t"
  2044             "pshufd  $dst,$dst,0x00\n\t"
  2045             "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
  2046   ins_encode %{
  2047     __ movdl($dst$$XMMRegister, $src$$Register);
  2048     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  2049     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  2050   %}
  2051   ins_pipe( pipe_slow );
  2052 %}
  2054 // Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
  2055 instruct Repl2I_imm(vecD dst, immI con) %{
  2056   predicate(n->as_Vector()->length() == 2);
  2057   match(Set dst (ReplicateI con));
  2058   format %{ "movq    $dst,[$constantaddress]\t! replicate2I($con)" %}
  2059   ins_encode %{
  2060     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  2061   %}
  2062   ins_pipe( fpu_reg_reg );
  2063 %}
  2065 instruct Repl4I_imm(vecX dst, immI con) %{
  2066   predicate(n->as_Vector()->length() == 4);
  2067   match(Set dst (ReplicateI con));
  2068   format %{ "movq    $dst,[$constantaddress]\t! replicate4I($con)\n\t"
  2069             "punpcklqdq $dst,$dst" %}
  2070   ins_encode %{
  2071     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  2072     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  2073   %}
  2074   ins_pipe( pipe_slow );
  2075 %}
  2077 instruct Repl8I_imm(vecY dst, immI con) %{
  2078   predicate(n->as_Vector()->length() == 8);
  2079   match(Set dst (ReplicateI con));
  2080   format %{ "movq    $dst,[$constantaddress]\t! replicate8I($con)\n\t"
  2081             "punpcklqdq $dst,$dst\n\t"
  2082             "vinserti128h $dst,$dst,$dst" %}
  2083   ins_encode %{
  2084     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  2085     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  2086     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  2087   %}
  2088   ins_pipe( pipe_slow );
  2089 %}
  2091 // An integer can be loaded into an xmm register directly from memory.
  2092 instruct Repl2I_mem(vecD dst, memory mem) %{
  2093   predicate(n->as_Vector()->length() == 2);
  2094   match(Set dst (ReplicateI (LoadI mem)));
  2095   format %{ "movd    $dst,$mem\n\t"
  2096             "pshufd  $dst,$dst,0x00\t! replicate2I" %}
  2097   ins_encode %{
  2098     __ movdl($dst$$XMMRegister, $mem$$Address);
  2099     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  2100   %}
  2101   ins_pipe( fpu_reg_reg );
  2102 %}
  2104 instruct Repl4I_mem(vecX dst, memory mem) %{
  2105   predicate(n->as_Vector()->length() == 4);
  2106   match(Set dst (ReplicateI (LoadI mem)));
  2107   format %{ "movd    $dst,$mem\n\t"
  2108             "pshufd  $dst,$dst,0x00\t! replicate4I" %}
  2109   ins_encode %{
  2110     __ movdl($dst$$XMMRegister, $mem$$Address);
  2111     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  2112   %}
  2113   ins_pipe( pipe_slow );
  2114 %}
  2116 instruct Repl8I_mem(vecY dst, memory mem) %{
  2117   predicate(n->as_Vector()->length() == 8);
  2118   match(Set dst (ReplicateI (LoadI mem)));
  2119   format %{ "movd    $dst,$mem\n\t"
  2120             "pshufd  $dst,$dst,0x00\n\t"
  2121             "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
  2122   ins_encode %{
  2123     __ movdl($dst$$XMMRegister, $mem$$Address);
  2124     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  2125     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  2126   %}
  2127   ins_pipe( pipe_slow );
  2128 %}
  2130 // Replicate integer (4 byte) scalar zero to be vector
  2131 instruct Repl2I_zero(vecD dst, immI0 zero) %{
  2132   predicate(n->as_Vector()->length() == 2);
  2133   match(Set dst (ReplicateI zero));
  2134   format %{ "pxor    $dst,$dst\t! replicate2I zero" %}
  2135   ins_encode %{
  2136     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  2137   %}
  2138   ins_pipe( fpu_reg_reg );
  2139 %}
  2141 instruct Repl4I_zero(vecX dst, immI0 zero) %{
  2142   predicate(n->as_Vector()->length() == 4);
  2143   match(Set dst (ReplicateI zero));
  2144   format %{ "pxor    $dst,$dst\t! replicate4I zero" %}
  2145   ins_encode %{
  2146     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  2147   %}
  2148   ins_pipe( fpu_reg_reg );
  2149 %}
  2151 instruct Repl8I_zero(vecY dst, immI0 zero) %{
  2152   predicate(n->as_Vector()->length() == 8);
  2153   match(Set dst (ReplicateI zero));
  2154   format %{ "vpxor   $dst,$dst,$dst\t! replicate8I zero" %}
  2155   ins_encode %{
  2156     // vpxor falls back to vxorpd here: plain AVX has no 256-bit vpxor (AVX2 adds it).
  2157     bool vector256 = true;
  2158     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  2159   %}
  2160   ins_pipe( fpu_reg_reg );
  2161 %}
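        // Long replication differs by word size: under _LP64 the whole long is in one
        // GPR and moves to xmm with a single movdq, while the 32-bit build sees a
        // lo/hi register pair (eRegL) and must assemble the value from two movdl
        // moves joined with punpckldq before broadcasting.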
  2163 // Replicate long (8 byte) scalar to be vector
  2164 #ifdef _LP64
  2165 instruct Repl2L(vecX dst, rRegL src) %{
  2166   predicate(n->as_Vector()->length() == 2);
  2167   match(Set dst (ReplicateL src));
  2168   format %{ "movdq   $dst,$src\n\t"
  2169             "punpcklqdq $dst,$dst\t! replicate2L" %}
  2170   ins_encode %{
  2171     __ movdq($dst$$XMMRegister, $src$$Register);
  2172     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  2173   %}
  2174   ins_pipe( pipe_slow );
  2175 %}
  2177 instruct Repl4L(vecY dst, rRegL src) %{
  2178   predicate(n->as_Vector()->length() == 4);
  2179   match(Set dst (ReplicateL src));
  2180   format %{ "movdq   $dst,$src\n\t"
  2181             "punpcklqdq $dst,$dst\n\t"
  2182             "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
  2183   ins_encode %{
  2184     __ movdq($dst$$XMMRegister, $src$$Register);
  2185     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  2186     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  2187   %}
  2188   ins_pipe( pipe_slow );
  2189 %}
  2190 #else // _LP64
  2191 instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
  2192   predicate(n->as_Vector()->length() == 2);
  2193   match(Set dst (ReplicateL src));
  2194   effect(TEMP dst, USE src, TEMP tmp);
  2195   format %{ "movdl   $dst,$src.lo\n\t"
  2196             "movdl   $tmp,$src.hi\n\t"
  2197             "punpckldq $dst,$tmp\n\t"
  2198             "punpcklqdq $dst,$dst\t! replicate2L"%}
  2199   ins_encode %{
  2200     __ movdl($dst$$XMMRegister, $src$$Register);
  2201     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
  2202     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
  2203     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  2204   %}
  2205   ins_pipe( pipe_slow );
  2206 %}
  2208 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
  2209   predicate(n->as_Vector()->length() == 4);
  2210   match(Set dst (ReplicateL src));
  2211   effect(TEMP dst, USE src, TEMP tmp);
  2212   format %{ "movdl   $dst,$src.lo\n\t"
  2213             "movdl   $tmp,$src.hi\n\t"
  2214             "punpckldq $dst,$tmp\n\t"
  2215             "punpcklqdq $dst,$dst\n\t"
  2216             "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
  2217   ins_encode %{
  2218     __ movdl($dst$$XMMRegister, $src$$Register);
  2219     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
  2220     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
  2221     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  2222     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  2223   %}
  2224   ins_pipe( pipe_slow );
  2225 %}
  2226 #endif // _LP64
  2228 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
  2229 instruct Repl2L_imm(vecX dst, immL con) %{
  2230   predicate(n->as_Vector()->length() == 2);
  2231   match(Set dst (ReplicateL con));
  2232   format %{ "movq    $dst,[$constantaddress]\n\t"
  2233             "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
  2234   ins_encode %{
  2235     __ movq($dst$$XMMRegister, $constantaddress($con));
  2236     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  2237   %}
  2238   ins_pipe( pipe_slow );
  2239 %}
  2241 instruct Repl4L_imm(vecY dst, immL con) %{
  2242   predicate(n->as_Vector()->length() == 4);
  2243   match(Set dst (ReplicateL con));
  2244   format %{ "movq    $dst,[$constantaddress]\n\t"
  2245             "punpcklqdq $dst,$dst\n\t"
  2246             "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %}
  2247   ins_encode %{
  2248     __ movq($dst$$XMMRegister, $constantaddress($con));
  2249     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  2250     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  2251   %}
  2252   ins_pipe( pipe_slow );
  2253 %}
  2255 // A long can be loaded into an xmm register directly from memory.
  2256 instruct Repl2L_mem(vecX dst, memory mem) %{
  2257   predicate(n->as_Vector()->length() == 2);
  2258   match(Set dst (ReplicateL (LoadL mem)));
  2259   format %{ "movq    $dst,$mem\n\t"
  2260             "punpcklqdq $dst,$dst\t! replicate2L" %}
  2261   ins_encode %{
  2262     __ movq($dst$$XMMRegister, $mem$$Address);
  2263     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  2264   %}
  2265   ins_pipe( pipe_slow );
  2266 %}
  2268 instruct Repl4L_mem(vecY dst, memory mem) %{
  2269   predicate(n->as_Vector()->length() == 4);
  2270   match(Set dst (ReplicateL (LoadL mem)));
  2271   format %{ "movq    $dst,$mem\n\t"
  2272             "punpcklqdq $dst,$dst\n\t"
  2273             "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
  2274   ins_encode %{
  2275     __ movq($dst$$XMMRegister, $mem$$Address);
  2276     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  2277     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  2278   %}
  2279   ins_pipe( pipe_slow );
  2280 %}
  2282 // Replicate long (8 byte) scalar zero to be vector
  2283 instruct Repl2L_zero(vecX dst, immL0 zero) %{
  2284   predicate(n->as_Vector()->length() == 2);
  2285   match(Set dst (ReplicateL zero));
  2286   format %{ "pxor    $dst,$dst\t! replicate2L zero" %}
  2287   ins_encode %{
  2288     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  2289   %}
  2290   ins_pipe( fpu_reg_reg );
  2291 %}
  2293 instruct Repl4L_zero(vecY dst, immL0 zero) %{
  2294   predicate(n->as_Vector()->length() == 4);
  2295   match(Set dst (ReplicateL zero));
  2296   format %{ "vpxor   $dst,$dst,$dst\t! replicate4L zero" %}
  2297   ins_encode %{
  2298     // vpxor falls back to vxorpd here: plain AVX has no 256-bit vpxor (AVX2 adds it).
  2299     bool vector256 = true;
  2300     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  2301   %}
  2302   ins_pipe( fpu_reg_reg );
  2303 %}
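        // Float and double sources already live in xmm registers, so replication is
        // just a shuffle: pshufd with selector 0x00 copies lane 0 to all four 32-bit
        // lanes, and selector 0x44 duplicates the low 64-bit double into both lanes.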
  2305 // Replicate float (4 byte) scalar to be vector
  2306 instruct Repl2F(vecD dst, regF src) %{
  2307   predicate(n->as_Vector()->length() == 2);
  2308   match(Set dst (ReplicateF src));
  2309   format %{ "pshufd  $dst,$src,0x00\t! replicate2F" %}
  2310   ins_encode %{
  2311     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  2312   %}
  2313   ins_pipe( fpu_reg_reg );
  2314 %}
  2316 instruct Repl4F(vecX dst, regF src) %{
  2317   predicate(n->as_Vector()->length() == 4);
  2318   match(Set dst (ReplicateF src));
  2319   format %{ "pshufd  $dst,$src,0x00\t! replicate4F" %}
  2320   ins_encode %{
  2321     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  2322   %}
  2323   ins_pipe( pipe_slow );
  2324 %}
  2326 instruct Repl8F(vecY dst, regF src) %{
  2327   predicate(n->as_Vector()->length() == 8);
  2328   match(Set dst (ReplicateF src));
  2329   format %{ "pshufd  $dst,$src,0x00\n\t"
  2330             "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
  2331   ins_encode %{
  2332     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  2333     __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  2334   %}
  2335   ins_pipe( pipe_slow );
  2336 %}
  2338 // Replicate float (4 byte) scalar zero to be vector
  2339 instruct Repl2F_zero(vecD dst, immF0 zero) %{
  2340   predicate(n->as_Vector()->length() == 2);
  2341   match(Set dst (ReplicateF zero));
  2342   format %{ "xorps   $dst,$dst\t! replicate2F zero" %}
  2343   ins_encode %{
  2344     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  2345   %}
  2346   ins_pipe( fpu_reg_reg );
  2347 %}
  2349 instruct Repl4F_zero(vecX dst, immF0 zero) %{
  2350   predicate(n->as_Vector()->length() == 4);
  2351   match(Set dst (ReplicateF zero));
  2352   format %{ "xorps   $dst,$dst\t! replicate4F zero" %}
  2353   ins_encode %{
  2354     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  2355   %}
  2356   ins_pipe( fpu_reg_reg );
  2357 %}
  2359 instruct Repl8F_zero(vecY dst, immF0 zero) %{
  2360   predicate(n->as_Vector()->length() == 8);
  2361   match(Set dst (ReplicateF zero));
  2362   format %{ "vxorps  $dst,$dst,$dst\t! replicate8F zero" %}
  2363   ins_encode %{
  2364     bool vector256 = true;
  2365     __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  2366   %}
  2367   ins_pipe( fpu_reg_reg );
  2368 %}
  2370 // Replicate double (8 byte) scalar to be vector
  2371 instruct Repl2D(vecX dst, regD src) %{
  2372   predicate(n->as_Vector()->length() == 2);
  2373   match(Set dst (ReplicateD src));
  2374   format %{ "pshufd  $dst,$src,0x44\t! replicate2D" %}
  2375   ins_encode %{
  2376     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  2377   %}
  2378   ins_pipe( pipe_slow );
  2379 %}
  2381 instruct Repl4D(vecY dst, regD src) %{
  2382   predicate(n->as_Vector()->length() == 4);
  2383   match(Set dst (ReplicateD src));
  2384   format %{ "pshufd  $dst,$src,0x44\n\t"
  2385             "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
  2386   ins_encode %{
  2387     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  2388     __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  2389   %}
  2390   ins_pipe( pipe_slow );
  2391 %}
  2393 // Replicate double (8 byte) scalar zero to be vector
  2394 instruct Repl2D_zero(vecX dst, immD0 zero) %{
  2395   predicate(n->as_Vector()->length() == 2);
  2396   match(Set dst (ReplicateD zero));
  2397   format %{ "xorpd   $dst,$dst\t! replicate2D zero" %}
  2398   ins_encode %{
  2399     __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
  2400   %}
  2401   ins_pipe( fpu_reg_reg );
  2402 %}
  2404 instruct Repl4D_zero(vecY dst, immD0 zero) %{
  2405   predicate(n->as_Vector()->length() == 4);
  2406   match(Set dst (ReplicateD zero));
  2407   format %{ "vxorpd  $dst,$dst,$dst\t! replicate4D zero" %}
  2408   ins_encode %{
  2409     bool vector256 = true;
  2410     __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  2411   %}
  2412   ins_pipe( fpu_reg_reg );
  2413 %}
  2415 // ====================VECTOR ARITHMETIC=======================================
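        // The arithmetic instructs below come in pairs: the legacy SSE forms are
        // destructive, matching (Set dst (AddVB dst src)) and reusing dst as the
        // first input, while the AVX forms are three-operand and also take a memory
        // second input. 128-bit AVX forms require UseAVX > 0; 256-bit forms on
        // integer elements require UseAVX > 1 because 256-bit integer instructions
        // arrived with AVX2, whereas 256-bit float/double forms need only AVX.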
  2417 // --------------------------------- ADD --------------------------------------
  2419 // Bytes vector add
  2420 instruct vadd4B(vecS dst, vecS src) %{
  2421   predicate(n->as_Vector()->length() == 4);
  2422   match(Set dst (AddVB dst src));
  2423   format %{ "paddb   $dst,$src\t! add packed4B" %}
  2424   ins_encode %{
  2425     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  2426   %}
  2427   ins_pipe( pipe_slow );
  2428 %}
  2430 instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
  2431   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  2432   match(Set dst (AddVB src1 src2));
  2433   format %{ "vpaddb  $dst,$src1,$src2\t! add packed4B" %}
  2434   ins_encode %{
  2435     bool vector256 = false;
  2436     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2437   %}
  2438   ins_pipe( pipe_slow );
  2439 %}
  2441 instruct vadd8B(vecD dst, vecD src) %{
  2442   predicate(n->as_Vector()->length() == 8);
  2443   match(Set dst (AddVB dst src));
  2444   format %{ "paddb   $dst,$src\t! add packed8B" %}
  2445   ins_encode %{
  2446     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  2447   %}
  2448   ins_pipe( pipe_slow );
  2449 %}
  2451 instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
  2452   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  2453   match(Set dst (AddVB src1 src2));
  2454   format %{ "vpaddb  $dst,$src1,$src2\t! add packed8B" %}
  2455   ins_encode %{
  2456     bool vector256 = false;
  2457     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2458   %}
  2459   ins_pipe( pipe_slow );
  2460 %}
  2462 instruct vadd16B(vecX dst, vecX src) %{
  2463   predicate(n->as_Vector()->length() == 16);
  2464   match(Set dst (AddVB dst src));
  2465   format %{ "paddb   $dst,$src\t! add packed16B" %}
  2466   ins_encode %{
  2467     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  2468   %}
  2469   ins_pipe( pipe_slow );
  2470 %}
  2472 instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
  2473   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  2474   match(Set dst (AddVB src1 src2));
  2475   format %{ "vpaddb  $dst,$src1,$src2\t! add packed16B" %}
  2476   ins_encode %{
  2477     bool vector256 = false;
  2478     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2479   %}
  2480   ins_pipe( pipe_slow );
  2481 %}
  2483 instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
  2484   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  2485   match(Set dst (AddVB src (LoadVector mem)));
  2486   format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
  2487   ins_encode %{
  2488     bool vector256 = false;
  2489     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2490   %}
  2491   ins_pipe( pipe_slow );
  2492 %}
  2494 instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
  2495   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  2496   match(Set dst (AddVB src1 src2));
  2497   format %{ "vpaddb  $dst,$src1,$src2\t! add packed32B" %}
  2498   ins_encode %{
  2499     bool vector256 = true;
  2500     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2501   %}
  2502   ins_pipe( pipe_slow );
  2503 %}
  2505 instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
  2506   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  2507   match(Set dst (AddVB src (LoadVector mem)));
  2508   format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
  2509   ins_encode %{
  2510     bool vector256 = true;
  2511     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2512   %}
  2513   ins_pipe( pipe_slow );
  2514 %}
  2516 // Shorts/Chars vector add
  2517 instruct vadd2S(vecS dst, vecS src) %{
  2518   predicate(n->as_Vector()->length() == 2);
  2519   match(Set dst (AddVS dst src));
  2520   format %{ "paddw   $dst,$src\t! add packed2S" %}
  2521   ins_encode %{
  2522     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  2523   %}
  2524   ins_pipe( pipe_slow );
  2525 %}
  2527 instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
  2528   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  2529   match(Set dst (AddVS src1 src2));
  2530   format %{ "vpaddw  $dst,$src1,$src2\t! add packed2S" %}
  2531   ins_encode %{
  2532     bool vector256 = false;
  2533     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2534   %}
  2535   ins_pipe( pipe_slow );
  2536 %}
  2538 instruct vadd4S(vecD dst, vecD src) %{
  2539   predicate(n->as_Vector()->length() == 4);
  2540   match(Set dst (AddVS dst src));
  2541   format %{ "paddw   $dst,$src\t! add packed4S" %}
  2542   ins_encode %{
  2543     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  2544   %}
  2545   ins_pipe( pipe_slow );
  2546 %}
  2548 instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
  2549   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  2550   match(Set dst (AddVS src1 src2));
  2551   format %{ "vpaddw  $dst,$src1,$src2\t! add packed4S" %}
  2552   ins_encode %{
  2553     bool vector256 = false;
  2554     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2555   %}
  2556   ins_pipe( pipe_slow );
  2557 %}
  2559 instruct vadd8S(vecX dst, vecX src) %{
  2560   predicate(n->as_Vector()->length() == 8);
  2561   match(Set dst (AddVS dst src));
  2562   format %{ "paddw   $dst,$src\t! add packed8S" %}
  2563   ins_encode %{
  2564     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  2565   %}
  2566   ins_pipe( pipe_slow );
  2567 %}
  2569 instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
  2570   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  2571   match(Set dst (AddVS src1 src2));
  2572   format %{ "vpaddw  $dst,$src1,$src2\t! add packed8S" %}
  2573   ins_encode %{
  2574     bool vector256 = false;
  2575     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2576   %}
  2577   ins_pipe( pipe_slow );
  2578 %}
  2580 instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
  2581   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  2582   match(Set dst (AddVS src (LoadVector mem)));
  2583   format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
  2584   ins_encode %{
  2585     bool vector256 = false;
  2586     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2587   %}
  2588   ins_pipe( pipe_slow );
  2589 %}
  2591 instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
  2592   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  2593   match(Set dst (AddVS src1 src2));
  2594   format %{ "vpaddw  $dst,$src1,$src2\t! add packed16S" %}
  2595   ins_encode %{
  2596     bool vector256 = true;
  2597     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2598   %}
  2599   ins_pipe( pipe_slow );
  2600 %}
  2602 instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
  2603   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  2604   match(Set dst (AddVS src (LoadVector mem)));
  2605   format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
  2606   ins_encode %{
  2607     bool vector256 = true;
  2608     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2609   %}
  2610   ins_pipe( pipe_slow );
  2611 %}
  2613 // Integers vector add
  2614 instruct vadd2I(vecD dst, vecD src) %{
  2615   predicate(n->as_Vector()->length() == 2);
  2616   match(Set dst (AddVI dst src));
  2617   format %{ "paddd   $dst,$src\t! add packed2I" %}
  2618   ins_encode %{
  2619     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  2620   %}
  2621   ins_pipe( pipe_slow );
  2622 %}
  2624 instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  2625   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  2626   match(Set dst (AddVI src1 src2));
  2627   format %{ "vpaddd  $dst,$src1,$src2\t! add packed2I" %}
  2628   ins_encode %{
  2629     bool vector256 = false;
  2630     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2631   %}
  2632   ins_pipe( pipe_slow );
  2633 %}
  2635 instruct vadd4I(vecX dst, vecX src) %{
  2636   predicate(n->as_Vector()->length() == 4);
  2637   match(Set dst (AddVI dst src));
  2638   format %{ "paddd   $dst,$src\t! add packed4I" %}
  2639   ins_encode %{
  2640     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  2641   %}
  2642   ins_pipe( pipe_slow );
  2643 %}
  2645 instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  2646   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  2647   match(Set dst (AddVI src1 src2));
  2648   format %{ "vpaddd  $dst,$src1,$src2\t! add packed4I" %}
  2649   ins_encode %{
  2650     bool vector256 = false;
  2651     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2652   %}
  2653   ins_pipe( pipe_slow );
  2654 %}
  2656 instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
  2657   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  2658   match(Set dst (AddVI src (LoadVector mem)));
  2659   format %{ "vpaddd  $dst,$src,$mem\t! add packed4I" %}
  2660   ins_encode %{
  2661     bool vector256 = false;
  2662     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2663   %}
  2664   ins_pipe( pipe_slow );
  2665 %}
  2667 instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
  2668   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  2669   match(Set dst (AddVI src1 src2));
  2670   format %{ "vpaddd  $dst,$src1,$src2\t! add packed8I" %}
  2671   ins_encode %{
  2672     bool vector256 = true;
  2673     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2674   %}
  2675   ins_pipe( pipe_slow );
  2676 %}
  2678 instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
  2679   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  2680   match(Set dst (AddVI src (LoadVector mem)));
  2681   format %{ "vpaddd  $dst,$src,$mem\t! add packed8I" %}
  2682   ins_encode %{
  2683     bool vector256 = true;
  2684     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2685   %}
  2686   ins_pipe( pipe_slow );
  2687 %}
  2689 // Longs vector add
  2690 instruct vadd2L(vecX dst, vecX src) %{
  2691   predicate(n->as_Vector()->length() == 2);
  2692   match(Set dst (AddVL dst src));
  2693   format %{ "paddq   $dst,$src\t! add packed2L" %}
  2694   ins_encode %{
  2695     __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  2696   %}
  2697   ins_pipe( pipe_slow );
  2698 %}
  2700 instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  2701   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  2702   match(Set dst (AddVL src1 src2));
  2703   format %{ "vpaddq  $dst,$src1,$src2\t! add packed2L" %}
  2704   ins_encode %{
  2705     bool vector256 = false;
  2706     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2707   %}
  2708   ins_pipe( pipe_slow );
  2709 %}
  2711 instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
  2712   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  2713   match(Set dst (AddVL src (LoadVector mem)));
  2714   format %{ "vpaddq  $dst,$src,$mem\t! add packed2L" %}
  2715   ins_encode %{
  2716     bool vector256 = false;
  2717     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2718   %}
  2719   ins_pipe( pipe_slow );
  2720 %}
  2722 instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
  2723   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  2724   match(Set dst (AddVL src1 src2));
  2725   format %{ "vpaddq  $dst,$src1,$src2\t! add packed4L" %}
  2726   ins_encode %{
  2727     bool vector256 = true;
  2728     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2729   %}
  2730   ins_pipe( pipe_slow );
  2731 %}
  2733 instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
  2734   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  2735   match(Set dst (AddVL src (LoadVector mem)));
  2736   format %{ "vpaddq  $dst,$src,$mem\t! add packed4L" %}
  2737   ins_encode %{
  2738     bool vector256 = true;
  2739     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2740   %}
  2741   ins_pipe( pipe_slow );
  2742 %}
  2744 // Floats vector add
  2745 instruct vadd2F(vecD dst, vecD src) %{
  2746   predicate(n->as_Vector()->length() == 2);
  2747   match(Set dst (AddVF dst src));
  2748   format %{ "addps   $dst,$src\t! add packed2F" %}
  2749   ins_encode %{
  2750     __ addps($dst$$XMMRegister, $src$$XMMRegister);
  2751   %}
  2752   ins_pipe( pipe_slow );
  2753 %}
  2755 instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  2756   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  2757   match(Set dst (AddVF src1 src2));
  2758   format %{ "vaddps  $dst,$src1,$src2\t! add packed2F" %}
  2759   ins_encode %{
  2760     bool vector256 = false;
  2761     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2762   %}
  2763   ins_pipe( pipe_slow );
  2764 %}
  2766 instruct vadd4F(vecX dst, vecX src) %{
  2767   predicate(n->as_Vector()->length() == 4);
  2768   match(Set dst (AddVF dst src));
  2769   format %{ "addps   $dst,$src\t! add packed4F" %}
  2770   ins_encode %{
  2771     __ addps($dst$$XMMRegister, $src$$XMMRegister);
  2772   %}
  2773   ins_pipe( pipe_slow );
  2774 %}
  2776 instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  2777   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  2778   match(Set dst (AddVF src1 src2));
  2779   format %{ "vaddps  $dst,$src1,$src2\t! add packed4F" %}
  2780   ins_encode %{
  2781     bool vector256 = false;
  2782     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2783   %}
  2784   ins_pipe( pipe_slow );
  2785 %}
  2787 instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  2788   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  2789   match(Set dst (AddVF src (LoadVector mem)));
  2790   format %{ "vaddps  $dst,$src,$mem\t! add packed4F" %}
  2791   ins_encode %{
  2792     bool vector256 = false;
  2793     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2794   %}
  2795   ins_pipe( pipe_slow );
  2796 %}
  2798 instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
  2799   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  2800   match(Set dst (AddVF src1 src2));
  2801   format %{ "vaddps  $dst,$src1,$src2\t! add packed8F" %}
  2802   ins_encode %{
  2803     bool vector256 = true;
  2804     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2805   %}
  2806   ins_pipe( pipe_slow );
  2807 %}
  2809 instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
  2810   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  2811   match(Set dst (AddVF src (LoadVector mem)));
  2812   format %{ "vaddps  $dst,$src,$mem\t! add packed8F" %}
  2813   ins_encode %{
  2814     bool vector256 = true;
  2815     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2816   %}
  2817   ins_pipe( pipe_slow );
  2818 %}
  2820 // Doubles vector add
  2821 instruct vadd2D(vecX dst, vecX src) %{
  2822   predicate(n->as_Vector()->length() == 2);
  2823   match(Set dst (AddVD dst src));
  2824   format %{ "addpd   $dst,$src\t! add packed2D" %}
  2825   ins_encode %{
  2826     __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  2827   %}
  2828   ins_pipe( pipe_slow );
  2829 %}
  2831 instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
  2832   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  2833   match(Set dst (AddVD src1 src2));
  2834   format %{ "vaddpd  $dst,$src1,$src2\t! add packed2D" %}
  2835   ins_encode %{
  2836     bool vector256 = false;
  2837     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2838   %}
  2839   ins_pipe( pipe_slow );
  2840 %}
  2842 instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
  2843   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  2844   match(Set dst (AddVD src (LoadVector mem)));
  2845   format %{ "vaddpd  $dst,$src,$mem\t! add packed2D" %}
  2846   ins_encode %{
  2847     bool vector256 = false;
  2848     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2849   %}
  2850   ins_pipe( pipe_slow );
  2851 %}
  2853 instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
  2854   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  2855   match(Set dst (AddVD src1 src2));
  2856   format %{ "vaddpd  $dst,$src1,$src2\t! add packed4D" %}
  2857   ins_encode %{
  2858     bool vector256 = true;
  2859     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2860   %}
  2861   ins_pipe( pipe_slow );
  2862 %}
  2864 instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
  2865   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  2866   match(Set dst (AddVD src (LoadVector mem)));
  2867   format %{ "vaddpd  $dst,$src,$mem\t! add packed4D" %}
  2868   ins_encode %{
  2869     bool vector256 = true;
  2870     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2871   %}
  2872   ins_pipe( pipe_slow );
  2873 %}
  2875 // --------------------------------- SUB --------------------------------------
  2877 // Bytes vector sub
  2878 instruct vsub4B(vecS dst, vecS src) %{
  2879   predicate(n->as_Vector()->length() == 4);
  2880   match(Set dst (SubVB dst src));
  2881   format %{ "psubb   $dst,$src\t! sub packed4B" %}
  2882   ins_encode %{
  2883     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  2884   %}
  2885   ins_pipe( pipe_slow );
  2886 %}
  2888 instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
  2889   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  2890   match(Set dst (SubVB src1 src2));
  2891   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed4B" %}
  2892   ins_encode %{
  2893     bool vector256 = false;
  2894     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2895   %}
  2896   ins_pipe( pipe_slow );
  2897 %}
  2899 instruct vsub8B(vecD dst, vecD src) %{
  2900   predicate(n->as_Vector()->length() == 8);
  2901   match(Set dst (SubVB dst src));
  2902   format %{ "psubb   $dst,$src\t! sub packed8B" %}
  2903   ins_encode %{
  2904     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  2905   %}
  2906   ins_pipe( pipe_slow );
  2907 %}
  2909 instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
  2910   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  2911   match(Set dst (SubVB src1 src2));
  2912   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed8B" %}
  2913   ins_encode %{
  2914     bool vector256 = false;
  2915     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2916   %}
  2917   ins_pipe( pipe_slow );
  2918 %}
  2920 instruct vsub16B(vecX dst, vecX src) %{
  2921   predicate(n->as_Vector()->length() == 16);
  2922   match(Set dst (SubVB dst src));
  2923   format %{ "psubb   $dst,$src\t! sub packed16B" %}
  2924   ins_encode %{
  2925     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  2926   %}
  2927   ins_pipe( pipe_slow );
  2928 %}
  2930 instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
  2931   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  2932   match(Set dst (SubVB src1 src2));
  2933   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed16B" %}
  2934   ins_encode %{
  2935     bool vector256 = false;
  2936     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2937   %}
  2938   ins_pipe( pipe_slow );
  2939 %}
  2941 instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
  2942   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  2943   match(Set dst (SubVB src (LoadVector mem)));
  2944   format %{ "vpsubb  $dst,$src,$mem\t! sub packed16B" %}
  2945   ins_encode %{
  2946     bool vector256 = false;
  2947     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2948   %}
  2949   ins_pipe( pipe_slow );
  2950 %}
  2952 instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
  2953   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  2954   match(Set dst (SubVB src1 src2));
  2955   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed32B" %}
  2956   ins_encode %{
  2957     bool vector256 = true;
  2958     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2959   %}
  2960   ins_pipe( pipe_slow );
  2961 %}
  2963 instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
  2964   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  2965   match(Set dst (SubVB src (LoadVector mem)));
  2966   format %{ "vpsubb  $dst,$src,$mem\t! sub packed32B" %}
  2967   ins_encode %{
  2968     bool vector256 = true;
  2969     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2970   %}
  2971   ins_pipe( pipe_slow );
  2972 %}
  2974 // Shorts/Chars vector sub
  2975 instruct vsub2S(vecS dst, vecS src) %{
  2976   predicate(n->as_Vector()->length() == 2);
  2977   match(Set dst (SubVS dst src));
  2978   format %{ "psubw   $dst,$src\t! sub packed2S" %}
  2979   ins_encode %{
  2980     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  2981   %}
  2982   ins_pipe( pipe_slow );
  2983 %}
  2985 instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
  2986   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  2987   match(Set dst (SubVS src1 src2));
  2988   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed2S" %}
  2989   ins_encode %{
  2990     bool vector256 = false;
  2991     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2992   %}
  2993   ins_pipe( pipe_slow );
  2994 %}
  2996 instruct vsub4S(vecD dst, vecD src) %{
  2997   predicate(n->as_Vector()->length() == 4);
  2998   match(Set dst (SubVS dst src));
  2999   format %{ "psubw   $dst,$src\t! sub packed4S" %}
  3000   ins_encode %{
  3001     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  3002   %}
  3003   ins_pipe( pipe_slow );
  3004 %}
  3006 instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
  3007   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3008   match(Set dst (SubVS src1 src2));
  3009   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed4S" %}
  3010   ins_encode %{
  3011     bool vector256 = false;
  3012     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3013   %}
  3014   ins_pipe( pipe_slow );
  3015 %}
  3017 instruct vsub8S(vecX dst, vecX src) %{
  3018   predicate(n->as_Vector()->length() == 8);
  3019   match(Set dst (SubVS dst src));
  3020   format %{ "psubw   $dst,$src\t! sub packed8S" %}
  3021   ins_encode %{
  3022     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  3023   %}
  3024   ins_pipe( pipe_slow );
  3025 %}
  3027 instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
  3028   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  3029   match(Set dst (SubVS src1 src2));
  3030   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed8S" %}
  3031   ins_encode %{
  3032     bool vector256 = false;
  3033     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3034   %}
  3035   ins_pipe( pipe_slow );
  3036 %}
  3038 instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
  3039   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  3040   match(Set dst (SubVS src (LoadVector mem)));
  3041   format %{ "vpsubw  $dst,$src,$mem\t! sub packed8S" %}
  3042   ins_encode %{
  3043     bool vector256 = false;
  3044     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3045   %}
  3046   ins_pipe( pipe_slow );
  3047 %}
  3049 instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
  3050   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  3051   match(Set dst (SubVS src1 src2));
  3052   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed16S" %}
  3053   ins_encode %{
  3054     bool vector256 = true;
  3055     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3056   %}
  3057   ins_pipe( pipe_slow );
  3058 %}
  3060 instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
  3061   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  3062   match(Set dst (SubVS src (LoadVector mem)));
  3063   format %{ "vpsubw  $dst,$src,$mem\t! sub packed16S" %}
  3064   ins_encode %{
  3065     bool vector256 = true;
  3066     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3067   %}
  3068   ins_pipe( pipe_slow );
  3069 %}
  3071 // Integers vector sub
  3072 instruct vsub2I(vecD dst, vecD src) %{
  3073   predicate(n->as_Vector()->length() == 2);
  3074   match(Set dst (SubVI dst src));
  3075   format %{ "psubd   $dst,$src\t! sub packed2I" %}
  3076   ins_encode %{
  3077     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  3078   %}
  3079   ins_pipe( pipe_slow );
  3080 %}
  3082 instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
  3083   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3084   match(Set dst (SubVI src1 src2));
  3085   format %{ "vpsubd  $dst,$src1,$src2\t! sub packed2I" %}
  3086   ins_encode %{
  3087     bool vector256 = false;
  3088     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3089   %}
  3090   ins_pipe( pipe_slow );
  3091 %}
  3093 instruct vsub4I(vecX dst, vecX src) %{
  3094   predicate(n->as_Vector()->length() == 4);
  3095   match(Set dst (SubVI dst src));
  3096   format %{ "psubd   $dst,$src\t! sub packed4I" %}
  3097   ins_encode %{
  3098     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  3099   %}
  3100   ins_pipe( pipe_slow );
  3101 %}
  3103 instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
  3104   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3105   match(Set dst (SubVI src1 src2));
  3106   format %{ "vpsubd  $dst,$src1,$src2\t! sub packed4I" %}
  3107   ins_encode %{
  3108     bool vector256 = false;
  3109     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3110   %}
  3111   ins_pipe( pipe_slow );
  3112 %}
  3114 instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
  3115   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3116   match(Set dst (SubVI src (LoadVector mem)));
  3117   format %{ "vpsubd  $dst,$src,$mem\t! sub packed4I" %}
  3118   ins_encode %{
  3119     bool vector256 = false;
  3120     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3121   %}
  3122   ins_pipe( pipe_slow );
  3123 %}
  3125 instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
  3126   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  3127   match(Set dst (SubVI src1 src2));
  3128   format %{ "vpsubd  $dst,$src1,$src2\t! sub packed8I" %}
  3129   ins_encode %{
  3130     bool vector256 = true;
  3131     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3132   %}
  3133   ins_pipe( pipe_slow );
  3134 %}
  3136 instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
  3137   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  3138   match(Set dst (SubVI src (LoadVector mem)));
  3139   format %{ "vpsubd  $dst,$src,$mem\t! sub packed8I" %}
  3140   ins_encode %{
  3141     bool vector256 = true;
  3142     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3143   %}
  3144   ins_pipe( pipe_slow );
  3145 %}
  3147 // Longs vector sub
  3148 instruct vsub2L(vecX dst, vecX src) %{
  3149   predicate(n->as_Vector()->length() == 2);
  3150   match(Set dst (SubVL dst src));
  3151   format %{ "psubq   $dst,$src\t! sub packed2L" %}
  3152   ins_encode %{
  3153     __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  3154   %}
  3155   ins_pipe( pipe_slow );
  3156 %}
  3158 instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
  3159   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3160   match(Set dst (SubVL src1 src2));
  3161   format %{ "vpsubq  $dst,$src1,$src2\t! sub packed2L" %}
  3162   ins_encode %{
  3163     bool vector256 = false;
  3164     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3165   %}
  3166   ins_pipe( pipe_slow );
  3167 %}
  3169 instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
  3170   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3171   match(Set dst (SubVL src (LoadVector mem)));
  3172   format %{ "vpsubq  $dst,$src,$mem\t! sub packed2L" %}
  3173   ins_encode %{
  3174     bool vector256 = false;
  3175     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3176   %}
  3177   ins_pipe( pipe_slow );
  3178 %}
  3180 instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
  3181   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  3182   match(Set dst (SubVL src1 src2));
  3183   format %{ "vpsubq  $dst,$src1,$src2\t! sub packed4L" %}
  3184   ins_encode %{
  3185     bool vector256 = true;
  3186     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3187   %}
  3188   ins_pipe( pipe_slow );
  3189 %}
  3191 instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
  3192   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  3193   match(Set dst (SubVL src (LoadVector mem)));
  3194   format %{ "vpsubq  $dst,$src,$mem\t! sub packed4L" %}
  3195   ins_encode %{
  3196     bool vector256 = true;
  3197     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3198   %}
  3199   ins_pipe( pipe_slow );
  3200 %}
  3202 // Floats vector sub
  3203 instruct vsub2F(vecD dst, vecD src) %{
  3204   predicate(n->as_Vector()->length() == 2);
  3205   match(Set dst (SubVF dst src));
  3206   format %{ "subps   $dst,$src\t! sub packed2F" %}
  3207   ins_encode %{
  3208     __ subps($dst$$XMMRegister, $src$$XMMRegister);
  3209   %}
  3210   ins_pipe( pipe_slow );
  3211 %}
  3213 instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
  3214   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3215   match(Set dst (SubVF src1 src2));
  3216   format %{ "vsubps  $dst,$src1,$src2\t! sub packed2F" %}
  3217   ins_encode %{
  3218     bool vector256 = false;
  3219     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3220   %}
  3221   ins_pipe( pipe_slow );
  3222 %}
  3224 instruct vsub4F(vecX dst, vecX src) %{
  3225   predicate(n->as_Vector()->length() == 4);
  3226   match(Set dst (SubVF dst src));
  3227   format %{ "subps   $dst,$src\t! sub packed4F" %}
  3228   ins_encode %{
  3229     __ subps($dst$$XMMRegister, $src$$XMMRegister);
  3230   %}
  3231   ins_pipe( pipe_slow );
  3232 %}
  3234 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
  3235   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3236   match(Set dst (SubVF src1 src2));
  3237   format %{ "vsubps  $dst,$src1,$src2\t! sub packed4F" %}
  3238   ins_encode %{
  3239     bool vector256 = false;
  3240     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3241   %}
  3242   ins_pipe( pipe_slow );
  3243 %}
  3245 instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
  3246   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3247   match(Set dst (SubVF src (LoadVector mem)));
  3248   format %{ "vsubps  $dst,$src,$mem\t! sub packed4F" %}
  3249   ins_encode %{
  3250     bool vector256 = false;
  3251     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3252   %}
  3253   ins_pipe( pipe_slow );
  3254 %}
  3256 instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
  3257   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  3258   match(Set dst (SubVF src1 src2));
  3259   format %{ "vsubps  $dst,$src1,$src2\t! sub packed8F" %}
  3260   ins_encode %{
  3261     bool vector256 = true;
  3262     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3263   %}
  3264   ins_pipe( pipe_slow );
  3265 %}
  3267 instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
  3268   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  3269   match(Set dst (SubVF src (LoadVector mem)));
  3270   format %{ "vsubps  $dst,$src,$mem\t! sub packed8F" %}
  3271   ins_encode %{
  3272     bool vector256 = true;
  3273     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3274   %}
  3275   ins_pipe( pipe_slow );
  3276 %}
  3278 // Doubles vector sub
  3279 instruct vsub2D(vecX dst, vecX src) %{
  3280   predicate(n->as_Vector()->length() == 2);
  3281   match(Set dst (SubVD dst src));
  3282   format %{ "subpd   $dst,$src\t! sub packed2D" %}
  3283   ins_encode %{
  3284     __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  3285   %}
  3286   ins_pipe( pipe_slow );
  3287 %}
  3289 instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
  3290   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3291   match(Set dst (SubVD src1 src2));
  3292   format %{ "vsubpd  $dst,$src1,$src2\t! sub packed2D" %}
  3293   ins_encode %{
  3294     bool vector256 = false;
  3295     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3296   %}
  3297   ins_pipe( pipe_slow );
  3298 %}
  3300 instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
  3301   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3302   match(Set dst (SubVD src (LoadVector mem)));
  3303   format %{ "vsubpd  $dst,$src,$mem\t! sub packed2D" %}
  3304   ins_encode %{
  3305     bool vector256 = false;
  3306     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3307   %}
  3308   ins_pipe( pipe_slow );
  3309 %}
  3311 instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
  3312   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3313   match(Set dst (SubVD src1 src2));
  3314   format %{ "vsubpd  $dst,$src1,$src2\t! sub packed4D" %}
  3315   ins_encode %{
  3316     bool vector256 = true;
  3317     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3318   %}
  3319   ins_pipe( pipe_slow );
  3320 %}
  3322 instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
  3323   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3324   match(Set dst (SubVD src (LoadVector mem)));
  3325   format %{ "vsubpd  $dst,$src,$mem\t! sub packed4D" %}
  3326   ins_encode %{
  3327     bool vector256 = true;
  3328     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3329   %}
  3330   ins_pipe( pipe_slow );
  3331 %}
  3333 // --------------------------------- MUL --------------------------------------
  3335 // Shorts/Chars vector mul
  3336 instruct vmul2S(vecS dst, vecS src) %{
  3337   predicate(n->as_Vector()->length() == 2);
  3338   match(Set dst (MulVS dst src));
  3339   format %{ "pmullw  $dst,$src\t! mul packed2S" %}
  3340   ins_encode %{
  3341     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  3342   %}
  3343   ins_pipe( pipe_slow );
  3344 %}
  3346 instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
  3347   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3348   match(Set dst (MulVS src1 src2));
  3349   format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  3350   ins_encode %{
  3351     bool vector256 = false;
  3352     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3353   %}
  3354   ins_pipe( pipe_slow );
  3355 %}
  3357 instruct vmul4S(vecD dst, vecD src) %{
  3358   predicate(n->as_Vector()->length() == 4);
  3359   match(Set dst (MulVS dst src));
  3360   format %{ "pmullw  $dst,$src\t! mul packed4S" %}
  3361   ins_encode %{
  3362     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  3363   %}
  3364   ins_pipe( pipe_slow );
  3365 %}
  3367 instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
  3368   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3369   match(Set dst (MulVS src1 src2));
  3370   format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  3371   ins_encode %{
  3372     bool vector256 = false;
  3373     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3374   %}
  3375   ins_pipe( pipe_slow );
  3376 %}
  3378 instruct vmul8S(vecX dst, vecX src) %{
  3379   predicate(n->as_Vector()->length() == 8);
  3380   match(Set dst (MulVS dst src));
  3381   format %{ "pmullw  $dst,$src\t! mul packed8S" %}
  3382   ins_encode %{
  3383     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  3384   %}
  3385   ins_pipe( pipe_slow );
  3386 %}
  3388 instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
  3389   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  3390   match(Set dst (MulVS src1 src2));
  3391   format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  3392   ins_encode %{
  3393     bool vector256 = false;
  3394     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3395   %}
  3396   ins_pipe( pipe_slow );
  3397 %}
  3399 instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
  3400   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  3401   match(Set dst (MulVS src (LoadVector mem)));
  3402   format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  3403   ins_encode %{
  3404     bool vector256 = false;
  3405     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3406   %}
  3407   ins_pipe( pipe_slow );
  3408 %}
  3410 instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
  3411   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  3412   match(Set dst (MulVS src1 src2));
  3413   format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  3414   ins_encode %{
  3415     bool vector256 = true;
  3416     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3417   %}
  3418   ins_pipe( pipe_slow );
  3419 %}
  3421 instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
  3422   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  3423   match(Set dst (MulVS src (LoadVector mem)));
  3424   format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  3425   ins_encode %{
  3426     bool vector256 = true;
  3427     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3428   %}
  3429   ins_pipe( pipe_slow );
  3430 %}
  3432 // Integers vector mul (sse4_1: pmulld requires SSE4.1, hence the UseSSE > 3 predicate)
  3433 instruct vmul2I(vecD dst, vecD src) %{
  3434   predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
  3435   match(Set dst (MulVI dst src));
  3436   format %{ "pmulld  $dst,$src\t! mul packed2I" %}
  3437   ins_encode %{
  3438     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  3439   %}
  3440   ins_pipe( pipe_slow );
  3441 %}
  3443 instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
  3444   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3445   match(Set dst (MulVI src1 src2));
  3446   format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
  3447   ins_encode %{
  3448     bool vector256 = false;
  3449     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3450   %}
  3451   ins_pipe( pipe_slow );
  3452 %}
  3454 instruct vmul4I(vecX dst, vecX src) %{
  3455   predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
  3456   match(Set dst (MulVI dst src));
  3457   format %{ "pmulld  $dst,$src\t! mul packed4I" %}
  3458   ins_encode %{
  3459     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  3460   %}
  3461   ins_pipe( pipe_slow );
  3462 %}
  3464 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
  3465   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3466   match(Set dst (MulVI src1 src2));
  3467   format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
  3468   ins_encode %{
  3469     bool vector256 = false;
  3470     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3471   %}
  3472   ins_pipe( pipe_slow );
  3473 %}
  3475 instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
  3476   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3477   match(Set dst (MulVI src (LoadVector mem)));
  3478   format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
  3479   ins_encode %{
  3480     bool vector256 = false;
  3481     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3482   %}
  3483   ins_pipe( pipe_slow );
  3484 %}
  3486 instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
  3487   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  3488   match(Set dst (MulVI src1 src2));
  3489   format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
  3490   ins_encode %{
  3491     bool vector256 = true;
  3492     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3493   %}
  3494   ins_pipe( pipe_slow );
  3495 %}
  3497 instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
  3498   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  3499   match(Set dst (MulVI src (LoadVector mem)));
  3500   format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
  3501   ins_encode %{
  3502     bool vector256 = true;
  3503     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3504   %}
  3505   ins_pipe( pipe_slow );
  3506 %}
  3508 // Floats vector mul
  3509 instruct vmul2F(vecD dst, vecD src) %{
  3510   predicate(n->as_Vector()->length() == 2);
  3511   match(Set dst (MulVF dst src));
  3512   format %{ "mulps   $dst,$src\t! mul packed2F" %}
  3513   ins_encode %{
  3514     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  3515   %}
  3516   ins_pipe( pipe_slow );
  3517 %}
  3519 instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
  3520   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3521   match(Set dst (MulVF src1 src2));
  3522   format %{ "vmulps  $dst,$src1,$src2\t! mul packed2F" %}
  3523   ins_encode %{
  3524     bool vector256 = false;
  3525     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3526   %}
  3527   ins_pipe( pipe_slow );
  3528 %}
  3530 instruct vmul4F(vecX dst, vecX src) %{
  3531   predicate(n->as_Vector()->length() == 4);
  3532   match(Set dst (MulVF dst src));
  3533   format %{ "mulps   $dst,$src\t! mul packed4F" %}
  3534   ins_encode %{
  3535     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  3536   %}
  3537   ins_pipe( pipe_slow );
  3538 %}
  3540 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
  3541   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3542   match(Set dst (MulVF src1 src2));
  3543   format %{ "vmulps  $dst,$src1,$src2\t! mul packed4F" %}
  3544   ins_encode %{
  3545     bool vector256 = false;
  3546     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3547   %}
  3548   ins_pipe( pipe_slow );
  3549 %}
  3551 instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
  3552   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3553   match(Set dst (MulVF src (LoadVector mem)));
  3554   format %{ "vmulps  $dst,$src,$mem\t! mul packed4F" %}
  3555   ins_encode %{
  3556     bool vector256 = false;
  3557     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3558   %}
  3559   ins_pipe( pipe_slow );
  3560 %}
  3562 instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
  3563   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  3564   match(Set dst (MulVF src1 src2));
  3565   format %{ "vmulps  $dst,$src1,$src2\t! mul packed8F" %}
  3566   ins_encode %{
  3567     bool vector256 = true;
  3568     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3569   %}
  3570   ins_pipe( pipe_slow );
  3571 %}
  3573 instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
  3574   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  3575   match(Set dst (MulVF src (LoadVector mem)));
  3576   format %{ "vmulps  $dst,$src,$mem\t! mul packed8F" %}
  3577   ins_encode %{
  3578     bool vector256 = true;
  3579     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3580   %}
  3581   ins_pipe( pipe_slow );
  3582 %}
  3584 // Doubles vector mul
  3585 instruct vmul2D(vecX dst, vecX src) %{
  3586   predicate(n->as_Vector()->length() == 2);
  3587   match(Set dst (MulVD dst src));
  3588   format %{ "mulpd   $dst,$src\t! mul packed2D" %}
  3589   ins_encode %{
  3590     __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  3591   %}
  3592   ins_pipe( pipe_slow );
  3593 %}
  3595 instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
  3596   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3597   match(Set dst (MulVD src1 src2));
  3598   format %{ "vmulpd  $dst,$src1,$src2\t! mul packed2D" %}
  3599   ins_encode %{
  3600     bool vector256 = false;
  3601     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3602   %}
  3603   ins_pipe( pipe_slow );
  3604 %}
  3606 instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
  3607   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3608   match(Set dst (MulVD src (LoadVector mem)));
  3609   format %{ "vmulpd  $dst,$src,$mem\t! mul packed2D" %}
  3610   ins_encode %{
  3611     bool vector256 = false;
  3612     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3613   %}
  3614   ins_pipe( pipe_slow );
  3615 %}
  3617 instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
  3618   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3619   match(Set dst (MulVD src1 src2));
  3620   format %{ "vmulpd  $dst,$src1,$src2\t! mul packed4D" %}
  3621   ins_encode %{
  3622     bool vector256 = true;
  3623     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3624   %}
  3625   ins_pipe( pipe_slow );
  3626 %}
  3628 instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
  3629   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3630   match(Set dst (MulVD src (LoadVector mem)));
  3631   format %{ "vmulpd  $dst,$src,$mem\t! mul packed4D" %}
  3632   ins_encode %{
  3633     bool vector256 = true;
  3634     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3635   %}
  3636   ins_pipe( pipe_slow );
  3637 %}
  3639 // --------------------------------- DIV --------------------------------------
  3641 // Floats vector div
  3642 instruct vdiv2F(vecD dst, vecD src) %{
  3643   predicate(n->as_Vector()->length() == 2);
  3644   match(Set dst (DivVF dst src));
  3645   format %{ "divps   $dst,$src\t! div packed2F" %}
  3646   ins_encode %{
  3647     __ divps($dst$$XMMRegister, $src$$XMMRegister);
  3648   %}
  3649   ins_pipe( pipe_slow );
  3650 %}
  3652 instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
  3653   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3654   match(Set dst (DivVF src1 src2));
  3655   format %{ "vdivps  $dst,$src1,$src2\t! div packed2F" %}
  3656   ins_encode %{
  3657     bool vector256 = false;
  3658     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3659   %}
  3660   ins_pipe( pipe_slow );
  3661 %}
  3663 instruct vdiv4F(vecX dst, vecX src) %{
  3664   predicate(n->as_Vector()->length() == 4);
  3665   match(Set dst (DivVF dst src));
  3666   format %{ "divps   $dst,$src\t! div packed4F" %}
  3667   ins_encode %{
  3668     __ divps($dst$$XMMRegister, $src$$XMMRegister);
  3669   %}
  3670   ins_pipe( pipe_slow );
  3671 %}
  3673 instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
  3674   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3675   match(Set dst (DivVF src1 src2));
  3676   format %{ "vdivps  $dst,$src1,$src2\t! div packed4F" %}
  3677   ins_encode %{
  3678     bool vector256 = false;
  3679     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3680   %}
  3681   ins_pipe( pipe_slow );
  3682 %}
  3684 instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
  3685   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3686   match(Set dst (DivVF src (LoadVector mem)));
  3687   format %{ "vdivps  $dst,$src,$mem\t! div packed4F" %}
  3688   ins_encode %{
  3689     bool vector256 = false;
  3690     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3691   %}
  3692   ins_pipe( pipe_slow );
  3693 %}
  3695 instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
  3696   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  3697   match(Set dst (DivVF src1 src2));
  3698   format %{ "vdivps  $dst,$src1,$src2\t! div packed8F" %}
  3699   ins_encode %{
  3700     bool vector256 = true;
  3701     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3702   %}
  3703   ins_pipe( pipe_slow );
  3704 %}
  3706 instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
  3707   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  3708   match(Set dst (DivVF src (LoadVector mem)));
  3709   format %{ "vdivps  $dst,$src,$mem\t! div packed8F" %}
  3710   ins_encode %{
  3711     bool vector256 = true;
  3712     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3713   %}
  3714   ins_pipe( pipe_slow );
  3715 %}
  3717 // Doubles vector div
  3718 instruct vdiv2D(vecX dst, vecX src) %{
  3719   predicate(n->as_Vector()->length() == 2);
  3720   match(Set dst (DivVD dst src));
  3721   format %{ "divpd   $dst,$src\t! div packed2D" %}
  3722   ins_encode %{
  3723     __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  3724   %}
  3725   ins_pipe( pipe_slow );
  3726 %}
  3728 instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
  3729   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3730   match(Set dst (DivVD src1 src2));
  3731   format %{ "vdivpd  $dst,$src1,$src2\t! div packed2D" %}
  3732   ins_encode %{
  3733     bool vector256 = false;
  3734     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3735   %}
  3736   ins_pipe( pipe_slow );
  3737 %}
  3739 instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
  3740   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3741   match(Set dst (DivVD src (LoadVector mem)));
  3742   format %{ "vdivpd  $dst,$src,$mem\t! div packed2D" %}
  3743   ins_encode %{
  3744     bool vector256 = false;
  3745     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3746   %}
  3747   ins_pipe( pipe_slow );
  3748 %}
  3750 instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
  3751   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3752   match(Set dst (DivVD src1 src2));
  3753   format %{ "vdivpd  $dst,$src1,$src2\t! div packed4D" %}
  3754   ins_encode %{
  3755     bool vector256 = true;
  3756     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3757   %}
  3758   ins_pipe( pipe_slow );
  3759 %}
  3761 instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
  3762   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3763   match(Set dst (DivVD src (LoadVector mem)));
  3764   format %{ "vdivpd  $dst,$src,$mem\t! div packed4D" %}
  3765   ins_encode %{
  3766     bool vector256 = true;
  3767     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3768   %}
  3769   ins_pipe( pipe_slow );
  3770 %}
  3772 // ------------------------------ Shift ---------------------------------------
  3774 // Left and right shift count vectors are the same on x86
  3775 // (only the lowest 64 bits of the xmm register are used for the count); see the sketch after vshiftcnt below.
  3776 instruct vshiftcnt(vecS dst, rRegI cnt) %{
  3777   match(Set dst (LShiftCntV cnt));
  3778   match(Set dst (RShiftCntV cnt));
  3779   format %{ "movd    $dst,$cnt\t! load shift count" %}
  3780   ins_encode %{
  3781     __ movdl($dst$$XMMRegister, $cnt$$Register);
  3782   %}
  3783   ins_pipe( pipe_slow );
  3784 %}
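       // A minimal illustrative sketch (not generated code; xmm_cnt, xmm_a and
       // cnt_reg below are placeholder names used only for exposition): the scalar
       // count is moved into the low bits of one xmm register, and that same
       // register can then feed both packed left and packed logical right shifts:
       //   __ movdl(xmm_cnt, cnt_reg);   // load shift count into low bits of xmm_cnt
       //   __ psllw(xmm_a, xmm_cnt);     // packed left shift of words by that count
       //   __ psrlw(xmm_a, xmm_cnt);     // packed logical right shift by the same count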
  3786 // ------------------------------ LeftShift -----------------------------------
  3788 // Shorts/Chars vector left shift
  3789 instruct vsll2S(vecS dst, vecS shift) %{
  3790   predicate(n->as_Vector()->length() == 2);
  3791   match(Set dst (LShiftVS dst shift));
  3792   format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
  3793   ins_encode %{
  3794     __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  3795   %}
  3796   ins_pipe( pipe_slow );
  3797 %}
  3799 instruct vsll2S_imm(vecS dst, immI8 shift) %{
  3800   predicate(n->as_Vector()->length() == 2);
  3801   match(Set dst (LShiftVS dst shift));
  3802   format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
  3803   ins_encode %{
  3804     __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  3805   %}
  3806   ins_pipe( pipe_slow );
  3807 %}
  3809 instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{
  3810   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3811   match(Set dst (LShiftVS src shift));
  3812   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
  3813   ins_encode %{
  3814     bool vector256 = false;
  3815     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  3816   %}
  3817   ins_pipe( pipe_slow );
  3818 %}
  3820 instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  3821   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3822   match(Set dst (LShiftVS src shift));
  3823   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
  3824   ins_encode %{
  3825     bool vector256 = false;
  3826     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  3827   %}
  3828   ins_pipe( pipe_slow );
  3829 %}
  3831 instruct vsll4S(vecD dst, vecS shift) %{
  3832   predicate(n->as_Vector()->length() == 4);
  3833   match(Set dst (LShiftVS dst shift));
  3834   format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
  3835   ins_encode %{
  3836     __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  3837   %}
  3838   ins_pipe( pipe_slow );
  3839 %}
  3841 instruct vsll4S_imm(vecD dst, immI8 shift) %{
  3842   predicate(n->as_Vector()->length() == 4);
  3843   match(Set dst (LShiftVS dst shift));
  3844   format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
  3845   ins_encode %{
  3846     __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  3847   %}
  3848   ins_pipe( pipe_slow );
  3849 %}
  3851 instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{
  3852   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3853   match(Set dst (LShiftVS src shift));
  3854   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
  3855   ins_encode %{
  3856     bool vector256 = false;
  3857     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  3858   %}
  3859   ins_pipe( pipe_slow );
  3860 %}
  3862 instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  3863   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3864   match(Set dst (LShiftVS src shift));
  3865   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
  3866   ins_encode %{
  3867     bool vector256 = false;
  3868     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  3869   %}
  3870   ins_pipe( pipe_slow );
  3871 %}
  3873 instruct vsll8S(vecX dst, vecS shift) %{
  3874   predicate(n->as_Vector()->length() == 8);
  3875   match(Set dst (LShiftVS dst shift));
  3876   format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
  3877   ins_encode %{
  3878     __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  3879   %}
  3880   ins_pipe( pipe_slow );
  3881 %}
  3883 instruct vsll8S_imm(vecX dst, immI8 shift) %{
  3884   predicate(n->as_Vector()->length() == 8);
  3885   match(Set dst (LShiftVS dst shift));
  3886   format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
  3887   ins_encode %{
  3888     __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  3889   %}
  3890   ins_pipe( pipe_slow );
  3891 %}
  3893 instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{
  3894   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  3895   match(Set dst (LShiftVS src shift));
  3896   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
  3897   ins_encode %{
  3898     bool vector256 = false;
  3899     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  3900   %}
  3901   ins_pipe( pipe_slow );
  3902 %}
  3904 instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  3905   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  3906   match(Set dst (LShiftVS src shift));
  3907   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
  3908   ins_encode %{
  3909     bool vector256 = false;
  3910     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  3911   %}
  3912   ins_pipe( pipe_slow );
  3913 %}
  3915 instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{
  3916   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  3917   match(Set dst (LShiftVS src shift));
  3918   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
  3919   ins_encode %{
  3920     bool vector256 = true;
  3921     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  3922   %}
  3923   ins_pipe( pipe_slow );
  3924 %}
  3926 instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  3927   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  3928   match(Set dst (LShiftVS src shift));
  3929   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
  3930   ins_encode %{
  3931     bool vector256 = true;
  3932     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  3933   %}
  3934   ins_pipe( pipe_slow );
  3935 %}
  3937 // Integers vector left shift
  3938 instruct vsll2I(vecD dst, vecS shift) %{
  3939   predicate(n->as_Vector()->length() == 2);
  3940   match(Set dst (LShiftVI dst shift));
  3941   format %{ "pslld   $dst,$shift\t! left shift packed2I" %}
  3942   ins_encode %{
  3943     __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  3944   %}
  3945   ins_pipe( pipe_slow );
  3946 %}
  3948 instruct vsll2I_imm(vecD dst, immI8 shift) %{
  3949   predicate(n->as_Vector()->length() == 2);
  3950   match(Set dst (LShiftVI dst shift));
  3951   format %{ "pslld   $dst,$shift\t! left shift packed2I" %}
  3952   ins_encode %{
  3953     __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  3954   %}
  3955   ins_pipe( pipe_slow );
  3956 %}
  3958 instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
  3959   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3960   match(Set dst (LShiftVI src shift));
  3961   format %{ "vpslld  $dst,$src,$shift\t! left shift packed2I" %}
  3962   ins_encode %{
  3963     bool vector256 = false;
  3964     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  3965   %}
  3966   ins_pipe( pipe_slow );
  3967 %}
  3969 instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  3970   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3971   match(Set dst (LShiftVI src shift));
  3972   format %{ "vpslld  $dst,$src,$shift\t! left shift packed2I" %}
  3973   ins_encode %{
  3974     bool vector256 = false;
  3975     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  3976   %}
  3977   ins_pipe( pipe_slow );
  3978 %}
  3980 instruct vsll4I(vecX dst, vecS shift) %{
  3981   predicate(n->as_Vector()->length() == 4);
  3982   match(Set dst (LShiftVI dst shift));
  3983   format %{ "pslld   $dst,$shift\t! left shift packed4I" %}
  3984   ins_encode %{
  3985     __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  3986   %}
  3987   ins_pipe( pipe_slow );
  3988 %}
  3990 instruct vsll4I_imm(vecX dst, immI8 shift) %{
  3991   predicate(n->as_Vector()->length() == 4);
  3992   match(Set dst (LShiftVI dst shift));
  3993   format %{ "pslld   $dst,$shift\t! left shift packed4I" %}
  3994   ins_encode %{
  3995     __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  3996   %}
  3997   ins_pipe( pipe_slow );
  3998 %}
  4000 instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
  4001   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  4002   match(Set dst (LShiftVI src shift));
  4003   format %{ "vpslld  $dst,$src,$shift\t! left shift packed4I" %}
  4004   ins_encode %{
  4005     bool vector256 = false;
  4006     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4007   %}
  4008   ins_pipe( pipe_slow );
  4009 %}
  4011 instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  4012   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  4013   match(Set dst (LShiftVI src shift));
  4014   format %{ "vpslld  $dst,$src,$shift\t! left shift packed4I" %}
  4015   ins_encode %{
  4016     bool vector256 = false;
  4017     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4018   %}
  4019   ins_pipe( pipe_slow );
  4020 %}
  4022 instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
  4023   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  4024   match(Set dst (LShiftVI src shift));
  4025   format %{ "vpslld  $dst,$src,$shift\t! left shift packed8I" %}
  4026   ins_encode %{
  4027     bool vector256 = true;
  4028     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4029   %}
  4030   ins_pipe( pipe_slow );
  4031 %}
  4033 instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  4034   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  4035   match(Set dst (LShiftVI src shift));
  4036   format %{ "vpslld  $dst,$src,$shift\t! left shift packed8I" %}
  4037   ins_encode %{
  4038     bool vector256 = true;
  4039     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4040   %}
  4041   ins_pipe( pipe_slow );
  4042 %}
  4044 // Longs vector left shift
  4045 instruct vsll2L(vecX dst, vecS shift) %{
  4046   predicate(n->as_Vector()->length() == 2);
  4047   match(Set dst (LShiftVL dst shift));
  4048   format %{ "psllq   $dst,$shift\t! left shift packed2L" %}
  4049   ins_encode %{
  4050     __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
  4051   %}
  4052   ins_pipe( pipe_slow );
  4053 %}
  4055 instruct vsll2L_imm(vecX dst, immI8 shift) %{
  4056   predicate(n->as_Vector()->length() == 2);
  4057   match(Set dst (LShiftVL dst shift));
  4058   format %{ "psllq   $dst,$shift\t! left shift packed2L" %}
  4059   ins_encode %{
  4060     __ psllq($dst$$XMMRegister, (int)$shift$$constant);
  4061   %}
  4062   ins_pipe( pipe_slow );
  4063 %}
  4065 instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
  4066   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  4067   match(Set dst (LShiftVL src shift));
  4068   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed2L" %}
  4069   ins_encode %{
  4070     bool vector256 = false;
  4071     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4072   %}
  4073   ins_pipe( pipe_slow );
  4074 %}
  4076 instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  4077   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  4078   match(Set dst (LShiftVL src shift));
  4079   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed2L" %}
  4080   ins_encode %{
  4081     bool vector256 = false;
  4082     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4083   %}
  4084   ins_pipe( pipe_slow );
  4085 %}
  4087 instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
  4088   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  4089   match(Set dst (LShiftVL src shift));
  4090   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed4L" %}
  4091   ins_encode %{
  4092     bool vector256 = true;
  4093     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4094   %}
  4095   ins_pipe( pipe_slow );
  4096 %}
  4098 instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  4099   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  4100   match(Set dst (LShiftVL src shift));
  4101   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed4L" %}
  4102   ins_encode %{
  4103     bool vector256 = true;
  4104     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4105   %}
  4106   ins_pipe( pipe_slow );
  4107 %}
  4109 // ----------------------- LogicalRightShift -----------------------------------
  4111 // Shorts vector logical right shift produces an incorrect Java result
  4112 // for negative data because Java code converts short values into ints with
  4113 // sign extension before a shift. But char vectors are fine since chars are
  4114 // unsigned values.
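       // Worked example of the mismatch (hand-computed, hypothetical values): for
       // short s = -2 (0xFFFE), Java promotes s to the int 0xFFFFFFFE before the
       // shift, so (s >>> 1) is 0x7FFFFFFF, which narrows back to (short)0xFFFF = -1;
       // a 16-bit psrlw of 0xFFFE by 1 would instead yield 0x7FFF = 32767. For a
       // char the promotion is zero-extending, so the 16-bit result already agrees.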
  4116 instruct vsrl2S(vecS dst, vecS shift) %{
  4117   predicate(n->as_Vector()->length() == 2);
  4118   match(Set dst (URShiftVS dst shift));
  4119   format %{ "psrlw   $dst,$shift\t! logical right shift packed2S" %}
  4120   ins_encode %{
  4121     __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  4122   %}
  4123   ins_pipe( pipe_slow );
  4124 %}
  4126 instruct vsrl2S_imm(vecS dst, immI8 shift) %{
  4127   predicate(n->as_Vector()->length() == 2);
  4128   match(Set dst (URShiftVS dst shift));
  4129   format %{ "psrlw   $dst,$shift\t! logical right shift packed2S" %}
  4130   ins_encode %{
  4131     __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  4132   %}
  4133   ins_pipe( pipe_slow );
  4134 %}
  4136 instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
  4137   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  4138   match(Set dst (URShiftVS src shift));
  4139   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
  4140   ins_encode %{
  4141     bool vector256 = false;
  4142     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4143   %}
  4144   ins_pipe( pipe_slow );
  4145 %}
  4147 instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  4148   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  4149   match(Set dst (URShiftVS src shift));
  4150   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
  4151   ins_encode %{
  4152     bool vector256 = false;
  4153     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4154   %}
  4155   ins_pipe( pipe_slow );
  4156 %}
  4158 instruct vsrl4S(vecD dst, vecS shift) %{
  4159   predicate(n->as_Vector()->length() == 4);
  4160   match(Set dst (URShiftVS dst shift));
  4161   format %{ "psrlw   $dst,$shift\t! logical right shift packed4S" %}
  4162   ins_encode %{
  4163     __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  4164   %}
  4165   ins_pipe( pipe_slow );
  4166 %}
  4168 instruct vsrl4S_imm(vecD dst, immI8 shift) %{
  4169   predicate(n->as_Vector()->length() == 4);
  4170   match(Set dst (URShiftVS dst shift));
  4171   format %{ "psrlw   $dst,$shift\t! logical right shift packed4S" %}
  4172   ins_encode %{
  4173     __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  4174   %}
  4175   ins_pipe( pipe_slow );
  4176 %}
  4178 instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
  4179   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  4180   match(Set dst (URShiftVS src shift));
  4181   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
  4182   ins_encode %{
  4183     bool vector256 = false;
  4184     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4185   %}
  4186   ins_pipe( pipe_slow );
  4187 %}
  4189 instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  4190   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  4191   match(Set dst (URShiftVS src shift));
  4192   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
  4193   ins_encode %{
  4194     bool vector256 = false;
  4195     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4196   %}
  4197   ins_pipe( pipe_slow );
  4198 %}
  4200 instruct vsrl8S(vecX dst, vecS shift) %{
  4201   predicate(n->as_Vector()->length() == 8);
  4202   match(Set dst (URShiftVS dst shift));
  4203   format %{ "psrlw   $dst,$shift\t! logical right shift packed8S" %}
  4204   ins_encode %{
  4205     __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  4206   %}
  4207   ins_pipe( pipe_slow );
  4208 %}
  4210 instruct vsrl8S_imm(vecX dst, immI8 shift) %{
  4211   predicate(n->as_Vector()->length() == 8);
  4212   match(Set dst (URShiftVS dst shift));
  4213   format %{ "psrlw   $dst,$shift\t! logical right shift packed8S" %}
  4214   ins_encode %{
  4215     __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  4216   %}
  4217   ins_pipe( pipe_slow );
  4218 %}
  4220 instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
  4221   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  4222   match(Set dst (URShiftVS src shift));
  4223   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
  4224   ins_encode %{
  4225     bool vector256 = false;
  4226     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4227   %}
  4228   ins_pipe( pipe_slow );
  4229 %}
  4231 instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  4232   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  4233   match(Set dst (URShiftVS src shift));
  4234   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
  4235   ins_encode %{
  4236     bool vector256 = false;
  4237     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4238   %}
  4239   ins_pipe( pipe_slow );
  4240 %}
  4242 instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
  4243   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  4244   match(Set dst (URShiftVS src shift));
  4245   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
  4246   ins_encode %{
  4247     bool vector256 = true;
  4248     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4249   %}
  4250   ins_pipe( pipe_slow );
  4251 %}
  4253 instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  4254   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  4255   match(Set dst (URShiftVS src shift));
  4256   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
  4257   ins_encode %{
  4258     bool vector256 = true;
  4259     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4260   %}
  4261   ins_pipe( pipe_slow );
  4262 %}
  4264 // Integers vector logical right shift
  4265 instruct vsrl2I(vecD dst, vecS shift) %{
  4266   predicate(n->as_Vector()->length() == 2);
  4267   match(Set dst (URShiftVI dst shift));
  4268   format %{ "psrld   $dst,$shift\t! logical right shift packed2I" %}
  4269   ins_encode %{
  4270     __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  4271   %}
  4272   ins_pipe( pipe_slow );
  4273 %}
  4275 instruct vsrl2I_imm(vecD dst, immI8 shift) %{
  4276   predicate(n->as_Vector()->length() == 2);
  4277   match(Set dst (URShiftVI dst shift));
  4278   format %{ "psrld   $dst,$shift\t! logical right shift packed2I" %}
  4279   ins_encode %{
  4280     __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  4281   %}
  4282   ins_pipe( pipe_slow );
  4283 %}
  4285 instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{
  4286   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  4287   match(Set dst (URShiftVI src shift));
  4288   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed2I" %}
  4289   ins_encode %{
  4290     bool vector256 = false;
  4291     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4292   %}
  4293   ins_pipe( pipe_slow );
  4294 %}
  4296 instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  4297   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  4298   match(Set dst (URShiftVI src shift));
  4299   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed2I" %}
  4300   ins_encode %{
  4301     bool vector256 = false;
  4302     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4303   %}
  4304   ins_pipe( pipe_slow );
  4305 %}
  4307 instruct vsrl4I(vecX dst, vecS shift) %{
  4308   predicate(n->as_Vector()->length() == 4);
  4309   match(Set dst (URShiftVI dst shift));
  4310   format %{ "psrld   $dst,$shift\t! logical right shift packed4I" %}
  4311   ins_encode %{
  4312     __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  4313   %}
  4314   ins_pipe( pipe_slow );
  4315 %}
  4317 instruct vsrl4I_imm(vecX dst, immI8 shift) %{
  4318   predicate(n->as_Vector()->length() == 4);
  4319   match(Set dst (URShiftVI dst shift));
  4320   format %{ "psrld   $dst,$shift\t! logical right shift packed4I" %}
  4321   ins_encode %{
  4322     __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  4323   %}
  4324   ins_pipe( pipe_slow );
  4325 %}
  4327 instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
  4328   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  4329   match(Set dst (URShiftVI src shift));
  4330   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed4I" %}
  4331   ins_encode %{
  4332     bool vector256 = false;
  4333     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4334   %}
  4335   ins_pipe( pipe_slow );
  4336 %}
  4338 instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  4339   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  4340   match(Set dst (URShiftVI src shift));
  4341   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed4I" %}
  4342   ins_encode %{
  4343     bool vector256 = false;
  4344     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4345   %}
  4346   ins_pipe( pipe_slow );
  4347 %}
  4349 instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
  4350   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  4351   match(Set dst (URShiftVI src shift));
  4352   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed8I" %}
  4353   ins_encode %{
  4354     bool vector256 = true;
  4355     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4356   %}
  4357   ins_pipe( pipe_slow );
  4358 %}
  4360 instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  4361   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  4362   match(Set dst (URShiftVI src shift));
  4363   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed8I" %}
  4364   ins_encode %{
  4365     bool vector256 = true;
  4366     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4367   %}
  4368   ins_pipe( pipe_slow );
  4369 %}
  4371 // Longs vector logical right shift
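       // psrlq/vpsrlq shift 64-bit lanes; only the logical form exists for longs
       // (see the note at the end of the ArithmeticRightShift section below).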
  4372 instruct vsrl2L(vecX dst, vecS shift) %{
  4373   predicate(n->as_Vector()->length() == 2);
  4374   match(Set dst (URShiftVL dst shift));
  4375   format %{ "psrlq   $dst,$shift\t! logical right shift packed2L" %}
  4376   ins_encode %{
  4377     __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
  4378   %}
  4379   ins_pipe( pipe_slow );
  4380 %}
  4382 instruct vsrl2L_imm(vecX dst, immI8 shift) %{
  4383   predicate(n->as_Vector()->length() == 2);
  4384   match(Set dst (URShiftVL dst shift));
  4385   format %{ "psrlq   $dst,$shift\t! logical right shift packed2L" %}
  4386   ins_encode %{
  4387     __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
  4388   %}
  4389   ins_pipe( pipe_slow );
  4390 %}
  4392 instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
  4393   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  4394   match(Set dst (URShiftVL src shift));
  4395   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed2L" %}
  4396   ins_encode %{
  4397     bool vector256 = false;
  4398     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4399   %}
  4400   ins_pipe( pipe_slow );
  4401 %}
  4403 instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  4404   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  4405   match(Set dst (URShiftVL src shift));
  4406   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed2L" %}
  4407   ins_encode %{
  4408     bool vector256 = false;
  4409     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4410   %}
  4411   ins_pipe( pipe_slow );
  4412 %}
  4414 instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
  4415   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  4416   match(Set dst (URShiftVL src shift));
  4417   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed4L" %}
  4418   ins_encode %{
  4419     bool vector256 = true;
  4420     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4421   %}
  4422   ins_pipe( pipe_slow );
  4423 %}
  4425 instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  4426   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  4427   match(Set dst (URShiftVL src shift));
  4428   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed4L" %}
  4429   ins_encode %{
  4430     bool vector256 = true;
  4431     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4432   %}
  4433   ins_pipe( pipe_slow );
  4434 %}
  4436 // ------------------- ArithmeticRightShift -----------------------------------
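       // Arithmetic right shifts (psraw/psrad and their AVX counterparts)
       // replicate the sign bit into the vacated high bits, unlike the logical
       // shifts above, which shift in zeroes.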
  4438 // Shorts/Chars vector arithmetic right shift
  4439 instruct vsra2S(vecS dst, vecS shift) %{
  4440   predicate(n->as_Vector()->length() == 2);
  4441   match(Set dst (RShiftVS dst shift));
  4442   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed2S" %}
  4443   ins_encode %{
  4444     __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  4445   %}
  4446   ins_pipe( pipe_slow );
  4447 %}
  4449 instruct vsra2S_imm(vecS dst, immI8 shift) %{
  4450   predicate(n->as_Vector()->length() == 2);
  4451   match(Set dst (RShiftVS dst shift));
  4452   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed2S" %}
  4453   ins_encode %{
  4454     __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  4455   %}
  4456   ins_pipe( pipe_slow );
  4457 %}
  4459 instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{
  4460   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  4461   match(Set dst (RShiftVS src shift));
  4462   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  4463   ins_encode %{
  4464     bool vector256 = false;
  4465     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4466   %}
  4467   ins_pipe( pipe_slow );
  4468 %}
  4470 instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  4471   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  4472   match(Set dst (RShiftVS src shift));
  4473   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  4474   ins_encode %{
  4475     bool vector256 = false;
  4476     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4477   %}
  4478   ins_pipe( pipe_slow );
  4479 %}
  4481 instruct vsra4S(vecD dst, vecS shift) %{
  4482   predicate(n->as_Vector()->length() == 4);
  4483   match(Set dst (RShiftVS dst shift));
  4484   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
  4485   ins_encode %{
  4486     __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  4487   %}
  4488   ins_pipe( pipe_slow );
  4489 %}
  4491 instruct vsra4S_imm(vecD dst, immI8 shift) %{
  4492   predicate(n->as_Vector()->length() == 4);
  4493   match(Set dst (RShiftVS dst shift));
  4494   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
  4495   ins_encode %{
  4496     __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  4497   %}
  4498   ins_pipe( pipe_slow );
  4499 %}
  4501 instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{
  4502   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  4503   match(Set dst (RShiftVS src shift));
  4504   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  4505   ins_encode %{
  4506     bool vector256 = false;
  4507     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4508   %}
  4509   ins_pipe( pipe_slow );
  4510 %}
  4512 instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  4513   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  4514   match(Set dst (RShiftVS src shift));
  4515   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  4516   ins_encode %{
  4517     bool vector256 = false;
  4518     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4519   %}
  4520   ins_pipe( pipe_slow );
  4521 %}
  4523 instruct vsra8S(vecX dst, vecS shift) %{
  4524   predicate(n->as_Vector()->length() == 8);
  4525   match(Set dst (RShiftVS dst shift));
  4526   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
  4527   ins_encode %{
  4528     __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  4529   %}
  4530   ins_pipe( pipe_slow );
  4531 %}
  4533 instruct vsra8S_imm(vecX dst, immI8 shift) %{
  4534   predicate(n->as_Vector()->length() == 8);
  4535   match(Set dst (RShiftVS dst shift));
  4536   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
  4537   ins_encode %{
  4538     __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  4539   %}
  4540   ins_pipe( pipe_slow );
  4541 %}
  4543 instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{
  4544   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  4545   match(Set dst (RShiftVS src shift));
  4546   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  4547   ins_encode %{
  4548     bool vector256 = false;
  4549     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4550   %}
  4551   ins_pipe( pipe_slow );
  4552 %}
  4554 instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  4555   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  4556   match(Set dst (RShiftVS src shift));
  4557   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  4558   ins_encode %{
  4559     bool vector256 = false;
  4560     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4561   %}
  4562   ins_pipe( pipe_slow );
  4563 %}
  4565 instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{
  4566   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  4567   match(Set dst (RShiftVS src shift));
  4568   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  4569   ins_encode %{
  4570     bool vector256 = true;
  4571     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4572   %}
  4573   ins_pipe( pipe_slow );
  4574 %}
  4576 instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  4577   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  4578   match(Set dst (RShiftVS src shift));
  4579   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  4580   ins_encode %{
  4581     bool vector256 = true;
  4582     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4583   %}
  4584   ins_pipe( pipe_slow );
  4585 %}
  4587 // Integers vector arithmetic right shift
  4588 instruct vsra2I(vecD dst, vecS shift) %{
  4589   predicate(n->as_Vector()->length() == 2);
  4590   match(Set dst (RShiftVI dst shift));
  4591   format %{ "psrad   $dst,$shift\t! arithmetic right shift packed2I" %}
  4592   ins_encode %{
  4593     __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  4594   %}
  4595   ins_pipe( pipe_slow );
  4596 %}
  4598 instruct vsra2I_imm(vecD dst, immI8 shift) %{
  4599   predicate(n->as_Vector()->length() == 2);
  4600   match(Set dst (RShiftVI dst shift));
  4601   format %{ "psrad   $dst,$shift\t! arithmetic right shift packed2I" %}
  4602   ins_encode %{
  4603     __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  4604   %}
  4605   ins_pipe( pipe_slow );
  4606 %}
  4608 instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
  4609   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  4610   match(Set dst (RShiftVI src shift));
  4611   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  4612   ins_encode %{
  4613     bool vector256 = false;
  4614     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4615   %}
  4616   ins_pipe( pipe_slow );
  4617 %}
  4619 instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  4620   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  4621   match(Set dst (RShiftVI src shift));
  4622   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  4623   ins_encode %{
  4624     bool vector256 = false;
  4625     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4626   %}
  4627   ins_pipe( pipe_slow );
  4628 %}
  4630 instruct vsra4I(vecX dst, vecS shift) %{
  4631   predicate(n->as_Vector()->length() == 4);
  4632   match(Set dst (RShiftVI dst shift));
  4633   format %{ "psrad   $dst,$shift\t! arithmetic right shift packed4I" %}
  4634   ins_encode %{
  4635     __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  4636   %}
  4637   ins_pipe( pipe_slow );
  4638 %}
  4640 instruct vsra4I_imm(vecX dst, immI8 shift) %{
  4641   predicate(n->as_Vector()->length() == 4);
  4642   match(Set dst (RShiftVI dst shift));
  4643   format %{ "psrad   $dst,$shift\t! arithmetic right shift packed4I" %}
  4644   ins_encode %{
  4645     __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  4646   %}
  4647   ins_pipe( pipe_slow );
  4648 %}
  4650 instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
  4651   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  4652   match(Set dst (RShiftVI src shift));
  4653   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  4654   ins_encode %{
  4655     bool vector256 = false;
  4656     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4657   %}
  4658   ins_pipe( pipe_slow );
  4659 %}
  4661 instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  4662   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  4663   match(Set dst (RShiftVI src shift));
  4664   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  4665   ins_encode %{
  4666     bool vector256 = false;
  4667     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4668   %}
  4669   ins_pipe( pipe_slow );
  4670 %}
  4672 instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
  4673   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  4674   match(Set dst (RShiftVI src shift));
  4675   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  4676   ins_encode %{
  4677     bool vector256 = true;
  4678     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4679   %}
  4680   ins_pipe( pipe_slow );
  4681 %}
  4683 instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  4684   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  4685   match(Set dst (RShiftVI src shift));
  4686   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  4687   ins_encode %{
  4688     bool vector256 = true;
  4689     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4690   %}
  4691   ins_pipe( pipe_slow );
  4692 %}
  4694 // There are no vector arithmetic right shift instructions for longs.
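       // The ISA (through AVX2) provides psraw and psrad but no 64-bit psraq,
       // which is why no RShiftVL rules appear in this file.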
  4697 // --------------------------------- AND --------------------------------------
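       // pand is the destructive two-operand SSE2 form; vpand is the three-operand
       // AVX form. The _mem variants fold a LoadVector into the instruction's
       // memory operand, and the 32-byte forms require AVX2 (UseAVX > 1).
       // The OR and XOR sections below follow the same pattern.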
  4699 instruct vand4B(vecS dst, vecS src) %{
  4700   predicate(n->as_Vector()->length_in_bytes() == 4);
  4701   match(Set dst (AndV dst src));
  4702   format %{ "pand    $dst,$src\t! and vectors (4 bytes)" %}
  4703   ins_encode %{
  4704     __ pand($dst$$XMMRegister, $src$$XMMRegister);
  4705   %}
  4706   ins_pipe( pipe_slow );
  4707 %}
  4709 instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
  4710   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  4711   match(Set dst (AndV src1 src2));
  4712   format %{ "vpand   $dst,$src1,$src2\t! and vectors (4 bytes)" %}
  4713   ins_encode %{
  4714     bool vector256 = false;
  4715     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  4716   %}
  4717   ins_pipe( pipe_slow );
  4718 %}
  4720 instruct vand8B(vecD dst, vecD src) %{
  4721   predicate(n->as_Vector()->length_in_bytes() == 8);
  4722   match(Set dst (AndV dst src));
  4723   format %{ "pand    $dst,$src\t! and vectors (8 bytes)" %}
  4724   ins_encode %{
  4725     __ pand($dst$$XMMRegister, $src$$XMMRegister);
  4726   %}
  4727   ins_pipe( pipe_slow );
  4728 %}
  4730 instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
  4731   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  4732   match(Set dst (AndV src1 src2));
  4733   format %{ "vpand   $dst,$src1,$src2\t! and vectors (8 bytes)" %}
  4734   ins_encode %{
  4735     bool vector256 = false;
  4736     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  4737   %}
  4738   ins_pipe( pipe_slow );
  4739 %}
  4741 instruct vand16B(vecX dst, vecX src) %{
  4742   predicate(n->as_Vector()->length_in_bytes() == 16);
  4743   match(Set dst (AndV dst src));
  4744   format %{ "pand    $dst,$src\t! and vectors (16 bytes)" %}
  4745   ins_encode %{
  4746     __ pand($dst$$XMMRegister, $src$$XMMRegister);
  4747   %}
  4748   ins_pipe( pipe_slow );
  4749 %}
  4751 instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
  4752   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  4753   match(Set dst (AndV src1 src2));
  4754   format %{ "vpand   $dst,$src1,$src2\t! and vectors (16 bytes)" %}
  4755   ins_encode %{
  4756     bool vector256 = false;
  4757     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  4758   %}
  4759   ins_pipe( pipe_slow );
  4760 %}
  4762 instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
  4763   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  4764   match(Set dst (AndV src (LoadVector mem)));
  4765   format %{ "vpand   $dst,$src,$mem\t! and vectors (16 bytes)" %}
  4766   ins_encode %{
  4767     bool vector256 = false;
  4768     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  4769   %}
  4770   ins_pipe( pipe_slow );
  4771 %}
  4773 instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
  4774   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  4775   match(Set dst (AndV src1 src2));
  4776   format %{ "vpand   $dst,$src1,$src2\t! and vectors (32 bytes)" %}
  4777   ins_encode %{
  4778     bool vector256 = true;
  4779     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  4780   %}
  4781   ins_pipe( pipe_slow );
  4782 %}
  4784 instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
  4785   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  4786   match(Set dst (AndV src (LoadVector mem)));
  4787   format %{ "vpand   $dst,$src,$mem\t! and vectors (32 bytes)" %}
  4788   ins_encode %{
  4789     bool vector256 = true;
  4790     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  4791   %}
  4792   ins_pipe( pipe_slow );
  4793 %}
  4795 // --------------------------------- OR ---------------------------------------
  4797 instruct vor4B(vecS dst, vecS src) %{
  4798   predicate(n->as_Vector()->length_in_bytes() == 4);
  4799   match(Set dst (OrV dst src));
  4800   format %{ "por     $dst,$src\t! or vectors (4 bytes)" %}
  4801   ins_encode %{
  4802     __ por($dst$$XMMRegister, $src$$XMMRegister);
  4803   %}
  4804   ins_pipe( pipe_slow );
  4805 %}
  4807 instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  4808   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  4809   match(Set dst (OrV src1 src2));
  4810   format %{ "vpor    $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  4811   ins_encode %{
  4812     bool vector256 = false;
  4813     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  4814   %}
  4815   ins_pipe( pipe_slow );
  4816 %}
  4818 instruct vor8B(vecD dst, vecD src) %{
  4819   predicate(n->as_Vector()->length_in_bytes() == 8);
  4820   match(Set dst (OrV dst src));
  4821   format %{ "por     $dst,$src\t! or vectors (8 bytes)" %}
  4822   ins_encode %{
  4823     __ por($dst$$XMMRegister, $src$$XMMRegister);
  4824   %}
  4825   ins_pipe( pipe_slow );
  4826 %}
  4828 instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  4829   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  4830   match(Set dst (OrV src1 src2));
  4831   format %{ "vpor    $dst,$src1,$src2\t! or vectors (8 bytes)" %}
  4832   ins_encode %{
  4833     bool vector256 = false;
  4834     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  4835   %}
  4836   ins_pipe( pipe_slow );
  4837 %}
  4839 instruct vor16B(vecX dst, vecX src) %{
  4840   predicate(n->as_Vector()->length_in_bytes() == 16);
  4841   match(Set dst (OrV dst src));
  4842   format %{ "por     $dst,$src\t! or vectors (16 bytes)" %}
  4843   ins_encode %{
  4844     __ por($dst$$XMMRegister, $src$$XMMRegister);
  4845   %}
  4846   ins_pipe( pipe_slow );
  4847 %}
  4849 instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  4850   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  4851   match(Set dst (OrV src1 src2));
  4852   format %{ "vpor    $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  4853   ins_encode %{
  4854     bool vector256 = false;
  4855     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  4856   %}
  4857   ins_pipe( pipe_slow );
  4858 %}
  4860 instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  4861   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  4862   match(Set dst (OrV src (LoadVector mem)));
  4863   format %{ "vpor    $dst,$src,$mem\t! or vectors (16 bytes)" %}
  4864   ins_encode %{
  4865     bool vector256 = false;
  4866     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  4867   %}
  4868   ins_pipe( pipe_slow );
  4869 %}
  4871 instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  4872   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  4873   match(Set dst (OrV src1 src2));
  4874   format %{ "vpor    $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  4875   ins_encode %{
  4876     bool vector256 = true;
  4877     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  4878   %}
  4879   ins_pipe( pipe_slow );
  4880 %}
  4882 instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  4883   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  4884   match(Set dst (OrV src (LoadVector mem)));
  4885   format %{ "vpor    $dst,$src,$mem\t! or vectors (32 bytes)" %}
  4886   ins_encode %{
  4887     bool vector256 = true;
  4888     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  4889   %}
  4890   ins_pipe( pipe_slow );
  4891 %}
  4893 // --------------------------------- XOR --------------------------------------
  4895 instruct vxor4B(vecS dst, vecS src) %{
  4896   predicate(n->as_Vector()->length_in_bytes() == 4);
  4897   match(Set dst (XorV dst src));
  4898   format %{ "pxor    $dst,$src\t! xor vectors (4 bytes)" %}
  4899   ins_encode %{
  4900     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  4901   %}
  4902   ins_pipe( pipe_slow );
  4903 %}
  4905 instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  4906   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  4907   match(Set dst (XorV src1 src2));
  4908   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  4909   ins_encode %{
  4910     bool vector256 = false;
  4911     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  4912   %}
  4913   ins_pipe( pipe_slow );
  4914 %}
  4916 instruct vxor8B(vecD dst, vecD src) %{
  4917   predicate(n->as_Vector()->length_in_bytes() == 8);
  4918   match(Set dst (XorV dst src));
  4919   format %{ "pxor    $dst,$src\t! xor vectors (8 bytes)" %}
  4920   ins_encode %{
  4921     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  4922   %}
  4923   ins_pipe( pipe_slow );
  4924 %}
  4926 instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  4927   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  4928   match(Set dst (XorV src1 src2));
  4929   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  4930   ins_encode %{
  4931     bool vector256 = false;
  4932     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  4933   %}
  4934   ins_pipe( pipe_slow );
  4935 %}
  4937 instruct vxor16B(vecX dst, vecX src) %{
  4938   predicate(n->as_Vector()->length_in_bytes() == 16);
  4939   match(Set dst (XorV dst src));
  4940   format %{ "pxor    $dst,$src\t! xor vectors (16 bytes)" %}
  4941   ins_encode %{
  4942     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  4943   %}
  4944   ins_pipe( pipe_slow );
  4945 %}
  4947 instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  4948   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  4949   match(Set dst (XorV src1 src2));
  4950   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  4951   ins_encode %{
  4952     bool vector256 = false;
  4953     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  4954   %}
  4955   ins_pipe( pipe_slow );
  4956 %}
  4958 instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  4959   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  4960   match(Set dst (XorV src (LoadVector mem)));
  4961   format %{ "vpxor   $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  4962   ins_encode %{
  4963     bool vector256 = false;
  4964     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  4965   %}
  4966   ins_pipe( pipe_slow );
  4967 %}
  4969 instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  4970   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  4971   match(Set dst (XorV src1 src2));
  4972   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  4973   ins_encode %{
  4974     bool vector256 = true;
  4975     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  4976   %}
  4977   ins_pipe( pipe_slow );
  4978 %}
  4980 instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  4981   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  4982   match(Set dst (XorV src (LoadVector mem)));
  4983   format %{ "vpxor   $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  4984   ins_encode %{
  4985     bool vector256 = true;
  4986     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  4987   %}
  4988   ins_pipe( pipe_slow );
  4989 %}
