src/cpu/x86/vm/x86.ad

author:      kvn
date:        Mon, 20 Aug 2012 09:07:21 -0700
changeset:   4001:006050192a5a
parent:      3929:2c368ea3e844
child:       4037:da91efe96a93

6340864: Implement vectorization optimizations in hotspot-server
Summary: Added asm encoding and mach nodes for vector arithmetic instructions on x86.
Reviewed-by: roland

     1 //
     2 // Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
     3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4 //
     5 // This code is free software; you can redistribute it and/or modify it
     6 // under the terms of the GNU General Public License version 2 only, as
     7 // published by the Free Software Foundation.
     8 //
     9 // This code is distributed in the hope that it will be useful, but WITHOUT
    10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12 // version 2 for more details (a copy is included in the LICENSE file that
    13 // accompanied this code).
    14 //
    15 // You should have received a copy of the GNU General Public License version
    16 // 2 along with this work; if not, write to the Free Software Foundation,
    17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18 //
    19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    20 // or visit www.oracle.com if you need additional information or have any
    21 // questions.
    22 //
    23 //
    25 // X86 Common Architecture Description File
    27 //----------REGISTER DEFINITION BLOCK------------------------------------------
    28 // This information is used by the matcher and the register allocator to
    29 // describe individual registers and classes of registers within the target
    30 // architecture.
    32 register %{
    33 //----------Architecture Description Register Definitions----------------------
    34 // General Registers
    35 // "reg_def"  name ( register save type, C convention save type,
    36 //                   ideal register type, encoding );
    37 // Register Save Types:
    38 //
    39 // NS  = No-Save:       The register allocator assumes that these registers
    40 //                      can be used without saving upon entry to the method, &
    41 //                      that they do not need to be saved at call sites.
    42 //
    43 // SOC = Save-On-Call:  The register allocator assumes that these registers
    44 //                      can be used without saving upon entry to the method,
    45 //                      but that they must be saved at call sites.
    46 //
    47 // SOE = Save-On-Entry: The register allocator assumes that these registers
    48 //                      must be saved before using them upon entry to the
    49 //                      method, but they do not need to be saved at call
    50 //                      sites.
    51 //
    52 // AS  = Always-Save:   The register allocator assumes that these registers
    53 //                      must be saved before using them upon entry to the
    54 //                      method, & that they must be saved at call sites.
    55 //
    56 // Ideal Register Type is used to determine how to save & restore a
    57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
    58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
    59 //
    60 // The encoding number is the actual bit-pattern placed into the opcodes.
    62 // XMM registers.  256-bit registers of 8 words each, labeled (a)-(h).
    63 // Word (a) in each register holds a Float, words (a)-(b) hold a Double.
    64 // The whole registers are used in SSE4.2 version intrinsics,
    65 // array copy stubs and superword operations (see UseSSE42Intrinsics,
    66 // UseXMMForArrayCopy and UseSuperword flags).
    67 // XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
    68 // Linux ABI:   No registers are preserved across function calls;
    69 //              XMM0-XMM7 might hold parameters.
    70 // Windows ABI: XMM6-XMM15 are preserved across function calls;
    71 //              XMM0-XMM3 might hold parameters.
    73 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
    74 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
    75 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
    76 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
    77 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
    78 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
    79 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
    80 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
    82 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
    83 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
    84 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
    85 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
    86 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
    87 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
    88 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
    89 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
    91 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
    92 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
    93 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
    94 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
    95 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
    96 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
    97 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
    98 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
   100 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
   101 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
   102 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
   103 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
   104 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
   105 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
   106 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
   107 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
   109 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
   110 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
   111 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
   112 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
   113 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
   114 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
   115 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
   116 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
   118 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
   119 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
   120 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
   121 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
   122 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
   123 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
   124 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
   125 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
   127 #ifdef _WIN64
   129 reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
   130 reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
   131 reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
   132 reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
   133 reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
   134 reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
   135 reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
   136 reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));
   138 reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
   139 reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
   140 reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
   141 reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
   142 reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
   143 reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
   144 reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
   145 reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));
   147 reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
   148 reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
   149 reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
   150 reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
   151 reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
   152 reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
   153 reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
   154 reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));
   156 reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
   157 reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
   158 reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
   159 reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
   160 reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
   161 reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
   162 reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
   163 reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));
   165 reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
   166 reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
   167 reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
   168 reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
   169 reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
   170 reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
   171 reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
   172 reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));
   174 reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
   175 reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
   176 reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
   177 reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
   178 reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
   179 reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
   180 reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
   181 reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));
   183 reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
   184 reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
   185 reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
   186 reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
   187 reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
   188 reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
   189 reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
   190 reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));
   192 reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
   193 reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
   194 reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
   195 reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
   196 reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
   197 reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
   198 reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
   199 reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));
   201 reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
   202 reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
   203 reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
   204 reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
   205 reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
   206 reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
   207 reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
   208 reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));
   210 reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
   211 reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
   212 reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
   213 reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
   214 reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
   215 reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
   216 reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
   217 reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));
   219 #else // _WIN64
   221 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
   222 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
   223 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
   224 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
   225 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
   226 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
   227 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
   228 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
   230 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
   231 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
   232 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
   233 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
   234 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
   235 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
   236 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
   237 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
   239 #ifdef _LP64
   241 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
   242 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
   243 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
   244 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
   245 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
   246 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
   247 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
   248 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
   250 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
   251 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
   252 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
   253 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
   254 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
   255 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
   256 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
   257 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
   259 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
   260 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
   261 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
   262 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
   263 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
   264 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
   265 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
   266 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
   268 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
   269 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
   270 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
   271 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
   272 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
   273 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
   274 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
   275 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
   277 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
   278 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
   279 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
   280 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
   281 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
   282 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
   283 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
   284 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
   286 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
   287 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
   288 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
   289 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
   290 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
   291 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
   292 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
   293 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
   295 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
   296 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
   297 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
   298 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
   299 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
   300 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
   301 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
   302 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
   304 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
   305 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
   306 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
   307 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
   308 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
   309 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
   310 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
   311 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
   313 #endif // _LP64
   315 #endif // _WIN64
   317 #ifdef _LP64
   318 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
   319 #else
   320 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
   321 #endif // _LP64
   323 alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
   324                    XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
   325                    XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
   326                    XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
   327                    XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
   328                    XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
   329                    XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
   330                    XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
   331 #ifdef _LP64
   332                   ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
   333                    XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
   334                    XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
   335                    XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
   336                    XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
   337                    XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
   338                    XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
   339                    XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
   340 #endif
   341                    );
   343 // flags allocation class should be last.
   344 alloc_class chunk2(RFLAGS);
   346 // Singleton class for condition codes
   347 reg_class int_flags(RFLAGS);
   349 // Class for all float registers
   350 reg_class float_reg(XMM0,
   351                     XMM1,
   352                     XMM2,
   353                     XMM3,
   354                     XMM4,
   355                     XMM5,
   356                     XMM6,
   357                     XMM7
   358 #ifdef _LP64
   359                    ,XMM8,
   360                     XMM9,
   361                     XMM10,
   362                     XMM11,
   363                     XMM12,
   364                     XMM13,
   365                     XMM14,
   366                     XMM15
   367 #endif
   368                     );
   370 // Class for all double registers
   371 reg_class double_reg(XMM0,  XMM0b,
   372                      XMM1,  XMM1b,
   373                      XMM2,  XMM2b,
   374                      XMM3,  XMM3b,
   375                      XMM4,  XMM4b,
   376                      XMM5,  XMM5b,
   377                      XMM6,  XMM6b,
   378                      XMM7,  XMM7b
   379 #ifdef _LP64
   380                     ,XMM8,  XMM8b,
   381                      XMM9,  XMM9b,
   382                      XMM10, XMM10b,
   383                      XMM11, XMM11b,
   384                      XMM12, XMM12b,
   385                      XMM13, XMM13b,
   386                      XMM14, XMM14b,
   387                      XMM15, XMM15b
   388 #endif
   389                      );
   391 // Class for all 32bit vector registers
   392 reg_class vectors_reg(XMM0,
   393                       XMM1,
   394                       XMM2,
   395                       XMM3,
   396                       XMM4,
   397                       XMM5,
   398                       XMM6,
   399                       XMM7
   400 #ifdef _LP64
   401                      ,XMM8,
   402                       XMM9,
   403                       XMM10,
   404                       XMM11,
   405                       XMM12,
   406                       XMM13,
   407                       XMM14,
   408                       XMM15
   409 #endif
   410                       );
   412 // Class for all 64bit vector registers
   413 reg_class vectord_reg(XMM0,  XMM0b,
   414                       XMM1,  XMM1b,
   415                       XMM2,  XMM2b,
   416                       XMM3,  XMM3b,
   417                       XMM4,  XMM4b,
   418                       XMM5,  XMM5b,
   419                       XMM6,  XMM6b,
   420                       XMM7,  XMM7b
   421 #ifdef _LP64
   422                      ,XMM8,  XMM8b,
   423                       XMM9,  XMM9b,
   424                       XMM10, XMM10b,
   425                       XMM11, XMM11b,
   426                       XMM12, XMM12b,
   427                       XMM13, XMM13b,
   428                       XMM14, XMM14b,
   429                       XMM15, XMM15b
   430 #endif
   431                       );
   433 // Class for all 128bit vector registers
   434 reg_class vectorx_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,
   435                       XMM1,  XMM1b,  XMM1c,  XMM1d,
   436                       XMM2,  XMM2b,  XMM2c,  XMM2d,
   437                       XMM3,  XMM3b,  XMM3c,  XMM3d,
   438                       XMM4,  XMM4b,  XMM4c,  XMM4d,
   439                       XMM5,  XMM5b,  XMM5c,  XMM5d,
   440                       XMM6,  XMM6b,  XMM6c,  XMM6d,
   441                       XMM7,  XMM7b,  XMM7c,  XMM7d
   442 #ifdef _LP64
   443                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,
   444                       XMM9,  XMM9b,  XMM9c,  XMM9d,
   445                       XMM10, XMM10b, XMM10c, XMM10d,
   446                       XMM11, XMM11b, XMM11c, XMM11d,
   447                       XMM12, XMM12b, XMM12c, XMM12d,
   448                       XMM13, XMM13b, XMM13c, XMM13d,
   449                       XMM14, XMM14b, XMM14c, XMM14d,
   450                       XMM15, XMM15b, XMM15c, XMM15d
   451 #endif
   452                       );
   454 // Class for all 256bit vector registers
   455 reg_class vectory_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
   456                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
   457                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
   458                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
   459                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
   460                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
   461                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
   462                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
   463 #ifdef _LP64
   464                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
   465                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
   466                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
   467                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
   468                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
   469                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
   470                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
   471                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
   472 #endif
   473                       );
   475 %}
   477 source %{
   478   // Float masks come from different places depending on platform.
   479 #ifdef _LP64
   480   static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
   481   static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
   482   static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
   483   static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
   484 #else
   485   static address float_signmask()  { return (address)float_signmask_pool; }
   486   static address float_signflip()  { return (address)float_signflip_pool; }
   487   static address double_signmask() { return (address)double_signmask_pool; }
   488   static address double_signflip() { return (address)double_signflip_pool; }
   489 #endif
   491 // Map Types to machine register types
   492 const int Matcher::base2reg[Type::lastype] = {
   493   Node::NotAMachineReg,0,0, Op_RegI, Op_RegL, 0, Op_RegN,
   494   Node::NotAMachineReg, Node::NotAMachineReg, /* tuple, array */
   495   Op_VecS, Op_VecD, Op_VecX, Op_VecY, /* Vectors */
   496   Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, /* the pointers */
   497   0, 0/*abio*/,
   498   Op_RegP /* Return address */, 0, /* the memories */
   499   Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD,
   500   0  /*bottom*/
   501 };
   503 const bool Matcher::match_rule_supported(int opcode) {
   504   if (!has_match_rule(opcode))
   505     return false;
   507   switch (opcode) {
   508     case Op_PopCountI:
   509     case Op_PopCountL:
   510       if (!UsePopCountInstruction)
   511         return false;
   512     case Op_MulVI:
   513       if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
   514         return false;
   515     break;
   516   }
   518   return true;  // By default, match rules are supported.
   519 }
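       // Worked example (illustration only, assuming the flag values shown and that a
       // match rule exists for Op_MulVI):
       //   UseSSE = 2, UseAVX = 0  ->  match_rule_supported(Op_MulVI) == false
       //   UseSSE = 4, UseAVX = 0  ->  match_rule_supported(Op_MulVI) == true
       // Note that the PopCount cases above fall through into Op_MulVI, so
       // Op_PopCountI/L are likewise rejected when UseSSE < 4 and UseAVX < 1.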
   521 // Max vector size in bytes. 0 if not supported.
   522 const int Matcher::vector_width_in_bytes(BasicType bt) {
   523   assert(is_java_primitive(bt), "only primitive type vectors");
   524   if (UseSSE < 2) return 0;
   525   // SSE2 supports 128bit vectors for all types.
   526   // AVX2 supports 256bit vectors for all types.
   527   int size = (UseAVX > 1) ? 32 : 16;
   528   // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
   529   if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
   530     size = 32;
   531   // Use flag to limit vector size.
   532   size = MIN2(size,(int)MaxVectorSize);
   533   // Minimum 2 values in vector (or 4 for bytes).
   534   switch (bt) {
   535   case T_DOUBLE:
   536   case T_LONG:
   537     if (size < 16) return 0;
   538   case T_FLOAT:
   539   case T_INT:
   540     if (size < 8) return 0;
   541   case T_BOOLEAN:
   542   case T_BYTE:
   543   case T_CHAR:
   544   case T_SHORT:
   545     if (size < 4) return 0;
   546     break;
   547   default:
   548     ShouldNotReachHere();
   549   }
   550   return size;
   551 }
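       // Worked example (illustration only, assuming MaxVectorSize = 32):
       //   UseAVX = 2             -> 32 bytes for every element type
       //   UseAVX = 1             -> 32 bytes for T_FLOAT/T_DOUBLE, 16 bytes otherwise
       //   UseAVX = 0, UseSSE = 2 -> 16 bytes for every element type
       //   UseSSE < 2             -> 0 (vectorization not supported)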
   553 // Limits on vector size (number of elements) loaded into vector.
   554 const int Matcher::max_vector_size(const BasicType bt) {
   555   return vector_width_in_bytes(bt)/type2aelembytes(bt);
   556 }
   557 const int Matcher::min_vector_size(const BasicType bt) {
   558   int max_size = max_vector_size(bt);
   559   // Min size which can be loaded into vector is 4 bytes.
   560   int size = (type2aelembytes(bt) == 1) ? 4 : 2;
   561   return MIN2(size,max_size);
   562 }
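       // Worked example (illustration only): with a 16-byte vector width,
       //   max_vector_size(T_INT)  = 16/4 = 4,  min_vector_size(T_INT)  = 2
       //   max_vector_size(T_BYTE) = 16/1 = 16, min_vector_size(T_BYTE) = 4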
   564 // Vector ideal reg corresponding to specified size in bytes
   565 const int Matcher::vector_ideal_reg(int size) {
   566   assert(MaxVectorSize >= size, "");
   567   switch(size) {
   568     case  4: return Op_VecS;
   569     case  8: return Op_VecD;
   570     case 16: return Op_VecX;
   571     case 32: return Op_VecY;
   572   }
   573   ShouldNotReachHere();
   574   return 0;
   575 }
   577 // x86 supports misaligned vector stores/loads.
   578 const bool Matcher::misaligned_vectors_ok() {
   579   return !AlignVector; // can be changed by flag
   580 }
   582 // Helper methods for MachSpillCopyNode::implementation().
   583 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
   584                           int src_hi, int dst_hi, uint ireg, outputStream* st) {
   585   // In the 64-bit VM size calculation is very complex, so instructions are
   586   // emitted into a scratch buffer to determine the size.
   587   LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
   588   assert(ireg == Op_VecS || // 32bit vector
   589          (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
   590          (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
   591          "no non-adjacent vector moves" );
   592   if (cbuf) {
   593     MacroAssembler _masm(cbuf);
   594     int offset = __ offset();
   595     switch (ireg) {
   596     case Op_VecS: // copy whole register
   597     case Op_VecD:
   598     case Op_VecX:
   599       __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
   600       break;
   601     case Op_VecY:
   602       __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
   603       break;
   604     default:
   605       ShouldNotReachHere();
   606     }
   607     int size = __ offset() - offset;
   608 #ifdef ASSERT
   609     // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
   610     assert(!do_size || size == 4, "incorrect size calculation");
   611 #endif
   612     return size;
   613 #ifndef PRODUCT
   614   } else if (!do_size) {
   615     switch (ireg) {
   616     case Op_VecS:
   617     case Op_VecD:
   618     case Op_VecX:
   619       st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
   620       break;
   621     case Op_VecY:
   622       st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
   623       break;
   624     default:
   625       ShouldNotReachHere();
   626     }
   627 #endif
   628   }
   629   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
   630   return 4;
   631 }
   633 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
   634                             int stack_offset, int reg, uint ireg, outputStream* st) {
   635   // In the 64-bit VM size calculation is very complex, so instructions are
   636   // emitted into a scratch buffer to determine the size.
   637   LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
   638   if (cbuf) {
   639     MacroAssembler _masm(cbuf);
   640     int offset = __ offset();
   641     if (is_load) {
   642       switch (ireg) {
   643       case Op_VecS:
   644         __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
   645         break;
   646       case Op_VecD:
   647         __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
   648         break;
   649       case Op_VecX:
   650         __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
   651         break;
   652       case Op_VecY:
   653         __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
   654         break;
   655       default:
   656         ShouldNotReachHere();
   657       }
   658     } else { // store
   659       switch (ireg) {
   660       case Op_VecS:
   661         __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
   662         break;
   663       case Op_VecD:
   664         __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
   665         break;
   666       case Op_VecX:
   667         __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
   668         break;
   669       case Op_VecY:
   670         __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
   671         break;
   672       default:
   673         ShouldNotReachHere();
   674       }
   675     }
   676     int size = __ offset() - offset;
   677 #ifdef ASSERT
   678     int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
   679     // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
   680     assert(!do_size || size == (5+offset_size), "incorrect size calculation");
   681 #endif
   682     return size;
   683 #ifndef PRODUCT
   684   } else if (!do_size) {
   685     if (is_load) {
   686       switch (ireg) {
   687       case Op_VecS:
   688         st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
   689         break;
   690       case Op_VecD:
   691         st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
   692         break;
   693        case Op_VecX:
   694         st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
   695         break;
   696       case Op_VecY:
   697         st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
   698         break;
   699       default:
   700         ShouldNotReachHere();
   701       }
   702     } else { // store
   703       switch (ireg) {
   704       case Op_VecS:
   705         st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
   706         break;
   707       case Op_VecD:
   708         st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
   709         break;
   710        case Op_VecX:
   711         st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
   712         break;
   713       case Op_VecY:
   714         st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
   715         break;
   716       default:
   717         ShouldNotReachHere();
   718       }
   719     }
   720 #endif
   721   }
   722   int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
   723   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
   724   return 5+offset_size;
   725 }
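       // Size sketch (illustration only): a 128-bit spill such as
       // "movdqu xmm0, [rsp + 0x40]" encodes as 2 bytes of SIMD prefix/escape
       // (or 2-byte VEX) + opcode + ModRM + SIB = 5 bytes, plus a 1-byte
       // displacement for offsets below 0x80 (4 bytes otherwise), matching
       // the 5 + offset_size returned above.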
   727 static inline jfloat replicate4_imm(int con, int width) {
   728   // Load a constant of "width" (in bytes) and replicate it to fill 32 bits.
   729   assert(width == 1 || width == 2, "only byte or short types here");
   730   int bit_width = width * 8;
   731   jint val = con;
   732   val &= (1 << bit_width) - 1;  // mask off sign bits
   733   while(bit_width < 32) {
   734     val |= (val << bit_width);
   735     bit_width <<= 1;
   736   }
   737   jfloat fval = *((jfloat*) &val);  // coerce to float type
   738   return fval;
   739 }
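       // Worked example (illustration only): replicate4_imm(0x12, 1 /* byte */)
       // masks the constant to 0x12 and doubles the pattern width twice:
       // 0x12 -> 0x1212 -> 0x12121212, returned as that bit pattern in a jfloat.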
   741 static inline jdouble replicate8_imm(int con, int width) {
   742   // Load a constant of "width" (in bytes) and replicate it to fill 64 bits.
   743   assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
   744   int bit_width = width * 8;
   745   jlong val = con;
   746   val &= (((jlong) 1) << bit_width) - 1;  // mask off sign bits
   747   while(bit_width < 64) {
   748     val |= (val << bit_width);
   749     bit_width <<= 1;
   750   }
   751   jdouble dval = *((jdouble*) &val);  // coerce to double type
   752   return dval;
   753 }
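       // Worked example (illustration only): replicate8_imm(0x1234, 2 /* short */)
       // masks the constant to 0x1234 and widens 16 -> 32 -> 64 bits:
       // 0x1234 -> 0x12341234 -> 0x1234123412341234, returned as a jdouble bit pattern.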
   755 #ifndef PRODUCT
   756   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
   757     st->print("nop \t# %d bytes pad for loops and calls", _count);
   758   }
   759 #endif
   761   void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
   762     MacroAssembler _masm(&cbuf);
   763     __ nop(_count);
   764   }
   766   uint MachNopNode::size(PhaseRegAlloc*) const {
   767     return _count;
   768   }
   770 #ifndef PRODUCT
   771   void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
   772     st->print("# breakpoint");
   773   }
   774 #endif
   776   void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
   777     MacroAssembler _masm(&cbuf);
   778     __ int3();
   779   }
   781   uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
   782     return MachNode::size(ra_);
   783   }
   785 %}
   787 encode %{
   789   enc_class preserve_SP %{
   790     debug_only(int off0 = cbuf.insts_size());
   791     MacroAssembler _masm(&cbuf);
   792     // RBP is preserved across all calls, even compiled calls.
   793     // Use it to preserve RSP in places where the callee might change the SP.
   794     __ movptr(rbp_mh_SP_save, rsp);
   795     debug_only(int off1 = cbuf.insts_size());
   796     assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
   797   %}
   799   enc_class restore_SP %{
   800     MacroAssembler _masm(&cbuf);
   801     __ movptr(rsp, rbp_mh_SP_save);
   802   %}
   804   enc_class call_epilog %{
   805     if (VerifyStackAtCalls) {
   806       // Check that stack depth is unchanged: find majik cookie on stack
   807       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
   808       MacroAssembler _masm(&cbuf);
   809       Label L;
   810       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
   811       __ jccb(Assembler::equal, L);
   812       // Die if stack mismatch
   813       __ int3();
   814       __ bind(L);
   815     }
   816   %}
   818 %}
   821 //----------OPERANDS-----------------------------------------------------------
   822 // Operand definitions must precede instruction definitions for correct parsing
   823 // in the ADLC because operands constitute user-defined types which are used in
   824 // instruction definitions.
   826 // Vectors
   827 operand vecS() %{
   828   constraint(ALLOC_IN_RC(vectors_reg));
   829   match(VecS);
   831   format %{ %}
   832   interface(REG_INTER);
   833 %}
   835 operand vecD() %{
   836   constraint(ALLOC_IN_RC(vectord_reg));
   837   match(VecD);
   839   format %{ %}
   840   interface(REG_INTER);
   841 %}
   843 operand vecX() %{
   844   constraint(ALLOC_IN_RC(vectorx_reg));
   845   match(VecX);
   847   format %{ %}
   848   interface(REG_INTER);
   849 %}
   851 operand vecY() %{
   852   constraint(ALLOC_IN_RC(vectory_reg));
   853   match(VecY);
   855   format %{ %}
   856   interface(REG_INTER);
   857 %}
   860 // INSTRUCTIONS -- Platform-independent definitions (same for 32- and 64-bit)
   862 // ============================================================================
   864 instruct ShouldNotReachHere() %{
   865   match(Halt);
   866   format %{ "int3\t# ShouldNotReachHere" %}
   867   ins_encode %{
   868     __ int3();
   869   %}
   870   ins_pipe(pipe_slow);
   871 %}
   873 // ============================================================================
   875 instruct addF_reg(regF dst, regF src) %{
   876   predicate((UseSSE>=1) && (UseAVX == 0));
   877   match(Set dst (AddF dst src));
   879   format %{ "addss   $dst, $src" %}
   880   ins_cost(150);
   881   ins_encode %{
   882     __ addss($dst$$XMMRegister, $src$$XMMRegister);
   883   %}
   884   ins_pipe(pipe_slow);
   885 %}
   887 instruct addF_mem(regF dst, memory src) %{
   888   predicate((UseSSE>=1) && (UseAVX == 0));
   889   match(Set dst (AddF dst (LoadF src)));
   891   format %{ "addss   $dst, $src" %}
   892   ins_cost(150);
   893   ins_encode %{
   894     __ addss($dst$$XMMRegister, $src$$Address);
   895   %}
   896   ins_pipe(pipe_slow);
   897 %}
   899 instruct addF_imm(regF dst, immF con) %{
   900   predicate((UseSSE>=1) && (UseAVX == 0));
   901   match(Set dst (AddF dst con));
   902   format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
   903   ins_cost(150);
   904   ins_encode %{
   905     __ addss($dst$$XMMRegister, $constantaddress($con));
   906   %}
   907   ins_pipe(pipe_slow);
   908 %}
   910 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
   911   predicate(UseAVX > 0);
   912   match(Set dst (AddF src1 src2));
   914   format %{ "vaddss  $dst, $src1, $src2" %}
   915   ins_cost(150);
   916   ins_encode %{
   917     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
   918   %}
   919   ins_pipe(pipe_slow);
   920 %}
   922 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
   923   predicate(UseAVX > 0);
   924   match(Set dst (AddF src1 (LoadF src2)));
   926   format %{ "vaddss  $dst, $src1, $src2" %}
   927   ins_cost(150);
   928   ins_encode %{
   929     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
   930   %}
   931   ins_pipe(pipe_slow);
   932 %}
   934 instruct addF_reg_imm(regF dst, regF src, immF con) %{
   935   predicate(UseAVX > 0);
   936   match(Set dst (AddF src con));
   938   format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
   939   ins_cost(150);
   940   ins_encode %{
   941     __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
   942   %}
   943   ins_pipe(pipe_slow);
   944 %}
   946 instruct addD_reg(regD dst, regD src) %{
   947   predicate((UseSSE>=2) && (UseAVX == 0));
   948   match(Set dst (AddD dst src));
   950   format %{ "addsd   $dst, $src" %}
   951   ins_cost(150);
   952   ins_encode %{
   953     __ addsd($dst$$XMMRegister, $src$$XMMRegister);
   954   %}
   955   ins_pipe(pipe_slow);
   956 %}
   958 instruct addD_mem(regD dst, memory src) %{
   959   predicate((UseSSE>=2) && (UseAVX == 0));
   960   match(Set dst (AddD dst (LoadD src)));
   962   format %{ "addsd   $dst, $src" %}
   963   ins_cost(150);
   964   ins_encode %{
   965     __ addsd($dst$$XMMRegister, $src$$Address);
   966   %}
   967   ins_pipe(pipe_slow);
   968 %}
   970 instruct addD_imm(regD dst, immD con) %{
   971   predicate((UseSSE>=2) && (UseAVX == 0));
   972   match(Set dst (AddD dst con));
   973   format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
   974   ins_cost(150);
   975   ins_encode %{
   976     __ addsd($dst$$XMMRegister, $constantaddress($con));
   977   %}
   978   ins_pipe(pipe_slow);
   979 %}
   981 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
   982   predicate(UseAVX > 0);
   983   match(Set dst (AddD src1 src2));
   985   format %{ "vaddsd  $dst, $src1, $src2" %}
   986   ins_cost(150);
   987   ins_encode %{
   988     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
   989   %}
   990   ins_pipe(pipe_slow);
   991 %}
   993 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
   994   predicate(UseAVX > 0);
   995   match(Set dst (AddD src1 (LoadD src2)));
   997   format %{ "vaddsd  $dst, $src1, $src2" %}
   998   ins_cost(150);
   999   ins_encode %{
  1000     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  1001   %}
  1002   ins_pipe(pipe_slow);
  1003 %}
  1005 instruct addD_reg_imm(regD dst, regD src, immD con) %{
  1006   predicate(UseAVX > 0);
  1007   match(Set dst (AddD src con));
  1009   format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  1010   ins_cost(150);
  1011   ins_encode %{
  1012     __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  1013   %}
  1014   ins_pipe(pipe_slow);
  1015 %}
  1017 instruct subF_reg(regF dst, regF src) %{
  1018   predicate((UseSSE>=1) && (UseAVX == 0));
  1019   match(Set dst (SubF dst src));
  1021   format %{ "subss   $dst, $src" %}
  1022   ins_cost(150);
  1023   ins_encode %{
  1024     __ subss($dst$$XMMRegister, $src$$XMMRegister);
  1025   %}
  1026   ins_pipe(pipe_slow);
  1027 %}
  1029 instruct subF_mem(regF dst, memory src) %{
  1030   predicate((UseSSE>=1) && (UseAVX == 0));
  1031   match(Set dst (SubF dst (LoadF src)));
  1033   format %{ "subss   $dst, $src" %}
  1034   ins_cost(150);
  1035   ins_encode %{
  1036     __ subss($dst$$XMMRegister, $src$$Address);
  1037   %}
  1038   ins_pipe(pipe_slow);
  1039 %}
  1041 instruct subF_imm(regF dst, immF con) %{
  1042   predicate((UseSSE>=1) && (UseAVX == 0));
  1043   match(Set dst (SubF dst con));
  1044   format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  1045   ins_cost(150);
  1046   ins_encode %{
  1047     __ subss($dst$$XMMRegister, $constantaddress($con));
  1048   %}
  1049   ins_pipe(pipe_slow);
  1050 %}
  1052 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
  1053   predicate(UseAVX > 0);
  1054   match(Set dst (SubF src1 src2));
  1056   format %{ "vsubss  $dst, $src1, $src2" %}
  1057   ins_cost(150);
  1058   ins_encode %{
  1059     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  1060   %}
  1061   ins_pipe(pipe_slow);
  1062 %}
  1064 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
  1065   predicate(UseAVX > 0);
  1066   match(Set dst (SubF src1 (LoadF src2)));
  1068   format %{ "vsubss  $dst, $src1, $src2" %}
  1069   ins_cost(150);
  1070   ins_encode %{
  1071     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  1072   %}
  1073   ins_pipe(pipe_slow);
  1074 %}
  1076 instruct subF_reg_imm(regF dst, regF src, immF con) %{
  1077   predicate(UseAVX > 0);
  1078   match(Set dst (SubF src con));
  1080   format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  1081   ins_cost(150);
  1082   ins_encode %{
  1083     __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  1084   %}
  1085   ins_pipe(pipe_slow);
  1086 %}
  1088 instruct subD_reg(regD dst, regD src) %{
  1089   predicate((UseSSE>=2) && (UseAVX == 0));
  1090   match(Set dst (SubD dst src));
  1092   format %{ "subsd   $dst, $src" %}
  1093   ins_cost(150);
  1094   ins_encode %{
  1095     __ subsd($dst$$XMMRegister, $src$$XMMRegister);
  1096   %}
  1097   ins_pipe(pipe_slow);
  1098 %}
  1100 instruct subD_mem(regD dst, memory src) %{
  1101   predicate((UseSSE>=2) && (UseAVX == 0));
  1102   match(Set dst (SubD dst (LoadD src)));
  1104   format %{ "subsd   $dst, $src" %}
  1105   ins_cost(150);
  1106   ins_encode %{
  1107     __ subsd($dst$$XMMRegister, $src$$Address);
  1108   %}
  1109   ins_pipe(pipe_slow);
  1110 %}
  1112 instruct subD_imm(regD dst, immD con) %{
  1113   predicate((UseSSE>=2) && (UseAVX == 0));
  1114   match(Set dst (SubD dst con));
  1115   format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  1116   ins_cost(150);
  1117   ins_encode %{
  1118     __ subsd($dst$$XMMRegister, $constantaddress($con));
  1119   %}
  1120   ins_pipe(pipe_slow);
  1121 %}
  1123 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
  1124   predicate(UseAVX > 0);
  1125   match(Set dst (SubD src1 src2));
  1127   format %{ "vsubsd  $dst, $src1, $src2" %}
  1128   ins_cost(150);
  1129   ins_encode %{
  1130     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  1131   %}
  1132   ins_pipe(pipe_slow);
  1133 %}
  1135 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
  1136   predicate(UseAVX > 0);
  1137   match(Set dst (SubD src1 (LoadD src2)));
  1139   format %{ "vsubsd  $dst, $src1, $src2" %}
  1140   ins_cost(150);
  1141   ins_encode %{
  1142     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  1143   %}
  1144   ins_pipe(pipe_slow);
  1145 %}
  1147 instruct subD_reg_imm(regD dst, regD src, immD con) %{
  1148   predicate(UseAVX > 0);
  1149   match(Set dst (SubD src con));
  1151   format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  1152   ins_cost(150);
  1153   ins_encode %{
  1154     __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  1155   %}
  1156   ins_pipe(pipe_slow);
  1157 %}
  1159 instruct mulF_reg(regF dst, regF src) %{
  1160   predicate((UseSSE>=1) && (UseAVX == 0));
  1161   match(Set dst (MulF dst src));
  1163   format %{ "mulss   $dst, $src" %}
  1164   ins_cost(150);
  1165   ins_encode %{
  1166     __ mulss($dst$$XMMRegister, $src$$XMMRegister);
  1167   %}
  1168   ins_pipe(pipe_slow);
  1169 %}
  1171 instruct mulF_mem(regF dst, memory src) %{
  1172   predicate((UseSSE>=1) && (UseAVX == 0));
  1173   match(Set dst (MulF dst (LoadF src)));
  1175   format %{ "mulss   $dst, $src" %}
  1176   ins_cost(150);
  1177   ins_encode %{
  1178     __ mulss($dst$$XMMRegister, $src$$Address);
  1179   %}
  1180   ins_pipe(pipe_slow);
  1181 %}
  1183 instruct mulF_imm(regF dst, immF con) %{
  1184   predicate((UseSSE>=1) && (UseAVX == 0));
  1185   match(Set dst (MulF dst con));
  1186   format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  1187   ins_cost(150);
  1188   ins_encode %{
  1189     __ mulss($dst$$XMMRegister, $constantaddress($con));
  1190   %}
  1191   ins_pipe(pipe_slow);
  1192 %}
  1194 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
  1195   predicate(UseAVX > 0);
  1196   match(Set dst (MulF src1 src2));
  1198   format %{ "vmulss  $dst, $src1, $src2" %}
  1199   ins_cost(150);
  1200   ins_encode %{
  1201     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  1202   %}
  1203   ins_pipe(pipe_slow);
  1204 %}
  1206 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
  1207   predicate(UseAVX > 0);
  1208   match(Set dst (MulF src1 (LoadF src2)));
  1210   format %{ "vmulss  $dst, $src1, $src2" %}
  1211   ins_cost(150);
  1212   ins_encode %{
  1213     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  1214   %}
  1215   ins_pipe(pipe_slow);
  1216 %}
  1218 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
  1219   predicate(UseAVX > 0);
  1220   match(Set dst (MulF src con));
  1222   format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  1223   ins_cost(150);
  1224   ins_encode %{
  1225     __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  1226   %}
  1227   ins_pipe(pipe_slow);
  1228 %}
  1230 instruct mulD_reg(regD dst, regD src) %{
  1231   predicate((UseSSE>=2) && (UseAVX == 0));
  1232   match(Set dst (MulD dst src));
  1234   format %{ "mulsd   $dst, $src" %}
  1235   ins_cost(150);
  1236   ins_encode %{
  1237     __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
  1238   %}
  1239   ins_pipe(pipe_slow);
  1240 %}
  1242 instruct mulD_mem(regD dst, memory src) %{
  1243   predicate((UseSSE>=2) && (UseAVX == 0));
  1244   match(Set dst (MulD dst (LoadD src)));
  1246   format %{ "mulsd   $dst, $src" %}
  1247   ins_cost(150);
  1248   ins_encode %{
  1249     __ mulsd($dst$$XMMRegister, $src$$Address);
  1250   %}
  1251   ins_pipe(pipe_slow);
  1252 %}
  1254 instruct mulD_imm(regD dst, immD con) %{
  1255   predicate((UseSSE>=2) && (UseAVX == 0));
  1256   match(Set dst (MulD dst con));
  1257   format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  1258   ins_cost(150);
  1259   ins_encode %{
  1260     __ mulsd($dst$$XMMRegister, $constantaddress($con));
  1261   %}
  1262   ins_pipe(pipe_slow);
  1263 %}
  1265 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
  1266   predicate(UseAVX > 0);
  1267   match(Set dst (MulD src1 src2));
  1269   format %{ "vmulsd  $dst, $src1, $src2" %}
  1270   ins_cost(150);
  1271   ins_encode %{
  1272     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  1273   %}
  1274   ins_pipe(pipe_slow);
  1275 %}
  1277 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
  1278   predicate(UseAVX > 0);
  1279   match(Set dst (MulD src1 (LoadD src2)));
  1281   format %{ "vmulsd  $dst, $src1, $src2" %}
  1282   ins_cost(150);
  1283   ins_encode %{
  1284     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  1285   %}
  1286   ins_pipe(pipe_slow);
  1287 %}
  1289 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
  1290   predicate(UseAVX > 0);
  1291   match(Set dst (MulD src con));
  1293   format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  1294   ins_cost(150);
  1295   ins_encode %{
  1296     __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  1297   %}
  1298   ins_pipe(pipe_slow);
  1299 %}
  1301 instruct divF_reg(regF dst, regF src) %{
  1302   predicate((UseSSE>=1) && (UseAVX == 0));
  1303   match(Set dst (DivF dst src));
  1305   format %{ "divss   $dst, $src" %}
  1306   ins_cost(150);
  1307   ins_encode %{
  1308     __ divss($dst$$XMMRegister, $src$$XMMRegister);
  1309   %}
  1310   ins_pipe(pipe_slow);
  1311 %}
  1313 instruct divF_mem(regF dst, memory src) %{
  1314   predicate((UseSSE>=1) && (UseAVX == 0));
  1315   match(Set dst (DivF dst (LoadF src)));
  1317   format %{ "divss   $dst, $src" %}
  1318   ins_cost(150);
  1319   ins_encode %{
  1320     __ divss($dst$$XMMRegister, $src$$Address);
  1321   %}
  1322   ins_pipe(pipe_slow);
  1323 %}
  1325 instruct divF_imm(regF dst, immF con) %{
  1326   predicate((UseSSE>=1) && (UseAVX == 0));
  1327   match(Set dst (DivF dst con));
  1328   format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  1329   ins_cost(150);
  1330   ins_encode %{
  1331     __ divss($dst$$XMMRegister, $constantaddress($con));
  1332   %}
  1333   ins_pipe(pipe_slow);
  1334 %}
  1336 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
  1337   predicate(UseAVX > 0);
  1338   match(Set dst (DivF src1 src2));
  1340   format %{ "vdivss  $dst, $src1, $src2" %}
  1341   ins_cost(150);
  1342   ins_encode %{
  1343     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  1344   %}
  1345   ins_pipe(pipe_slow);
  1346 %}
  1348 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
  1349   predicate(UseAVX > 0);
  1350   match(Set dst (DivF src1 (LoadF src2)));
  1352   format %{ "vdivss  $dst, $src1, $src2" %}
  1353   ins_cost(150);
  1354   ins_encode %{
  1355     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  1356   %}
  1357   ins_pipe(pipe_slow);
  1358 %}
  1360 instruct divF_reg_imm(regF dst, regF src, immF con) %{
  1361   predicate(UseAVX > 0);
  1362   match(Set dst (DivF src con));
  1364   format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  1365   ins_cost(150);
  1366   ins_encode %{
  1367     __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  1368   %}
  1369   ins_pipe(pipe_slow);
  1370 %}
  1372 instruct divD_reg(regD dst, regD src) %{
  1373   predicate((UseSSE>=2) && (UseAVX == 0));
  1374   match(Set dst (DivD dst src));
  1376   format %{ "divsd   $dst, $src" %}
  1377   ins_cost(150);
  1378   ins_encode %{
  1379     __ divsd($dst$$XMMRegister, $src$$XMMRegister);
  1380   %}
  1381   ins_pipe(pipe_slow);
  1382 %}
  1384 instruct divD_mem(regD dst, memory src) %{
  1385   predicate((UseSSE>=2) && (UseAVX == 0));
  1386   match(Set dst (DivD dst (LoadD src)));
  1388   format %{ "divsd   $dst, $src" %}
  1389   ins_cost(150);
  1390   ins_encode %{
  1391     __ divsd($dst$$XMMRegister, $src$$Address);
  1392   %}
  1393   ins_pipe(pipe_slow);
  1394 %}
  1396 instruct divD_imm(regD dst, immD con) %{
  1397   predicate((UseSSE>=2) && (UseAVX == 0));
  1398   match(Set dst (DivD dst con));
  1399   format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  1400   ins_cost(150);
  1401   ins_encode %{
  1402     __ divsd($dst$$XMMRegister, $constantaddress($con));
  1403   %}
  1404   ins_pipe(pipe_slow);
  1405 %}
  1407 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
  1408   predicate(UseAVX > 0);
  1409   match(Set dst (DivD src1 src2));
  1411   format %{ "vdivsd  $dst, $src1, $src2" %}
  1412   ins_cost(150);
  1413   ins_encode %{
  1414     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  1415   %}
  1416   ins_pipe(pipe_slow);
  1417 %}
  1419 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
  1420   predicate(UseAVX > 0);
  1421   match(Set dst (DivD src1 (LoadD src2)));
  1423   format %{ "vdivsd  $dst, $src1, $src2" %}
  1424   ins_cost(150);
  1425   ins_encode %{
  1426     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  1427   %}
  1428   ins_pipe(pipe_slow);
  1429 %}
  1431 instruct divD_reg_imm(regD dst, regD src, immD con) %{
  1432   predicate(UseAVX > 0);
  1433   match(Set dst (DivD src con));
  1435   format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  1436   ins_cost(150);
  1437   ins_encode %{
  1438     __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  1439   %}
  1440   ins_pipe(pipe_slow);
  1441 %}
  1443 instruct absF_reg(regF dst) %{
  1444   predicate((UseSSE>=1) && (UseAVX == 0));
  1445   match(Set dst (AbsF dst));
  1446   ins_cost(150);
  1447   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
  1448   ins_encode %{
  1449     __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
  1450   %}
  1451   ins_pipe(pipe_slow);
  1452 %}
  1454 instruct absF_reg_reg(regF dst, regF src) %{
  1455   predicate(UseAVX > 0);
  1456   match(Set dst (AbsF src));
  1457   ins_cost(150);
  1458   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  1459   ins_encode %{
  1460     bool vector256 = false;
  1461     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
  1462               ExternalAddress(float_signmask()), vector256);
  1463   %}
  1464   ins_pipe(pipe_slow);
  1465 %}
  1467 instruct absD_reg(regD dst) %{
  1468   predicate((UseSSE>=2) && (UseAVX == 0));
  1469   match(Set dst (AbsD dst));
  1470   ins_cost(150);
  1471   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
  1472             "# abs double by sign masking" %}
  1473   ins_encode %{
  1474     __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  1475   %}
  1476   ins_pipe(pipe_slow);
  1477 %}
  1479 instruct absD_reg_reg(regD dst, regD src) %{
  1480   predicate(UseAVX > 0);
  1481   match(Set dst (AbsD src));
  1482   ins_cost(150);
  1483   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
  1484             "# abs double by sign masking" %}
  1485   ins_encode %{
  1486     bool vector256 = false;
  1487     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
  1488               ExternalAddress(double_signmask()), vector256);
  1489   %}
  1490   ins_pipe(pipe_slow);
  1491 %}
  1493 instruct negF_reg(regF dst) %{
  1494   predicate((UseSSE>=1) && (UseAVX == 0));
  1495   match(Set dst (NegF dst));
  1496   ins_cost(150);
  1497   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
  1498   ins_encode %{
  1499     __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  1500   %}
  1501   ins_pipe(pipe_slow);
  1502 %}
  1504 instruct negF_reg_reg(regF dst, regF src) %{
  1505   predicate(UseAVX > 0);
  1506   match(Set dst (NegF src));
  1507   ins_cost(150);
  1508   format %{ "vxorps  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  1509   ins_encode %{
  1510     bool vector256 = false;
  1511     __ vxorps($dst$$XMMRegister, $src$$XMMRegister,
  1512               ExternalAddress(float_signflip()), vector256);
  1513   %}
  1514   ins_pipe(pipe_slow);
  1515 %}
  1517 instruct negD_reg(regD dst) %{
  1518   predicate((UseSSE>=2) && (UseAVX == 0));
  1519   match(Set dst (NegD dst));
  1520   ins_cost(150);
  1521   format %{ "xorpd   $dst, [0x8000000000000000]\t"
  1522             "# neg double by sign flipping" %}
  1523   ins_encode %{
  1524     __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  1525   %}
  1526   ins_pipe(pipe_slow);
  1527 %}
  1529 instruct negD_reg_reg(regD dst, regD src) %{
  1530   predicate(UseAVX > 0);
  1531   match(Set dst (NegD src));
  1532   ins_cost(150);
  1533   format %{ "vxorpd  $dst, $src, [0x8000000000000000]\t"
  1534             "# neg double by sign flipping" %}
  1535   ins_encode %{
  1536     bool vector256 = false;
  1537     __ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
  1538               ExternalAddress(double_signflip()), vector256);
  1539   %}
  1540   ins_pipe(pipe_slow);
  1541 %}
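// A worked example of the sign-mask / sign-flip constants used above, shown
// as IEEE 754 bit patterns for illustration:
//   bits(-3.0f)              = 0xC0400000
//   0xC0400000 & 0x7FFFFFFF  = 0x40400000 = bits(3.0f)    (abs via andps/vandps)
//   0xC0400000 ^ 0x80000000  = 0x40400000 = bits(3.0f)    (neg via xorps/vxorps)
// The 64-bit masks 0x7FFFFFFFFFFFFFFF and 0x8000000000000000 play the same
// role for doubles in the andpd/xorpd forms.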
  1543 instruct sqrtF_reg(regF dst, regF src) %{
  1544   predicate(UseSSE>=1);
  1545   match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
  1547   format %{ "sqrtss  $dst, $src" %}
  1548   ins_cost(150);
  1549   ins_encode %{
  1550     __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
  1551   %}
  1552   ins_pipe(pipe_slow);
  1553 %}
  1555 instruct sqrtF_mem(regF dst, memory src) %{
  1556   predicate(UseSSE>=1);
  1557   match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
  1559   format %{ "sqrtss  $dst, $src" %}
  1560   ins_cost(150);
  1561   ins_encode %{
  1562     __ sqrtss($dst$$XMMRegister, $src$$Address);
  1563   %}
  1564   ins_pipe(pipe_slow);
  1565 %}
  1567 instruct sqrtF_imm(regF dst, immF con) %{
  1568   predicate(UseSSE>=1);
  1569   match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
  1570   format %{ "sqrtss  $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  1571   ins_cost(150);
  1572   ins_encode %{
  1573     __ sqrtss($dst$$XMMRegister, $constantaddress($con));
  1574   %}
  1575   ins_pipe(pipe_slow);
  1576 %}
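// Note on the sqrtF rules above: Java exposes only Math.sqrt(double), so a
// single-precision square root reaches the matcher as the ideal subtree
// ConvD2F(SqrtD(ConvF2D x)), produced by (hypothetical) Java source such as
//
//   float r = (float) Math.sqrt(f);   // f is a float
//
// Collapsing the whole subtree into one sqrtss is safe here: for a float
// input, the correctly rounded double sqrt rounded back to float gives the
// same result as a direct single-precision sqrt.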
  1578 instruct sqrtD_reg(regD dst, regD src) %{
  1579   predicate(UseSSE>=2);
  1580   match(Set dst (SqrtD src));
  1582   format %{ "sqrtsd  $dst, $src" %}
  1583   ins_cost(150);
  1584   ins_encode %{
  1585     __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
  1586   %}
  1587   ins_pipe(pipe_slow);
  1588 %}
  1590 instruct sqrtD_mem(regD dst, memory src) %{
  1591   predicate(UseSSE>=2);
  1592   match(Set dst (SqrtD (LoadD src)));
  1594   format %{ "sqrtsd  $dst, $src" %}
  1595   ins_cost(150);
  1596   ins_encode %{
  1597     __ sqrtsd($dst$$XMMRegister, $src$$Address);
  1598   %}
  1599   ins_pipe(pipe_slow);
  1600 %}
  1602 instruct sqrtD_imm(regD dst, immD con) %{
  1603   predicate(UseSSE>=2);
  1604   match(Set dst (SqrtD con));
  1605   format %{ "sqrtsd  $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  1606   ins_cost(150);
  1607   ins_encode %{
  1608     __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
  1609   %}
  1610   ins_pipe(pipe_slow);
  1611 %}
  1614 // ====================VECTOR INSTRUCTIONS=====================================
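// Vector operand class widths used by the rules below (evident from the
// formats and memory_size() predicates): vecS holds 4 bytes, vecD 8 bytes,
// vecX a full 16-byte XMM register, and vecY a 32-byte YMM register (AVX).
// LoadVector/StoreVector nodes created by the SuperWord pass carry their
// vector size, which the predicates use to pick the matching rule.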
  1616 // Load vectors (4 bytes long)
  1617 instruct loadV4(vecS dst, memory mem) %{
  1618   predicate(n->as_LoadVector()->memory_size() == 4);
  1619   match(Set dst (LoadVector mem));
  1620   ins_cost(125);
  1621   format %{ "movd    $dst,$mem\t! load vector (4 bytes)" %}
  1622   ins_encode %{
  1623     __ movdl($dst$$XMMRegister, $mem$$Address);
  1624   %}
  1625   ins_pipe( pipe_slow );
  1626 %}
  1628 // Load vectors (8 bytes long)
  1629 instruct loadV8(vecD dst, memory mem) %{
  1630   predicate(n->as_LoadVector()->memory_size() == 8);
  1631   match(Set dst (LoadVector mem));
  1632   ins_cost(125);
  1633   format %{ "movq    $dst,$mem\t! load vector (8 bytes)" %}
  1634   ins_encode %{
  1635     __ movq($dst$$XMMRegister, $mem$$Address);
  1636   %}
  1637   ins_pipe( pipe_slow );
  1638 %}
  1640 // Load vectors (16 bytes long)
  1641 instruct loadV16(vecX dst, memory mem) %{
  1642   predicate(n->as_LoadVector()->memory_size() == 16);
  1643   match(Set dst (LoadVector mem));
  1644   ins_cost(125);
  1645   format %{ "movdqu  $dst,$mem\t! load vector (16 bytes)" %}
  1646   ins_encode %{
  1647     __ movdqu($dst$$XMMRegister, $mem$$Address);
  1648   %}
  1649   ins_pipe( pipe_slow );
  1650 %}
  1652 // Load vectors (32 bytes long)
  1653 instruct loadV32(vecY dst, memory mem) %{
  1654   predicate(n->as_LoadVector()->memory_size() == 32);
  1655   match(Set dst (LoadVector mem));
  1656   ins_cost(125);
  1657   format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
  1658   ins_encode %{
  1659     __ vmovdqu($dst$$XMMRegister, $mem$$Address);
  1660   %}
  1661   ins_pipe( pipe_slow );
  1662 %}
  1664 // Store vectors
  1665 instruct storeV4(memory mem, vecS src) %{
  1666   predicate(n->as_StoreVector()->memory_size() == 4);
  1667   match(Set mem (StoreVector mem src));
  1668   ins_cost(145);
  1669   format %{ "movd    $mem,$src\t! store vector (4 bytes)" %}
  1670   ins_encode %{
  1671     __ movdl($mem$$Address, $src$$XMMRegister);
  1672   %}
  1673   ins_pipe( pipe_slow );
  1674 %}
  1676 instruct storeV8(memory mem, vecD src) %{
  1677   predicate(n->as_StoreVector()->memory_size() == 8);
  1678   match(Set mem (StoreVector mem src));
  1679   ins_cost(145);
  1680   format %{ "movq    $mem,$src\t! store vector (8 bytes)" %}
  1681   ins_encode %{
  1682     __ movq($mem$$Address, $src$$XMMRegister);
  1683   %}
  1684   ins_pipe( pipe_slow );
  1685 %}
  1687 instruct storeV16(memory mem, vecX src) %{
  1688   predicate(n->as_StoreVector()->memory_size() == 16);
  1689   match(Set mem (StoreVector mem src));
  1690   ins_cost(145);
  1691   format %{ "movdqu  $mem,$src\t! store vector (16 bytes)" %}
  1692   ins_encode %{
  1693     __ movdqu($mem$$Address, $src$$XMMRegister);
  1694   %}
  1695   ins_pipe( pipe_slow );
  1696 %}
  1698 instruct storeV32(memory mem, vecY src) %{
  1699   predicate(n->as_StoreVector()->memory_size() == 32);
  1700   match(Set mem (StoreVector mem src));
  1701   ins_cost(145);
  1702   format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
  1703   ins_encode %{
  1704     __ vmovdqu($mem$$Address, $src$$XMMRegister);
  1705   %}
  1706   ins_pipe( pipe_slow );
  1707 %}
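// For illustration (hypothetical Java source), a simple copy loop such as
//
//   for (int i = 0; i < a.length; i++) { b[i] = a[i]; }   // int[] a, b
//
// can be packed by SuperWord into LoadVector/StoreVector pairs matched by the
// rules above: the 4/8/16-byte forms need only SSE2, while the 32-byte
// vmovdqu forms require AVX.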
  1709 // Replicate byte scalar into a vector
  1710 instruct Repl4B(vecS dst, rRegI src) %{
  1711   predicate(n->as_Vector()->length() == 4);
  1712   match(Set dst (ReplicateB src));
  1713   format %{ "movd    $dst,$src\n\t"
  1714             "punpcklbw $dst,$dst\n\t"
  1715             "pshuflw $dst,$dst,0x00\t! replicate4B" %}
  1716   ins_encode %{
  1717     __ movdl($dst$$XMMRegister, $src$$Register);
  1718     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
  1719     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  1720   %}
  1721   ins_pipe( pipe_slow );
  1722 %}
  1724 instruct Repl8B(vecD dst, rRegI src) %{
  1725   predicate(n->as_Vector()->length() == 8);
  1726   match(Set dst (ReplicateB src));
  1727   format %{ "movd    $dst,$src\n\t"
  1728             "punpcklbw $dst,$dst\n\t"
  1729             "pshuflw $dst,$dst,0x00\t! replicate8B" %}
  1730   ins_encode %{
  1731     __ movdl($dst$$XMMRegister, $src$$Register);
  1732     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
  1733     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  1734   %}
  1735   ins_pipe( pipe_slow );
  1736 %}
  1738 instruct Repl16B(vecX dst, rRegI src) %{
  1739   predicate(n->as_Vector()->length() == 16);
  1740   match(Set dst (ReplicateB src));
  1741   format %{ "movd    $dst,$src\n\t"
  1742             "punpcklbw $dst,$dst\n\t"
  1743             "pshuflw $dst,$dst,0x00\n\t"
  1744             "punpcklqdq $dst,$dst\t! replicate16B" %}
  1745   ins_encode %{
  1746     __ movdl($dst$$XMMRegister, $src$$Register);
  1747     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
  1748     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  1749     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  1750   %}
  1751   ins_pipe( pipe_slow );
  1752 %}
  1754 instruct Repl32B(vecY dst, rRegI src) %{
  1755   predicate(n->as_Vector()->length() == 32);
  1756   match(Set dst (ReplicateB src));
  1757   format %{ "movd    $dst,$src\n\t"
  1758             "punpcklbw $dst,$dst\n\t"
  1759             "pshuflw $dst,$dst,0x00\n\t"
  1760             "punpcklqdq $dst,$dst\n\t"
  1761             "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
  1762   ins_encode %{
  1763     __ movdl($dst$$XMMRegister, $src$$Register);
  1764     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
  1765     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  1766     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  1767     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  1768   %}
  1769   ins_pipe( pipe_slow );
  1770 %}
  1772 // Replicate byte scalar immediate into a vector by loading from the constant table.
  1773 instruct Repl4B_imm(vecS dst, immI con) %{
  1774   predicate(n->as_Vector()->length() == 4);
  1775   match(Set dst (ReplicateB con));
  1776   format %{ "movdl   $dst,[$constantaddress]\t! replicate4B($con)" %}
  1777   ins_encode %{
  1778     __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
  1779   %}
  1780   ins_pipe( pipe_slow );
  1781 %}
  1783 instruct Repl8B_imm(vecD dst, immI con) %{
  1784   predicate(n->as_Vector()->length() == 8);
  1785   match(Set dst (ReplicateB con));
  1786   format %{ "movq    $dst,[$constantaddress]\t! replicate8B($con)" %}
  1787   ins_encode %{
  1788     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
  1789   %}
  1790   ins_pipe( pipe_slow );
  1791 %}
  1793 instruct Repl16B_imm(vecX dst, immI con) %{
  1794   predicate(n->as_Vector()->length() == 16);
  1795   match(Set dst (ReplicateB con));
  1796   format %{ "movq    $dst,[$constantaddress]\n\t"
  1797             "punpcklqdq $dst,$dst\t! replicate16B($con)" %}
  1798   ins_encode %{
  1799     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
  1800     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  1801   %}
  1802   ins_pipe( pipe_slow );
  1803 %}
  1805 instruct Repl32B_imm(vecY dst, immI con) %{
  1806   predicate(n->as_Vector()->length() == 32);
  1807   match(Set dst (ReplicateB con));
  1808   format %{ "movq    $dst,[$constantaddress]\n\t"
  1809             "punpcklqdq $dst,$dst\n\t"
  1810             "vinserti128h $dst,$dst,$dst\t! replicate32B($con)" %}
  1811   ins_encode %{
  1812     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
  1813     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  1814     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  1815   %}
  1816   ins_pipe( pipe_slow );
  1817 %}
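// Worked example for the immediate forms above (illustration only): for the
// byte constant con = 0x2A, the replicate4_imm/replicate8_imm helpers used in
// $constantaddress(...) build the patterns
//   replicate4_imm(0x2A, 1) = 0x2A2A2A2A
//   replicate8_imm(0x2A, 1) = 0x2A2A2A2A2A2A2A2A
// so a single movd/movq from the constant table already holds 4 or 8 copies,
// and punpcklqdq / vinserti128h only widen that pattern to 16 or 32 bytes.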
  1819 // Replicate byte scalar zero into a vector
  1820 instruct Repl4B_zero(vecS dst, immI0 zero) %{
  1821   predicate(n->as_Vector()->length() == 4);
  1822   match(Set dst (ReplicateB zero));
  1823   format %{ "pxor    $dst,$dst\t! replicate4B zero" %}
  1824   ins_encode %{
  1825     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  1826   %}
  1827   ins_pipe( fpu_reg_reg );
  1828 %}
  1830 instruct Repl8B_zero(vecD dst, immI0 zero) %{
  1831   predicate(n->as_Vector()->length() == 8);
  1832   match(Set dst (ReplicateB zero));
  1833   format %{ "pxor    $dst,$dst\t! replicate8B zero" %}
  1834   ins_encode %{
  1835     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  1836   %}
  1837   ins_pipe( fpu_reg_reg );
  1838 %}
  1840 instruct Repl16B_zero(vecX dst, immI0 zero) %{
  1841   predicate(n->as_Vector()->length() == 16);
  1842   match(Set dst (ReplicateB zero));
  1843   format %{ "pxor    $dst,$dst\t! replicate16B zero" %}
  1844   ins_encode %{
  1845     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  1846   %}
  1847   ins_pipe( fpu_reg_reg );
  1848 %}
  1850 instruct Repl32B_zero(vecY dst, immI0 zero) %{
  1851   predicate(n->as_Vector()->length() == 32);
  1852   match(Set dst (ReplicateB zero));
  1853   format %{ "vpxor   $dst,$dst,$dst\t! replicate32B zero" %}
  1854   ins_encode %{
  1855     // The vpxor macro expands to vxorpd here, since AVX1 has no 256-bit vpxor (AVX2 adds it).
  1856     bool vector256 = true;
  1857     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  1858   %}
  1859   ins_pipe( fpu_reg_reg );
  1860 %}
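// For illustration (hypothetical Java source), these replicate rules
// typically feed a vectorized fill loop such as
//
//   for (int i = 0; i < a.length; i++) { a[i] = v; }   // byte[] a, byte v
//
// where SuperWord broadcasts v once with ReplicateB and then emits
// StoreVector stores of the broadcast value in the vectorized loop body.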
  1862 // Replicate char/short (2 byte) scalar into a vector
  1863 instruct Repl2S(vecS dst, rRegI src) %{
  1864   predicate(n->as_Vector()->length() == 2);
  1865   match(Set dst (ReplicateS src));
  1866   format %{ "movd    $dst,$src\n\t"
  1867             "pshuflw $dst,$dst,0x00\t! replicate2S" %}
  1868   ins_encode %{
  1869     __ movdl($dst$$XMMRegister, $src$$Register);
  1870     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  1871   %}
  1872   ins_pipe( fpu_reg_reg );
  1873 %}
  1875 instruct Repl4S(vecD dst, rRegI src) %{
  1876   predicate(n->as_Vector()->length() == 4);
  1877   match(Set dst (ReplicateS src));
  1878   format %{ "movd    $dst,$src\n\t"
  1879             "pshuflw $dst,$dst,0x00\t! replicate4S" %}
  1880   ins_encode %{
  1881     __ movdl($dst$$XMMRegister, $src$$Register);
  1882     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  1883   %}
  1884   ins_pipe( fpu_reg_reg );
  1885 %}
  1887 instruct Repl8S(vecX dst, rRegI src) %{
  1888   predicate(n->as_Vector()->length() == 8);
  1889   match(Set dst (ReplicateS src));
  1890   format %{ "movd    $dst,$src\n\t"
  1891             "pshuflw $dst,$dst,0x00\n\t"
  1892             "punpcklqdq $dst,$dst\t! replicate8S" %}
  1893   ins_encode %{
  1894     __ movdl($dst$$XMMRegister, $src$$Register);
  1895     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  1896     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  1897   %}
  1898   ins_pipe( pipe_slow );
  1899 %}
  1901 instruct Repl16S(vecY dst, rRegI src) %{
  1902   predicate(n->as_Vector()->length() == 16);
  1903   match(Set dst (ReplicateS src));
  1904   format %{ "movd    $dst,$src\n\t"
  1905             "pshuflw $dst,$dst,0x00\n\t"
  1906             "punpcklqdq $dst,$dst\n\t"
  1907             "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
  1908   ins_encode %{
  1909     __ movdl($dst$$XMMRegister, $src$$Register);
  1910     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  1911     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  1912     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  1913   %}
  1914   ins_pipe( pipe_slow );
  1915 %}
  1917 // Replicate char/short (2 byte) scalar immediate into a vector by loading from the constant table.
  1918 instruct Repl2S_imm(vecS dst, immI con) %{
  1919   predicate(n->as_Vector()->length() == 2);
  1920   match(Set dst (ReplicateS con));
  1921   format %{ "movdl   $dst,[$constantaddress]\t! replicate2S($con)" %}
  1922   ins_encode %{
  1923     __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
  1924   %}
  1925   ins_pipe( fpu_reg_reg );
  1926 %}
  1928 instruct Repl4S_imm(vecD dst, immI con) %{
  1929   predicate(n->as_Vector()->length() == 4);
  1930   match(Set dst (ReplicateS con));
  1931   format %{ "movq    $dst,[$constantaddress]\t! replicate4S($con)" %}
  1932   ins_encode %{
  1933     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
  1934   %}
  1935   ins_pipe( fpu_reg_reg );
  1936 %}
  1938 instruct Repl8S_imm(vecX dst, immI con) %{
  1939   predicate(n->as_Vector()->length() == 8);
  1940   match(Set dst (ReplicateS con));
  1941   format %{ "movq    $dst,[$constantaddress]\n\t"
  1942             "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
  1943   ins_encode %{
  1944     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
  1945     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  1946   %}
  1947   ins_pipe( pipe_slow );
  1948 %}
  1950 instruct Repl16S_imm(vecY dst, immI con) %{
  1951   predicate(n->as_Vector()->length() == 16);
  1952   match(Set dst (ReplicateS con));
  1953   format %{ "movq    $dst,[$constantaddress]\n\t"
  1954             "punpcklqdq $dst,$dst\n\t"
  1955             "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %}
  1956   ins_encode %{
  1957     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
  1958     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  1959     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  1960   %}
  1961   ins_pipe( pipe_slow );
  1962 %}
  1964 // Replicate char/short (2 byte) scalar zero into a vector
  1965 instruct Repl2S_zero(vecS dst, immI0 zero) %{
  1966   predicate(n->as_Vector()->length() == 2);
  1967   match(Set dst (ReplicateS zero));
  1968   format %{ "pxor    $dst,$dst\t! replicate2S zero" %}
  1969   ins_encode %{
  1970     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  1971   %}
  1972   ins_pipe( fpu_reg_reg );
  1973 %}
  1975 instruct Repl4S_zero(vecD dst, immI0 zero) %{
  1976   predicate(n->as_Vector()->length() == 4);
  1977   match(Set dst (ReplicateS zero));
  1978   format %{ "pxor    $dst,$dst\t! replicate4S zero" %}
  1979   ins_encode %{
  1980     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  1981   %}
  1982   ins_pipe( fpu_reg_reg );
  1983 %}
  1985 instruct Repl8S_zero(vecX dst, immI0 zero) %{
  1986   predicate(n->as_Vector()->length() == 8);
  1987   match(Set dst (ReplicateS zero));
  1988   format %{ "pxor    $dst,$dst\t! replicate8S zero" %}
  1989   ins_encode %{
  1990     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  1991   %}
  1992   ins_pipe( fpu_reg_reg );
  1993 %}
  1995 instruct Repl16S_zero(vecY dst, immI0 zero) %{
  1996   predicate(n->as_Vector()->length() == 16);
  1997   match(Set dst (ReplicateS zero));
  1998   format %{ "vpxor   $dst,$dst,$dst\t! replicate16S zero" %}
  1999   ins_encode %{
  2000     // The vpxor macro expands to vxorpd here, since AVX1 has no 256-bit vpxor (AVX2 adds it).
  2001     bool vector256 = true;
  2002     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  2003   %}
  2004   ins_pipe( fpu_reg_reg );
  2005 %}
  2007 // Replicate integer (4 byte) scalar into a vector
  2008 instruct Repl2I(vecD dst, rRegI src) %{
  2009   predicate(n->as_Vector()->length() == 2);
  2010   match(Set dst (ReplicateI src));
  2011   format %{ "movd    $dst,$src\n\t"
  2012             "pshufd  $dst,$dst,0x00\t! replicate2I" %}
  2013   ins_encode %{
  2014     __ movdl($dst$$XMMRegister, $src$$Register);
  2015     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  2016   %}
  2017   ins_pipe( fpu_reg_reg );
  2018 %}
  2020 instruct Repl4I(vecX dst, rRegI src) %{
  2021   predicate(n->as_Vector()->length() == 4);
  2022   match(Set dst (ReplicateI src));
  2023   format %{ "movd    $dst,$src\n\t"
  2024             "pshufd  $dst,$dst,0x00\t! replicate4I" %}
  2025   ins_encode %{
  2026     __ movdl($dst$$XMMRegister, $src$$Register);
  2027     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  2028   %}
  2029   ins_pipe( pipe_slow );
  2030 %}
  2032 instruct Repl8I(vecY dst, rRegI src) %{
  2033   predicate(n->as_Vector()->length() == 8);
  2034   match(Set dst (ReplicateI src));
  2035   format %{ "movd    $dst,$src\n\t"
  2036             "pshufd  $dst,$dst,0x00\n\t"
  2037             "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
  2038   ins_encode %{
  2039     __ movdl($dst$$XMMRegister, $src$$Register);
  2040     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  2041     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  2042   %}
  2043   ins_pipe( pipe_slow );
  2044 %}
  2046 // Replicate integer (4 byte) scalar immediate into a vector by loading from the constant table.
  2047 instruct Repl2I_imm(vecD dst, immI con) %{
  2048   predicate(n->as_Vector()->length() == 2);
  2049   match(Set dst (ReplicateI con));
  2050   format %{ "movq    $dst,[$constantaddress]\t! replicate2I($con)" %}
  2051   ins_encode %{
  2052     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  2053   %}
  2054   ins_pipe( fpu_reg_reg );
  2055 %}
  2057 instruct Repl4I_imm(vecX dst, immI con) %{
  2058   predicate(n->as_Vector()->length() == 4);
  2059   match(Set dst (ReplicateI con));
  2060   format %{ "movq    $dst,[$constantaddress]\t! replicate4I($con)\n\t"
  2061             "punpcklqdq $dst,$dst" %}
  2062   ins_encode %{
  2063     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  2064     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  2065   %}
  2066   ins_pipe( pipe_slow );
  2067 %}
  2069 instruct Repl8I_imm(vecY dst, immI con) %{
  2070   predicate(n->as_Vector()->length() == 8);
  2071   match(Set dst (ReplicateI con));
  2072   format %{ "movq    $dst,[$constantaddress]\t! replicate8I($con)\n\t"
  2073             "punpcklqdq $dst,$dst\n\t"
  2074             "vinserti128h $dst,$dst,$dst" %}
  2075   ins_encode %{
  2076     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  2077     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  2078     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  2079   %}
  2080   ins_pipe( pipe_slow );
  2081 %}
  2083 // The integer can be loaded into an XMM register directly from memory.
  2084 instruct Repl2I_mem(vecD dst, memory mem) %{
  2085   predicate(n->as_Vector()->length() == 2);
  2086   match(Set dst (ReplicateI (LoadI mem)));
  2087   format %{ "movd    $dst,$mem\n\t"
  2088             "pshufd  $dst,$dst,0x00\t! replicate2I" %}
  2089   ins_encode %{
  2090     __ movdl($dst$$XMMRegister, $mem$$Address);
  2091     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  2092   %}
  2093   ins_pipe( fpu_reg_reg );
  2094 %}
  2096 instruct Repl4I_mem(vecX dst, memory mem) %{
  2097   predicate(n->as_Vector()->length() == 4);
  2098   match(Set dst (ReplicateI (LoadI mem)));
  2099   format %{ "movd    $dst,$mem\n\t"
  2100             "pshufd  $dst,$dst,0x00\t! replicate4I" %}
  2101   ins_encode %{
  2102     __ movdl($dst$$XMMRegister, $mem$$Address);
  2103     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  2104   %}
  2105   ins_pipe( pipe_slow );
  2106 %}
  2108 instruct Repl8I_mem(vecY dst, memory mem) %{
  2109   predicate(n->as_Vector()->length() == 8);
  2110   match(Set dst (ReplicateI (LoadI mem)));
  2111   format %{ "movd    $dst,$mem\n\t"
  2112             "pshufd  $dst,$dst,0x00\n\t"
  2113             "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
  2114   ins_encode %{
  2115     __ movdl($dst$$XMMRegister, $mem$$Address);
  2116     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  2117     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  2118   %}
  2119   ins_pipe( pipe_slow );
  2120 %}
  2122 // Replicate integer (4 byte) scalar zero into a vector
  2123 instruct Repl2I_zero(vecD dst, immI0 zero) %{
  2124   predicate(n->as_Vector()->length() == 2);
  2125   match(Set dst (ReplicateI zero));
  2126   format %{ "pxor    $dst,$dst\t! replicate2I zero" %}
  2127   ins_encode %{
  2128     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  2129   %}
  2130   ins_pipe( fpu_reg_reg );
  2131 %}
  2133 instruct Repl4I_zero(vecX dst, immI0 zero) %{
  2134   predicate(n->as_Vector()->length() == 4);
  2135   match(Set dst (ReplicateI zero));
  2136   format %{ "pxor    $dst,$dst\t! replicate4I zero" %}
  2137   ins_encode %{
  2138     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  2139   %}
  2140   ins_pipe( fpu_reg_reg );
  2141 %}
  2143 instruct Repl8I_zero(vecY dst, immI0 zero) %{
  2144   predicate(n->as_Vector()->length() == 8);
  2145   match(Set dst (ReplicateI zero));
  2146   format %{ "vpxor   $dst,$dst,$dst\t! replicate8I zero" %}
  2147   ins_encode %{
  2148     // The vpxor macro expands to vxorpd here, since AVX1 has no 256-bit vpxor (AVX2 adds it).
  2149     bool vector256 = true;
  2150     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  2151   %}
  2152   ins_pipe( fpu_reg_reg );
  2153 %}
  2155 // Replicate long (8 byte) scalar into a vector
  2156 #ifdef _LP64
  2157 instruct Repl2L(vecX dst, rRegL src) %{
  2158   predicate(n->as_Vector()->length() == 2);
  2159   match(Set dst (ReplicateL src));
  2160   format %{ "movdq   $dst,$src\n\t"
  2161             "punpcklqdq $dst,$dst\t! replicate2L" %}
  2162   ins_encode %{
  2163     __ movdq($dst$$XMMRegister, $src$$Register);
  2164     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  2165   %}
  2166   ins_pipe( pipe_slow );
  2167 %}
  2169 instruct Repl4L(vecY dst, rRegL src) %{
  2170   predicate(n->as_Vector()->length() == 4);
  2171   match(Set dst (ReplicateL src));
  2172   format %{ "movdq   $dst,$src\n\t"
  2173             "punpcklqdq $dst,$dst\n\t"
  2174             "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
  2175   ins_encode %{
  2176     __ movdq($dst$$XMMRegister, $src$$Register);
  2177     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  2178     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  2179   %}
  2180   ins_pipe( pipe_slow );
  2181 %}
  2182 #else // _LP64
  2183 instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
  2184   predicate(n->as_Vector()->length() == 2);
  2185   match(Set dst (ReplicateL src));
  2186   effect(TEMP dst, USE src, TEMP tmp);
  2187   format %{ "movdl   $dst,$src.lo\n\t"
  2188             "movdl   $tmp,$src.hi\n\t"
  2189             "punpckldq $dst,$tmp\n\t"
  2190             "punpcklqdq $dst,$dst\t! replicate2L"%}
  2191   ins_encode %{
  2192     __ movdl($dst$$XMMRegister, $src$$Register);
  2193     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
  2194     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
  2195     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  2196   %}
  2197   ins_pipe( pipe_slow );
  2198 %}
  2200 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
  2201   predicate(n->as_Vector()->length() == 4);
  2202   match(Set dst (ReplicateL src));
  2203   effect(TEMP dst, USE src, TEMP tmp);
  2204   format %{ "movdl   $dst,$src.lo\n\t"
  2205             "movdl   $tmp,$src.hi\n\t"
  2206             "punpckldq $dst,$tmp\n\t"
  2207             "punpcklqdq $dst,$dst\n\t"
  2208             "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
  2209   ins_encode %{
  2210     __ movdl($dst$$XMMRegister, $src$$Register);
  2211     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
  2212     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
  2213     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  2214     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  2215   %}
  2216   ins_pipe( pipe_slow );
  2217 %}
  2218 #endif // _LP64
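// In the 32-bit VM above, a long lives in a pair of 32-bit GP registers, so
// the two halves are moved into the XMM register separately and recombined;
// roughly:
//   movdl      dst, src.lo   ->  dst = [ 0      | src.lo ]
//   movdl      tmp, src.hi   ->  tmp = [ 0      | src.hi ]
//   punpckldq  dst, tmp      ->  dst low 64 bits = [ src.hi | src.lo ]
//   punpcklqdq dst, dst      ->  both 64-bit lanes hold the long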
  2220 // Replicate long (8 byte) scalar immediate into a vector by loading from the constant table.
  2221 instruct Repl2L_imm(vecX dst, immL con) %{
  2222   predicate(n->as_Vector()->length() == 2);
  2223   match(Set dst (ReplicateL con));
  2224   format %{ "movq    $dst,[$constantaddress]\n\t"
  2225             "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
  2226   ins_encode %{
  2227     __ movq($dst$$XMMRegister, $constantaddress($con));
  2228     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  2229   %}
  2230   ins_pipe( pipe_slow );
  2231 %}
  2233 instruct Repl4L_imm(vecY dst, immL con) %{
  2234   predicate(n->as_Vector()->length() == 4);
  2235   match(Set dst (ReplicateL con));
  2236   format %{ "movq    $dst,[$constantaddress]\n\t"
  2237             "punpcklqdq $dst,$dst\n\t"
  2238             "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %}
  2239   ins_encode %{
  2240     __ movq($dst$$XMMRegister, $constantaddress($con));
  2241     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  2242     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  2243   %}
  2244   ins_pipe( pipe_slow );
  2245 %}
  2247 // The long can be loaded into an XMM register directly from memory.
  2248 instruct Repl2L_mem(vecX dst, memory mem) %{
  2249   predicate(n->as_Vector()->length() == 2);
  2250   match(Set dst (ReplicateL (LoadL mem)));
  2251   format %{ "movq    $dst,$mem\n\t"
  2252             "punpcklqdq $dst,$dst\t! replicate2L" %}
  2253   ins_encode %{
  2254     __ movq($dst$$XMMRegister, $mem$$Address);
  2255     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  2256   %}
  2257   ins_pipe( pipe_slow );
  2258 %}
  2260 instruct Repl4L_mem(vecY dst, memory mem) %{
  2261   predicate(n->as_Vector()->length() == 4);
  2262   match(Set dst (ReplicateL (LoadL mem)));
  2263   format %{ "movq    $dst,$mem\n\t"
  2264             "punpcklqdq $dst,$dst\n\t"
  2265             "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
  2266   ins_encode %{
  2267     __ movq($dst$$XMMRegister, $mem$$Address);
  2268     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  2269     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  2270   %}
  2271   ins_pipe( pipe_slow );
  2272 %}
  2274 // Replicate long (8 byte) scalar zero into a vector
  2275 instruct Repl2L_zero(vecX dst, immL0 zero) %{
  2276   predicate(n->as_Vector()->length() == 2);
  2277   match(Set dst (ReplicateL zero));
  2278   format %{ "pxor    $dst,$dst\t! replicate2L zero" %}
  2279   ins_encode %{
  2280     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  2281   %}
  2282   ins_pipe( fpu_reg_reg );
  2283 %}
  2285 instruct Repl4L_zero(vecY dst, immL0 zero) %{
  2286   predicate(n->as_Vector()->length() == 4);
  2287   match(Set dst (ReplicateL zero));
  2288   format %{ "vpxor   $dst,$dst,$dst\t! replicate4L zero" %}
  2289   ins_encode %{
  2290     // The vpxor macro expands to vxorpd here, since AVX1 has no 256-bit vpxor (AVX2 adds it).
  2291     bool vector256 = true;
  2292     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  2293   %}
  2294   ins_pipe( fpu_reg_reg );
  2295 %}
  2297 // Replicate float (4 byte) scalar into a vector
  2298 instruct Repl2F(vecD dst, regF src) %{
  2299   predicate(n->as_Vector()->length() == 2);
  2300   match(Set dst (ReplicateF src));
  2301   format %{ "pshufd  $dst,$src,0x00\t! replicate2F" %}
  2302   ins_encode %{
  2303     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  2304   %}
  2305   ins_pipe( fpu_reg_reg );
  2306 %}
  2308 instruct Repl4F(vecX dst, regF src) %{
  2309   predicate(n->as_Vector()->length() == 4);
  2310   match(Set dst (ReplicateF src));
  2311   format %{ "pshufd  $dst,$src,0x00\t! replicate4F" %}
  2312   ins_encode %{
  2313     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  2314   %}
  2315   ins_pipe( pipe_slow );
  2316 %}
  2318 instruct Repl8F(vecY dst, regF src) %{
  2319   predicate(n->as_Vector()->length() == 8);
  2320   match(Set dst (ReplicateF src));
  2321   format %{ "pshufd  $dst,$src,0x00\n\t"
  2322             "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
  2323   ins_encode %{
  2324     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  2325     __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  2326   %}
  2327   ins_pipe( pipe_slow );
  2328 %}
  2330 // Replicate float (4 byte) scalar zero into a vector
  2331 instruct Repl2F_zero(vecD dst, immF0 zero) %{
  2332   predicate(n->as_Vector()->length() == 2);
  2333   match(Set dst (ReplicateF zero));
  2334   format %{ "xorps   $dst,$dst\t! replicate2F zero" %}
  2335   ins_encode %{
  2336     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  2337   %}
  2338   ins_pipe( fpu_reg_reg );
  2339 %}
  2341 instruct Repl4F_zero(vecX dst, immF0 zero) %{
  2342   predicate(n->as_Vector()->length() == 4);
  2343   match(Set dst (ReplicateF zero));
  2344   format %{ "xorps   $dst,$dst\t! replicate4F zero" %}
  2345   ins_encode %{
  2346     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  2347   %}
  2348   ins_pipe( fpu_reg_reg );
  2349 %}
  2351 instruct Repl8F_zero(vecY dst, immF0 zero) %{
  2352   predicate(n->as_Vector()->length() == 8);
  2353   match(Set dst (ReplicateF zero));
  2354   format %{ "vxorps  $dst,$dst,$dst\t! replicate8F zero" %}
  2355   ins_encode %{
  2356     bool vector256 = true;
  2357     __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  2358   %}
  2359   ins_pipe( fpu_reg_reg );
  2360 %}
  2362 // Replicate double (8 byte) scalar into a vector
  2363 instruct Repl2D(vecX dst, regD src) %{
  2364   predicate(n->as_Vector()->length() == 2);
  2365   match(Set dst (ReplicateD src));
  2366   format %{ "pshufd  $dst,$src,0x44\t! replicate2D" %}
  2367   ins_encode %{
  2368     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  2369   %}
  2370   ins_pipe( pipe_slow );
  2371 %}
  2373 instruct Repl4D(vecY dst, regD src) %{
  2374   predicate(n->as_Vector()->length() == 4);
  2375   match(Set dst (ReplicateD src));
  2376   format %{ "pshufd  $dst,$src,0x44\n\t"
  2377             "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
  2378   ins_encode %{
  2379     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  2380     __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  2381   %}
  2382   ins_pipe( pipe_slow );
  2383 %}
  2385 // Replicate double (8 byte) scalar zero into a vector
  2386 instruct Repl2D_zero(vecX dst, immD0 zero) %{
  2387   predicate(n->as_Vector()->length() == 2);
  2388   match(Set dst (ReplicateD zero));
  2389   format %{ "xorpd   $dst,$dst\t! replicate2D zero" %}
  2390   ins_encode %{
  2391     __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
  2392   %}
  2393   ins_pipe( fpu_reg_reg );
  2394 %}
  2396 instruct Repl4D_zero(vecY dst, immD0 zero) %{
  2397   predicate(n->as_Vector()->length() == 4);
  2398   match(Set dst (ReplicateD zero));
  2399   format %{ "vxorpd  $dst,$dst,$dst\t! replicate4D zero" %}
  2400   ins_encode %{
  2401     bool vector256 = true;
  2402     __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  2403   %}
  2404   ins_pipe( fpu_reg_reg );
  2405 %}
  2407 // ====================VECTOR ARITHMETIC=======================================
  2409 // --------------------------------- ADD --------------------------------------
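// For illustration (hypothetical Java source), an element-wise loop such as
//
//   for (int i = 0; i < a.length; i++) { a[i] += b[i]; }   // int[] a, b
//
// can be auto-vectorized by SuperWord into AddVI nodes, which the rules below
// match as paddd (SSE2), vpaddd xmm (AVX) or vpaddd ymm (AVX2); the same
// scheme repeats for the byte/short/long/float/double flavors that follow.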
  2411 // Bytes vector add
  2412 instruct vadd4B(vecS dst, vecS src) %{
  2413   predicate(n->as_Vector()->length() == 4);
  2414   match(Set dst (AddVB dst src));
  2415   format %{ "paddb   $dst,$src\t! add packed4B" %}
  2416   ins_encode %{
  2417     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  2418   %}
  2419   ins_pipe( pipe_slow );
  2420 %}
  2422 instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
  2423   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  2424   match(Set dst (AddVB src1 src2));
  2425   format %{ "vpaddb  $dst,$src1,$src2\t! add packed4B" %}
  2426   ins_encode %{
  2427     bool vector256 = false;
  2428     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2429   %}
  2430   ins_pipe( pipe_slow );
  2431 %}
  2433 instruct vadd8B(vecD dst, vecD src) %{
  2434   predicate(n->as_Vector()->length() == 8);
  2435   match(Set dst (AddVB dst src));
  2436   format %{ "paddb   $dst,$src\t! add packed8B" %}
  2437   ins_encode %{
  2438     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  2439   %}
  2440   ins_pipe( pipe_slow );
  2441 %}
  2443 instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
  2444   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  2445   match(Set dst (AddVB src1 src2));
  2446   format %{ "vpaddb  $dst,$src1,$src2\t! add packed8B" %}
  2447   ins_encode %{
  2448     bool vector256 = false;
  2449     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2450   %}
  2451   ins_pipe( pipe_slow );
  2452 %}
  2454 instruct vadd16B(vecX dst, vecX src) %{
  2455   predicate(n->as_Vector()->length() == 16);
  2456   match(Set dst (AddVB dst src));
  2457   format %{ "paddb   $dst,$src\t! add packed16B" %}
  2458   ins_encode %{
  2459     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  2460   %}
  2461   ins_pipe( pipe_slow );
  2462 %}
  2464 instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
  2465   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  2466   match(Set dst (AddVB src1 src2));
  2467   format %{ "vpaddb  $dst,$src1,$src2\t! add packed16B" %}
  2468   ins_encode %{
  2469     bool vector256 = false;
  2470     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2471   %}
  2472   ins_pipe( pipe_slow );
  2473 %}
  2475 instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
  2476   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  2477   match(Set dst (AddVB src (LoadVector mem)));
  2478   format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
  2479   ins_encode %{
  2480     bool vector256 = false;
  2481     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2482   %}
  2483   ins_pipe( pipe_slow );
  2484 %}
  2486 instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
  2487   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  2488   match(Set dst (AddVB src1 src2));
  2489   format %{ "vpaddb  $dst,$src1,$src2\t! add packed32B" %}
  2490   ins_encode %{
  2491     bool vector256 = true;
  2492     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2493   %}
  2494   ins_pipe( pipe_slow );
  2495 %}
  2497 instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
  2498   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  2499   match(Set dst (AddVB src (LoadVector mem)));
  2500   format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
  2501   ins_encode %{
  2502     bool vector256 = true;
  2503     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2504   %}
  2505   ins_pipe( pipe_slow );
  2506 %}
  2508 // Shorts/Chars vector add
  2509 instruct vadd2S(vecS dst, vecS src) %{
  2510   predicate(n->as_Vector()->length() == 2);
  2511   match(Set dst (AddVS dst src));
  2512   format %{ "paddw   $dst,$src\t! add packed2S" %}
  2513   ins_encode %{
  2514     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  2515   %}
  2516   ins_pipe( pipe_slow );
  2517 %}
  2519 instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
  2520   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  2521   match(Set dst (AddVS src1 src2));
  2522   format %{ "vpaddw  $dst,$src1,$src2\t! add packed2S" %}
  2523   ins_encode %{
  2524     bool vector256 = false;
  2525     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2526   %}
  2527   ins_pipe( pipe_slow );
  2528 %}
  2530 instruct vadd4S(vecD dst, vecD src) %{
  2531   predicate(n->as_Vector()->length() == 4);
  2532   match(Set dst (AddVS dst src));
  2533   format %{ "paddw   $dst,$src\t! add packed4S" %}
  2534   ins_encode %{
  2535     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  2536   %}
  2537   ins_pipe( pipe_slow );
  2538 %}
  2540 instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
  2541   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  2542   match(Set dst (AddVS src1 src2));
  2543   format %{ "vpaddw  $dst,$src1,$src2\t! add packed4S" %}
  2544   ins_encode %{
  2545     bool vector256 = false;
  2546     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2547   %}
  2548   ins_pipe( pipe_slow );
  2549 %}
  2551 instruct vadd8S(vecX dst, vecX src) %{
  2552   predicate(n->as_Vector()->length() == 8);
  2553   match(Set dst (AddVS dst src));
  2554   format %{ "paddw   $dst,$src\t! add packed8S" %}
  2555   ins_encode %{
  2556     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  2557   %}
  2558   ins_pipe( pipe_slow );
  2559 %}
  2561 instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
  2562   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  2563   match(Set dst (AddVS src1 src2));
  2564   format %{ "vpaddw  $dst,$src1,$src2\t! add packed8S" %}
  2565   ins_encode %{
  2566     bool vector256 = false;
  2567     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2568   %}
  2569   ins_pipe( pipe_slow );
  2570 %}
  2572 instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
  2573   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  2574   match(Set dst (AddVS src (LoadVector mem)));
  2575   format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
  2576   ins_encode %{
  2577     bool vector256 = false;
  2578     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2579   %}
  2580   ins_pipe( pipe_slow );
  2581 %}
  2583 instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
  2584   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  2585   match(Set dst (AddVS src1 src2));
  2586   format %{ "vpaddw  $dst,$src1,$src2\t! add packed16S" %}
  2587   ins_encode %{
  2588     bool vector256 = true;
  2589     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2590   %}
  2591   ins_pipe( pipe_slow );
  2592 %}
  2594 instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
  2595   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  2596   match(Set dst (AddVS src (LoadVector mem)));
  2597   format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
  2598   ins_encode %{
  2599     bool vector256 = true;
  2600     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2601   %}
  2602   ins_pipe( pipe_slow );
  2603 %}
  2605 // Integers vector add
  2606 instruct vadd2I(vecD dst, vecD src) %{
  2607   predicate(n->as_Vector()->length() == 2);
  2608   match(Set dst (AddVI dst src));
  2609   format %{ "paddd   $dst,$src\t! add packed2I" %}
  2610   ins_encode %{
  2611     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  2612   %}
  2613   ins_pipe( pipe_slow );
  2614 %}
  2616 instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  2617   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  2618   match(Set dst (AddVI src1 src2));
  2619   format %{ "vpaddd  $dst,$src1,$src2\t! add packed2I" %}
  2620   ins_encode %{
  2621     bool vector256 = false;
  2622     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2623   %}
  2624   ins_pipe( pipe_slow );
  2625 %}
  2627 instruct vadd4I(vecX dst, vecX src) %{
  2628   predicate(n->as_Vector()->length() == 4);
  2629   match(Set dst (AddVI dst src));
  2630   format %{ "paddd   $dst,$src\t! add packed4I" %}
  2631   ins_encode %{
  2632     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  2633   %}
  2634   ins_pipe( pipe_slow );
  2635 %}
  2637 instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  2638   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  2639   match(Set dst (AddVI src1 src2));
  2640   format %{ "vpaddd  $dst,$src1,$src2\t! add packed4I" %}
  2641   ins_encode %{
  2642     bool vector256 = false;
  2643     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2644   %}
  2645   ins_pipe( pipe_slow );
  2646 %}
  2648 instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
  2649   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  2650   match(Set dst (AddVI src (LoadVector mem)));
  2651   format %{ "vpaddd  $dst,$src,$mem\t! add packed4I" %}
  2652   ins_encode %{
  2653     bool vector256 = false;
  2654     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2655   %}
  2656   ins_pipe( pipe_slow );
  2657 %}
  2659 instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
  2660   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  2661   match(Set dst (AddVI src1 src2));
  2662   format %{ "vpaddd  $dst,$src1,$src2\t! add packed8I" %}
  2663   ins_encode %{
  2664     bool vector256 = true;
  2665     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2666   %}
  2667   ins_pipe( pipe_slow );
  2668 %}
  2670 instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
  2671   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  2672   match(Set dst (AddVI src (LoadVector mem)));
  2673   format %{ "vpaddd  $dst,$src,$mem\t! add packed8I" %}
  2674   ins_encode %{
  2675     bool vector256 = true;
  2676     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2677   %}
  2678   ins_pipe( pipe_slow );
  2679 %}
  2681 // Longs vector add
  2682 instruct vadd2L(vecX dst, vecX src) %{
  2683   predicate(n->as_Vector()->length() == 2);
  2684   match(Set dst (AddVL dst src));
  2685   format %{ "paddq   $dst,$src\t! add packed2L" %}
  2686   ins_encode %{
  2687     __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  2688   %}
  2689   ins_pipe( pipe_slow );
  2690 %}
  2692 instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  2693   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  2694   match(Set dst (AddVL src1 src2));
  2695   format %{ "vpaddq  $dst,$src1,$src2\t! add packed2L" %}
  2696   ins_encode %{
  2697     bool vector256 = false;
  2698     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2699   %}
  2700   ins_pipe( pipe_slow );
  2701 %}
  2703 instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
  2704   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  2705   match(Set dst (AddVL src (LoadVector mem)));
  2706   format %{ "vpaddq  $dst,$src,$mem\t! add packed2L" %}
  2707   ins_encode %{
  2708     bool vector256 = false;
  2709     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2710   %}
  2711   ins_pipe( pipe_slow );
  2712 %}
  2714 instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
  2715   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  2716   match(Set dst (AddVL src1 src2));
  2717   format %{ "vpaddq  $dst,$src1,$src2\t! add packed4L" %}
  2718   ins_encode %{
  2719     bool vector256 = true;
  2720     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2721   %}
  2722   ins_pipe( pipe_slow );
  2723 %}
  2725 instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
  2726   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  2727   match(Set dst (AddVL src (LoadVector mem)));
  2728   format %{ "vpaddq  $dst,$src,$mem\t! add packed4L" %}
  2729   ins_encode %{
  2730     bool vector256 = true;
  2731     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2732   %}
  2733   ins_pipe( pipe_slow );
  2734 %}
  2736 // Floats vector add
  2737 instruct vadd2F(vecD dst, vecD src) %{
  2738   predicate(n->as_Vector()->length() == 2);
  2739   match(Set dst (AddVF dst src));
  2740   format %{ "addps   $dst,$src\t! add packed2F" %}
  2741   ins_encode %{
  2742     __ addps($dst$$XMMRegister, $src$$XMMRegister);
  2743   %}
  2744   ins_pipe( pipe_slow );
  2745 %}
  2747 instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  2748   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  2749   match(Set dst (AddVF src1 src2));
  2750   format %{ "vaddps  $dst,$src1,$src2\t! add packed2F" %}
  2751   ins_encode %{
  2752     bool vector256 = false;
  2753     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2754   %}
  2755   ins_pipe( pipe_slow );
  2756 %}
  2758 instruct vadd4F(vecX dst, vecX src) %{
  2759   predicate(n->as_Vector()->length() == 4);
  2760   match(Set dst (AddVF dst src));
  2761   format %{ "addps   $dst,$src\t! add packed4F" %}
  2762   ins_encode %{
  2763     __ addps($dst$$XMMRegister, $src$$XMMRegister);
  2764   %}
  2765   ins_pipe( pipe_slow );
  2766 %}
  2768 instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  2769   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  2770   match(Set dst (AddVF src1 src2));
  2771   format %{ "vaddps  $dst,$src1,$src2\t! add packed4F" %}
  2772   ins_encode %{
  2773     bool vector256 = false;
  2774     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2775   %}
  2776   ins_pipe( pipe_slow );
  2777 %}
  2779 instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  2780   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  2781   match(Set dst (AddVF src (LoadVector mem)));
  2782   format %{ "vaddps  $dst,$src,$mem\t! add packed4F" %}
  2783   ins_encode %{
  2784     bool vector256 = false;
  2785     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2786   %}
  2787   ins_pipe( pipe_slow );
  2788 %}
  2790 instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
  2791   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  2792   match(Set dst (AddVF src1 src2));
  2793   format %{ "vaddps  $dst,$src1,$src2\t! add packed8F" %}
  2794   ins_encode %{
  2795     bool vector256 = true;
  2796     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2797   %}
  2798   ins_pipe( pipe_slow );
  2799 %}
  2801 instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
  2802   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  2803   match(Set dst (AddVF src (LoadVector mem)));
  2804   format %{ "vaddps  $dst,$src,$mem\t! add packed8F" %}
  2805   ins_encode %{
  2806     bool vector256 = true;
  2807     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2808   %}
  2809   ins_pipe( pipe_slow );
  2810 %}
  2812 // Doubles vector add
  2813 instruct vadd2D(vecX dst, vecX src) %{
  2814   predicate(n->as_Vector()->length() == 2);
  2815   match(Set dst (AddVD dst src));
  2816   format %{ "addpd   $dst,$src\t! add packed2D" %}
  2817   ins_encode %{
  2818     __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  2819   %}
  2820   ins_pipe( pipe_slow );
  2821 %}
  2823 instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
  2824   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  2825   match(Set dst (AddVD src1 src2));
  2826   format %{ "vaddpd  $dst,$src1,$src2\t! add packed2D" %}
  2827   ins_encode %{
  2828     bool vector256 = false;
  2829     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2830   %}
  2831   ins_pipe( pipe_slow );
  2832 %}
  2834 instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
  2835   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  2836   match(Set dst (AddVD src (LoadVector mem)));
  2837   format %{ "vaddpd  $dst,$src,$mem\t! add packed2D" %}
  2838   ins_encode %{
  2839     bool vector256 = false;
  2840     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2841   %}
  2842   ins_pipe( pipe_slow );
  2843 %}
  2845 instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
  2846   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  2847   match(Set dst (AddVD src1 src2));
  2848   format %{ "vaddpd  $dst,$src1,$src2\t! add packed4D" %}
  2849   ins_encode %{
  2850     bool vector256 = true;
  2851     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2852   %}
  2853   ins_pipe( pipe_slow );
  2854 %}
  2856 instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
  2857   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  2858   match(Set dst (AddVD src (LoadVector mem)));
  2859   format %{ "vaddpd  $dst,$src,$mem\t! add packed4D" %}
  2860   ins_encode %{
  2861     bool vector256 = true;
  2862     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2863   %}
  2864   ins_pipe( pipe_slow );
  2865 %}
  2867 // --------------------------------- SUB --------------------------------------
  2869 // Bytes vector sub
  2870 instruct vsub4B(vecS dst, vecS src) %{
  2871   predicate(n->as_Vector()->length() == 4);
  2872   match(Set dst (SubVB dst src));
  2873   format %{ "psubb   $dst,$src\t! sub packed4B" %}
  2874   ins_encode %{
  2875     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  2876   %}
  2877   ins_pipe( pipe_slow );
  2878 %}
  2880 instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
  2881   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  2882   match(Set dst (SubVB src1 src2));
  2883   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed4B" %}
  2884   ins_encode %{
  2885     bool vector256 = false;
  2886     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2887   %}
  2888   ins_pipe( pipe_slow );
  2889 %}
  2891 instruct vsub8B(vecD dst, vecD src) %{
  2892   predicate(n->as_Vector()->length() == 8);
  2893   match(Set dst (SubVB dst src));
  2894   format %{ "psubb   $dst,$src\t! sub packed8B" %}
  2895   ins_encode %{
  2896     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  2897   %}
  2898   ins_pipe( pipe_slow );
  2899 %}
  2901 instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
  2902   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  2903   match(Set dst (SubVB src1 src2));
  2904   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed8B" %}
  2905   ins_encode %{
  2906     bool vector256 = false;
  2907     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2908   %}
  2909   ins_pipe( pipe_slow );
  2910 %}
  2912 instruct vsub16B(vecX dst, vecX src) %{
  2913   predicate(n->as_Vector()->length() == 16);
  2914   match(Set dst (SubVB dst src));
  2915   format %{ "psubb   $dst,$src\t! sub packed16B" %}
  2916   ins_encode %{
  2917     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  2918   %}
  2919   ins_pipe( pipe_slow );
  2920 %}
  2922 instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
  2923   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  2924   match(Set dst (SubVB src1 src2));
  2925   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed16B" %}
  2926   ins_encode %{
  2927     bool vector256 = false;
  2928     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2929   %}
  2930   ins_pipe( pipe_slow );
  2931 %}
  2933 instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
  2934   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  2935   match(Set dst (SubVB src (LoadVector mem)));
  2936   format %{ "vpsubb  $dst,$src,$mem\t! sub packed16B" %}
  2937   ins_encode %{
  2938     bool vector256 = false;
  2939     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2940   %}
  2941   ins_pipe( pipe_slow );
  2942 %}
  2944 instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
  2945   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  2946   match(Set dst (SubVB src1 src2));
  2947   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed32B" %}
  2948   ins_encode %{
  2949     bool vector256 = true;
  2950     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2951   %}
  2952   ins_pipe( pipe_slow );
  2953 %}
  2955 instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
  2956   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  2957   match(Set dst (SubVB src (LoadVector mem)));
  2958   format %{ "vpsubb  $dst,$src,$mem\t! sub packed32B" %}
  2959   ins_encode %{
  2960     bool vector256 = true;
  2961     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  2962   %}
  2963   ins_pipe( pipe_slow );
  2964 %}
  2966 // Shorts/Chars vector sub
  2967 instruct vsub2S(vecS dst, vecS src) %{
  2968   predicate(n->as_Vector()->length() == 2);
  2969   match(Set dst (SubVS dst src));
  2970   format %{ "psubw   $dst,$src\t! sub packed2S" %}
  2971   ins_encode %{
  2972     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  2973   %}
  2974   ins_pipe( pipe_slow );
  2975 %}
  2977 instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
  2978   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  2979   match(Set dst (SubVS src1 src2));
  2980   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed2S" %}
  2981   ins_encode %{
  2982     bool vector256 = false;
  2983     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  2984   %}
  2985   ins_pipe( pipe_slow );
  2986 %}
  2988 instruct vsub4S(vecD dst, vecD src) %{
  2989   predicate(n->as_Vector()->length() == 4);
  2990   match(Set dst (SubVS dst src));
  2991   format %{ "psubw   $dst,$src\t! sub packed4S" %}
  2992   ins_encode %{
  2993     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  2994   %}
  2995   ins_pipe( pipe_slow );
  2996 %}
  2998 instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
  2999   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3000   match(Set dst (SubVS src1 src2));
  3001   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed4S" %}
  3002   ins_encode %{
  3003     bool vector256 = false;
  3004     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3005   %}
  3006   ins_pipe( pipe_slow );
  3007 %}
  3009 instruct vsub8S(vecX dst, vecX src) %{
  3010   predicate(n->as_Vector()->length() == 8);
  3011   match(Set dst (SubVS dst src));
  3012   format %{ "psubw   $dst,$src\t! sub packed8S" %}
  3013   ins_encode %{
  3014     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  3015   %}
  3016   ins_pipe( pipe_slow );
  3017 %}
  3019 instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
  3020   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  3021   match(Set dst (SubVS src1 src2));
  3022   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed8S" %}
  3023   ins_encode %{
  3024     bool vector256 = false;
  3025     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3026   %}
  3027   ins_pipe( pipe_slow );
  3028 %}
  3030 instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
  3031   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  3032   match(Set dst (SubVS src (LoadVector mem)));
  3033   format %{ "vpsubw  $dst,$src,$mem\t! sub packed8S" %}
  3034   ins_encode %{
  3035     bool vector256 = false;
  3036     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3037   %}
  3038   ins_pipe( pipe_slow );
  3039 %}
  3041 instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
  3042   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  3043   match(Set dst (SubVS src1 src2));
  3044   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed16S" %}
  3045   ins_encode %{
  3046     bool vector256 = true;
  3047     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3048   %}
  3049   ins_pipe( pipe_slow );
  3050 %}
  3052 instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
  3053   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  3054   match(Set dst (SubVS src (LoadVector mem)));
  3055   format %{ "vpsubw  $dst,$src,$mem\t! sub packed16S" %}
  3056   ins_encode %{
  3057     bool vector256 = true;
  3058     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3059   %}
  3060   ins_pipe( pipe_slow );
  3061 %}
  3063 // Integers vector sub
  3064 instruct vsub2I(vecD dst, vecD src) %{
  3065   predicate(n->as_Vector()->length() == 2);
  3066   match(Set dst (SubVI dst src));
  3067   format %{ "psubd   $dst,$src\t! sub packed2I" %}
  3068   ins_encode %{
  3069     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  3070   %}
  3071   ins_pipe( pipe_slow );
  3072 %}
  3074 instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
  3075   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3076   match(Set dst (SubVI src1 src2));
  3077   format %{ "vpsubd  $dst,$src1,$src2\t! sub packed2I" %}
  3078   ins_encode %{
  3079     bool vector256 = false;
  3080     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3081   %}
  3082   ins_pipe( pipe_slow );
  3083 %}
  3085 instruct vsub4I(vecX dst, vecX src) %{
  3086   predicate(n->as_Vector()->length() == 4);
  3087   match(Set dst (SubVI dst src));
  3088   format %{ "psubd   $dst,$src\t! sub packed4I" %}
  3089   ins_encode %{
  3090     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  3091   %}
  3092   ins_pipe( pipe_slow );
  3093 %}
  3095 instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
  3096   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3097   match(Set dst (SubVI src1 src2));
  3098   format %{ "vpsubd  $dst,$src1,$src2\t! sub packed4I" %}
  3099   ins_encode %{
  3100     bool vector256 = false;
  3101     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3102   %}
  3103   ins_pipe( pipe_slow );
  3104 %}
  3106 instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
  3107   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3108   match(Set dst (SubVI src (LoadVector mem)));
  3109   format %{ "vpsubd  $dst,$src,$mem\t! sub packed4I" %}
  3110   ins_encode %{
  3111     bool vector256 = false;
  3112     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3113   %}
  3114   ins_pipe( pipe_slow );
  3115 %}
  3117 instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
  3118   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  3119   match(Set dst (SubVI src1 src2));
  3120   format %{ "vpsubd  $dst,$src1,$src2\t! sub packed8I" %}
  3121   ins_encode %{
  3122     bool vector256 = true;
  3123     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3124   %}
  3125   ins_pipe( pipe_slow );
  3126 %}
  3128 instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
  3129   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  3130   match(Set dst (SubVI src (LoadVector mem)));
  3131   format %{ "vpsubd  $dst,$src,$mem\t! sub packed8I" %}
  3132   ins_encode %{
  3133     bool vector256 = true;
  3134     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3135   %}
  3136   ins_pipe( pipe_slow );
  3137 %}
  3139 // Longs vector sub
  3140 instruct vsub2L(vecX dst, vecX src) %{
  3141   predicate(n->as_Vector()->length() == 2);
  3142   match(Set dst (SubVL dst src));
  3143   format %{ "psubq   $dst,$src\t! sub packed2L" %}
  3144   ins_encode %{
  3145     __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  3146   %}
  3147   ins_pipe( pipe_slow );
  3148 %}
  3150 instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
  3151   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3152   match(Set dst (SubVL src1 src2));
  3153   format %{ "vpsubq  $dst,$src1,$src2\t! sub packed2L" %}
  3154   ins_encode %{
  3155     bool vector256 = false;
  3156     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3157   %}
  3158   ins_pipe( pipe_slow );
  3159 %}
  3161 instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
  3162   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3163   match(Set dst (SubVL src (LoadVector mem)));
  3164   format %{ "vpsubq  $dst,$src,$mem\t! sub packed2L" %}
  3165   ins_encode %{
  3166     bool vector256 = false;
  3167     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3168   %}
  3169   ins_pipe( pipe_slow );
  3170 %}
  3172 instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
  3173   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  3174   match(Set dst (SubVL src1 src2));
  3175   format %{ "vpsubq  $dst,$src1,$src2\t! sub packed4L" %}
  3176   ins_encode %{
  3177     bool vector256 = true;
  3178     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3179   %}
  3180   ins_pipe( pipe_slow );
  3181 %}
  3183 instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
  3184   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  3185   match(Set dst (SubVL src (LoadVector mem)));
  3186   format %{ "vpsubq  $dst,$src,$mem\t! sub packed4L" %}
  3187   ins_encode %{
  3188     bool vector256 = true;
  3189     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3190   %}
  3191   ins_pipe( pipe_slow );
  3192 %}
  3194 // Floats vector sub
  3195 instruct vsub2F(vecD dst, vecD src) %{
  3196   predicate(n->as_Vector()->length() == 2);
  3197   match(Set dst (SubVF dst src));
  3198   format %{ "subps   $dst,$src\t! sub packed2F" %}
  3199   ins_encode %{
  3200     __ subps($dst$$XMMRegister, $src$$XMMRegister);
  3201   %}
  3202   ins_pipe( pipe_slow );
  3203 %}
  3205 instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
  3206   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3207   match(Set dst (SubVF src1 src2));
  3208   format %{ "vsubps  $dst,$src1,$src2\t! sub packed2F" %}
  3209   ins_encode %{
  3210     bool vector256 = false;
  3211     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3212   %}
  3213   ins_pipe( pipe_slow );
  3214 %}
  3216 instruct vsub4F(vecX dst, vecX src) %{
  3217   predicate(n->as_Vector()->length() == 4);
  3218   match(Set dst (SubVF dst src));
  3219   format %{ "subps   $dst,$src\t! sub packed4F" %}
  3220   ins_encode %{
  3221     __ subps($dst$$XMMRegister, $src$$XMMRegister);
  3222   %}
  3223   ins_pipe( pipe_slow );
  3224 %}
  3226 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
  3227   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3228   match(Set dst (SubVF src1 src2));
  3229   format %{ "vsubps  $dst,$src1,$src2\t! sub packed4F" %}
  3230   ins_encode %{
  3231     bool vector256 = false;
  3232     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3233   %}
  3234   ins_pipe( pipe_slow );
  3235 %}
  3237 instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
  3238   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3239   match(Set dst (SubVF src (LoadVector mem)));
  3240   format %{ "vsubps  $dst,$src,$mem\t! sub packed4F" %}
  3241   ins_encode %{
  3242     bool vector256 = false;
  3243     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3244   %}
  3245   ins_pipe( pipe_slow );
  3246 %}
  3248 instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
  3249   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  3250   match(Set dst (SubVF src1 src2));
  3251   format %{ "vsubps  $dst,$src1,$src2\t! sub packed8F" %}
  3252   ins_encode %{
  3253     bool vector256 = true;
  3254     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3255   %}
  3256   ins_pipe( pipe_slow );
  3257 %}
  3259 instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
  3260   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  3261   match(Set dst (SubVF src (LoadVector mem)));
  3262   format %{ "vsubps  $dst,$src,$mem\t! sub packed8F" %}
  3263   ins_encode %{
  3264     bool vector256 = true;
  3265     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3266   %}
  3267   ins_pipe( pipe_slow );
  3268 %}
  3270 // Doubles vector sub
  3271 instruct vsub2D(vecX dst, vecX src) %{
  3272   predicate(n->as_Vector()->length() == 2);
  3273   match(Set dst (SubVD dst src));
  3274   format %{ "subpd   $dst,$src\t! sub packed2D" %}
  3275   ins_encode %{
  3276     __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  3277   %}
  3278   ins_pipe( pipe_slow );
  3279 %}
  3281 instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
  3282   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3283   match(Set dst (SubVD src1 src2));
  3284   format %{ "vsubpd  $dst,$src1,$src2\t! sub packed2D" %}
  3285   ins_encode %{
  3286     bool vector256 = false;
  3287     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3288   %}
  3289   ins_pipe( pipe_slow );
  3290 %}
  3292 instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
  3293   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3294   match(Set dst (SubVD src (LoadVector mem)));
  3295   format %{ "vsubpd  $dst,$src,$mem\t! sub packed2D" %}
  3296   ins_encode %{
  3297     bool vector256 = false;
  3298     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3299   %}
  3300   ins_pipe( pipe_slow );
  3301 %}
  3303 instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
  3304   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3305   match(Set dst (SubVD src1 src2));
  3306   format %{ "vsubpd  $dst,$src1,$src2\t! sub packed4D" %}
  3307   ins_encode %{
  3308     bool vector256 = true;
  3309     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3310   %}
  3311   ins_pipe( pipe_slow );
  3312 %}
  3314 instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
  3315   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3316   match(Set dst (SubVD src (LoadVector mem)));
  3317   format %{ "vsubpd  $dst,$src,$mem\t! sub packed4D" %}
  3318   ins_encode %{
  3319     bool vector256 = true;
  3320     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3321   %}
  3322   ins_pipe( pipe_slow );
  3323 %}
  3325 // --------------------------------- MUL --------------------------------------
  3327 // Shorts/Chars vector mul
  3328 instruct vmul2S(vecS dst, vecS src) %{
  3329   predicate(n->as_Vector()->length() == 2);
  3330   match(Set dst (MulVS dst src));
  3331   format %{ "pmullw  $dst,$src\t! mul packed2S" %}
  3332   ins_encode %{
  3333     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  3334   %}
  3335   ins_pipe( pipe_slow );
  3336 %}
  3338 instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
  3339   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3340   match(Set dst (MulVS src1 src2));
  3341   format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  3342   ins_encode %{
  3343     bool vector256 = false;
  3344     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3345   %}
  3346   ins_pipe( pipe_slow );
  3347 %}
  3349 instruct vmul4S(vecD dst, vecD src) %{
  3350   predicate(n->as_Vector()->length() == 4);
  3351   match(Set dst (MulVS dst src));
  3352   format %{ "pmullw  $dst,$src\t! mul packed4S" %}
  3353   ins_encode %{
  3354     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  3355   %}
  3356   ins_pipe( pipe_slow );
  3357 %}
  3359 instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
  3360   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3361   match(Set dst (MulVS src1 src2));
  3362   format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  3363   ins_encode %{
  3364     bool vector256 = false;
  3365     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3366   %}
  3367   ins_pipe( pipe_slow );
  3368 %}
  3370 instruct vmul8S(vecX dst, vecX src) %{
  3371   predicate(n->as_Vector()->length() == 8);
  3372   match(Set dst (MulVS dst src));
  3373   format %{ "pmullw  $dst,$src\t! mul packed8S" %}
  3374   ins_encode %{
  3375     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  3376   %}
  3377   ins_pipe( pipe_slow );
  3378 %}
  3380 instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
  3381   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  3382   match(Set dst (MulVS src1 src2));
  3383   format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  3384   ins_encode %{
  3385     bool vector256 = false;
  3386     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3387   %}
  3388   ins_pipe( pipe_slow );
  3389 %}
  3391 instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
  3392   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  3393   match(Set dst (MulVS src (LoadVector mem)));
  3394   format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  3395   ins_encode %{
  3396     bool vector256 = false;
  3397     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3398   %}
  3399   ins_pipe( pipe_slow );
  3400 %}
  3402 instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
  3403   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  3404   match(Set dst (MulVS src1 src2));
  3405   format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  3406   ins_encode %{
  3407     bool vector256 = true;
  3408     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3409   %}
  3410   ins_pipe( pipe_slow );
  3411 %}
  3413 instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
  3414   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  3415   match(Set dst (MulVS src (LoadVector mem)));
  3416   format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  3417   ins_encode %{
  3418     bool vector256 = true;
  3419     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3420   %}
  3421   ins_pipe( pipe_slow );
  3422 %}
  3424 // Integers vector mul (sse4_1)
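// pmulld (packed 32-bit integer multiply) was introduced with SSE4.1, hence
// the UseSSE > 3 predicate on the non-AVX rules below.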
  3425 instruct vmul2I(vecD dst, vecD src) %{
  3426   predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
  3427   match(Set dst (MulVI dst src));
  3428   format %{ "pmulld  $dst,$src\t! mul packed2I" %}
  3429   ins_encode %{
  3430     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  3431   %}
  3432   ins_pipe( pipe_slow );
  3433 %}
  3435 instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
  3436   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3437   match(Set dst (MulVI src1 src2));
  3438   format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
  3439   ins_encode %{
  3440     bool vector256 = false;
  3441     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3442   %}
  3443   ins_pipe( pipe_slow );
  3444 %}
  3446 instruct vmul4I(vecX dst, vecX src) %{
  3447   predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
  3448   match(Set dst (MulVI dst src));
  3449   format %{ "pmulld  $dst,$src\t! mul packed4I" %}
  3450   ins_encode %{
  3451     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  3452   %}
  3453   ins_pipe( pipe_slow );
  3454 %}
  3456 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
  3457   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3458   match(Set dst (MulVI src1 src2));
  3459   format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
  3460   ins_encode %{
  3461     bool vector256 = false;
  3462     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3463   %}
  3464   ins_pipe( pipe_slow );
  3465 %}
  3467 instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
  3468   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3469   match(Set dst (MulVI src (LoadVector mem)));
  3470   format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
  3471   ins_encode %{
  3472     bool vector256 = false;
  3473     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3474   %}
  3475   ins_pipe( pipe_slow );
  3476 %}
  3478 instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
  3479   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  3480   match(Set dst (MulVI src1 src2));
  3481   format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
  3482   ins_encode %{
  3483     bool vector256 = true;
  3484     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3485   %}
  3486   ins_pipe( pipe_slow );
  3487 %}
  3489 instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
  3490   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  3491   match(Set dst (MulVI src (LoadVector mem)));
  3492   format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
  3493   ins_encode %{
  3494     bool vector256 = true;
  3495     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3496   %}
  3497   ins_pipe( pipe_slow );
  3498 %}
  3500 // Floats vector mul
  3501 instruct vmul2F(vecD dst, vecD src) %{
  3502   predicate(n->as_Vector()->length() == 2);
  3503   match(Set dst (MulVF dst src));
  3504   format %{ "mulps   $dst,$src\t! mul packed2F" %}
  3505   ins_encode %{
  3506     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  3507   %}
  3508   ins_pipe( pipe_slow );
  3509 %}
  3511 instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
  3512   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3513   match(Set dst (MulVF src1 src2));
  3514   format %{ "vmulps  $dst,$src1,$src2\t! mul packed2F" %}
  3515   ins_encode %{
  3516     bool vector256 = false;
  3517     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3518   %}
  3519   ins_pipe( pipe_slow );
  3520 %}
  3522 instruct vmul4F(vecX dst, vecX src) %{
  3523   predicate(n->as_Vector()->length() == 4);
  3524   match(Set dst (MulVF dst src));
  3525   format %{ "mulps   $dst,$src\t! mul packed4F" %}
  3526   ins_encode %{
  3527     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  3528   %}
  3529   ins_pipe( pipe_slow );
  3530 %}
  3532 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
  3533   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3534   match(Set dst (MulVF src1 src2));
  3535   format %{ "vmulps  $dst,$src1,$src2\t! mul packed4F" %}
  3536   ins_encode %{
  3537     bool vector256 = false;
  3538     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3539   %}
  3540   ins_pipe( pipe_slow );
  3541 %}
  3543 instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
  3544   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3545   match(Set dst (MulVF src (LoadVector mem)));
  3546   format %{ "vmulps  $dst,$src,$mem\t! mul packed4F" %}
  3547   ins_encode %{
  3548     bool vector256 = false;
  3549     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3550   %}
  3551   ins_pipe( pipe_slow );
  3552 %}
  3554 instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
  3555   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  3556   match(Set dst (MulVF src1 src2));
  3557   format %{ "vmulps  $dst,$src1,$src2\t! mul packed8F" %}
  3558   ins_encode %{
  3559     bool vector256 = true;
  3560     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3561   %}
  3562   ins_pipe( pipe_slow );
  3563 %}
  3565 instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
  3566   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  3567   match(Set dst (MulVF src (LoadVector mem)));
  3568   format %{ "vmulps  $dst,$src,$mem\t! mul packed8F" %}
  3569   ins_encode %{
  3570     bool vector256 = true;
  3571     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3572   %}
  3573   ins_pipe( pipe_slow );
  3574 %}
  3576 // Doubles vector mul
  3577 instruct vmul2D(vecX dst, vecX src) %{
  3578   predicate(n->as_Vector()->length() == 2);
  3579   match(Set dst (MulVD dst src));
  3580   format %{ "mulpd   $dst,$src\t! mul packed2D" %}
  3581   ins_encode %{
  3582     __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  3583   %}
  3584   ins_pipe( pipe_slow );
  3585 %}
  3587 instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
  3588   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3589   match(Set dst (MulVD src1 src2));
  3590   format %{ "vmulpd  $dst,$src1,$src2\t! mul packed2D" %}
  3591   ins_encode %{
  3592     bool vector256 = false;
  3593     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3594   %}
  3595   ins_pipe( pipe_slow );
  3596 %}
  3598 instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
  3599   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3600   match(Set dst (MulVD src (LoadVector mem)));
  3601   format %{ "vmulpd  $dst,$src,$mem\t! mul packed2D" %}
  3602   ins_encode %{
  3603     bool vector256 = false;
  3604     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3605   %}
  3606   ins_pipe( pipe_slow );
  3607 %}
  3609 instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
  3610   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3611   match(Set dst (MulVD src1 src2));
  3612   format %{ "vmulpd  $dst,$src1,$src2\t! mul packed4D" %}
  3613   ins_encode %{
  3614     bool vector256 = true;
  3615     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3616   %}
  3617   ins_pipe( pipe_slow );
  3618 %}
  3620 instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
  3621   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3622   match(Set dst (MulVD src (LoadVector mem)));
  3623   format %{ "vmulpd  $dst,$src,$mem\t! mul packed4D" %}
  3624   ins_encode %{
  3625     bool vector256 = true;
  3626     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3627   %}
  3628   ins_pipe( pipe_slow );
  3629 %}
  3631 // --------------------------------- DIV --------------------------------------
  3633 // Floats vector div
  3634 instruct vdiv2F(vecD dst, vecD src) %{
  3635   predicate(n->as_Vector()->length() == 2);
  3636   match(Set dst (DivVF dst src));
  3637   format %{ "divps   $dst,$src\t! div packed2F" %}
  3638   ins_encode %{
  3639     __ divps($dst$$XMMRegister, $src$$XMMRegister);
  3640   %}
  3641   ins_pipe( pipe_slow );
  3642 %}
  3644 instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
  3645   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3646   match(Set dst (DivVF src1 src2));
  3647   format %{ "vdivps  $dst,$src1,$src2\t! div packed2F" %}
  3648   ins_encode %{
  3649     bool vector256 = false;
  3650     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3651   %}
  3652   ins_pipe( pipe_slow );
  3653 %}
  3655 instruct vdiv4F(vecX dst, vecX src) %{
  3656   predicate(n->as_Vector()->length() == 4);
  3657   match(Set dst (DivVF dst src));
  3658   format %{ "divps   $dst,$src\t! div packed4F" %}
  3659   ins_encode %{
  3660     __ divps($dst$$XMMRegister, $src$$XMMRegister);
  3661   %}
  3662   ins_pipe( pipe_slow );
  3663 %}
  3665 instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
  3666   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3667   match(Set dst (DivVF src1 src2));
  3668   format %{ "vdivps  $dst,$src1,$src2\t! div packed4F" %}
  3669   ins_encode %{
  3670     bool vector256 = false;
  3671     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3672   %}
  3673   ins_pipe( pipe_slow );
  3674 %}
  3676 instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
  3677   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3678   match(Set dst (DivVF src (LoadVector mem)));
  3679   format %{ "vdivps  $dst,$src,$mem\t! div packed4F" %}
  3680   ins_encode %{
  3681     bool vector256 = false;
  3682     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3683   %}
  3684   ins_pipe( pipe_slow );
  3685 %}
  3687 instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
  3688   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  3689   match(Set dst (DivVF src1 src2));
  3690   format %{ "vdivps  $dst,$src1,$src2\t! div packed8F" %}
  3691   ins_encode %{
  3692     bool vector256 = true;
  3693     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3694   %}
  3695   ins_pipe( pipe_slow );
  3696 %}
  3698 instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
  3699   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  3700   match(Set dst (DivVF src (LoadVector mem)));
  3701   format %{ "vdivps  $dst,$src,$mem\t! div packed8F" %}
  3702   ins_encode %{
  3703     bool vector256 = true;
  3704     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3705   %}
  3706   ins_pipe( pipe_slow );
  3707 %}
  3709 // Doubles vector div
  3710 instruct vdiv2D(vecX dst, vecX src) %{
  3711   predicate(n->as_Vector()->length() == 2);
  3712   match(Set dst (DivVD dst src));
  3713   format %{ "divpd   $dst,$src\t! div packed2D" %}
  3714   ins_encode %{
  3715     __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  3716   %}
  3717   ins_pipe( pipe_slow );
  3718 %}
  3720 instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
  3721   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3722   match(Set dst (DivVD src1 src2));
  3723   format %{ "vdivpd  $dst,$src1,$src2\t! div packed2D" %}
  3724   ins_encode %{
  3725     bool vector256 = false;
  3726     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3727   %}
  3728   ins_pipe( pipe_slow );
  3729 %}
  3731 instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
  3732   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3733   match(Set dst (DivVD src (LoadVector mem)));
  3734   format %{ "vdivpd  $dst,$src,$mem\t! div packed2D" %}
  3735   ins_encode %{
  3736     bool vector256 = false;
  3737     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3738   %}
  3739   ins_pipe( pipe_slow );
  3740 %}
  3742 instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
  3743   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3744   match(Set dst (DivVD src1 src2));
  3745   format %{ "vdivpd  $dst,$src1,$src2\t! div packed4D" %}
  3746   ins_encode %{
  3747     bool vector256 = true;
  3748     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  3749   %}
  3750   ins_pipe( pipe_slow );
  3751 %}
  3753 instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
  3754   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3755   match(Set dst (DivVD src (LoadVector mem)));
  3756   format %{ "vdivpd  $dst,$src,$mem\t! div packed4D" %}
  3757   ins_encode %{
  3758     bool vector256 = true;
  3759     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  3760   %}
  3761   ins_pipe( pipe_slow );
  3762 %}
  3764 // ------------------------------ LeftShift -----------------------------------
  3766 // Shorts/Chars vector left shift
  3767 instruct vsll2S(vecS dst, regF shift) %{
  3768   predicate(n->as_Vector()->length() == 2);
  3769   match(Set dst (LShiftVS dst shift));
  3770   format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
  3771   ins_encode %{
  3772     __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  3773   %}
  3774   ins_pipe( pipe_slow );
  3775 %}
  3777 instruct vsll2S_imm(vecS dst, immI8 shift) %{
  3778   predicate(n->as_Vector()->length() == 2);
  3779   match(Set dst (LShiftVS dst shift));
  3780   format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
  3781   ins_encode %{
  3782     __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  3783   %}
  3784   ins_pipe( pipe_slow );
  3785 %}
  3787 instruct vsll2S_reg(vecS dst, vecS src, regF shift) %{
  3788   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3789   match(Set dst (LShiftVS src shift));
  3790   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
  3791   ins_encode %{
  3792     bool vector256 = false;
  3793     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  3794   %}
  3795   ins_pipe( pipe_slow );
  3796 %}
  3798 instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  3799   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3800   match(Set dst (LShiftVS src shift));
  3801   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
  3802   ins_encode %{
  3803     bool vector256 = false;
  3804     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  3805   %}
  3806   ins_pipe( pipe_slow );
  3807 %}
  3809 instruct vsll4S(vecD dst, regF shift) %{
  3810   predicate(n->as_Vector()->length() == 4);
  3811   match(Set dst (LShiftVS dst shift));
  3812   format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
  3813   ins_encode %{
  3814     __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  3815   %}
  3816   ins_pipe( pipe_slow );
  3817 %}
  3819 instruct vsll4S_imm(vecD dst, immI8 shift) %{
  3820   predicate(n->as_Vector()->length() == 4);
  3821   match(Set dst (LShiftVS dst shift));
  3822   format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
  3823   ins_encode %{
  3824     __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  3825   %}
  3826   ins_pipe( pipe_slow );
  3827 %}
  3829 instruct vsll4S_reg(vecD dst, vecD src, regF shift) %{
  3830   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3831   match(Set dst (LShiftVS src shift));
  3832   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
  3833   ins_encode %{
  3834     bool vector256 = false;
  3835     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  3836   %}
  3837   ins_pipe( pipe_slow );
  3838 %}
  3840 instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  3841   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3842   match(Set dst (LShiftVS src shift));
  3843   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
  3844   ins_encode %{
  3845     bool vector256 = false;
  3846     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  3847   %}
  3848   ins_pipe( pipe_slow );
  3849 %}
  3851 instruct vsll8S(vecX dst, regF shift) %{
  3852   predicate(n->as_Vector()->length() == 8);
  3853   match(Set dst (LShiftVS dst shift));
  3854   format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
  3855   ins_encode %{
  3856     __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  3857   %}
  3858   ins_pipe( pipe_slow );
  3859 %}
  3861 instruct vsll8S_imm(vecX dst, immI8 shift) %{
  3862   predicate(n->as_Vector()->length() == 8);
  3863   match(Set dst (LShiftVS dst shift));
  3864   format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
  3865   ins_encode %{
  3866     __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  3867   %}
  3868   ins_pipe( pipe_slow );
  3869 %}
  3871 instruct vsll8S_reg(vecX dst, vecX src, regF shift) %{
  3872   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  3873   match(Set dst (LShiftVS src shift));
  3874   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
  3875   ins_encode %{
  3876     bool vector256 = false;
  3877     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  3878   %}
  3879   ins_pipe( pipe_slow );
  3880 %}
  3882 instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  3883   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  3884   match(Set dst (LShiftVS src shift));
  3885   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
  3886   ins_encode %{
  3887     bool vector256 = false;
  3888     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  3889   %}
  3890   ins_pipe( pipe_slow );
  3891 %}
  3893 instruct vsll16S_reg(vecY dst, vecY src, regF shift) %{
  3894   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  3895   match(Set dst (LShiftVS src shift));
  3896   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
  3897   ins_encode %{
  3898     bool vector256 = true;
  3899     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  3900   %}
  3901   ins_pipe( pipe_slow );
  3902 %}
  3904 instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  3905   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  3906   match(Set dst (LShiftVS src shift));
  3907   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
  3908   ins_encode %{
  3909     bool vector256 = true;
  3910     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  3911   %}
  3912   ins_pipe( pipe_slow );
  3913 %}
  3915 // Integers vector left shift
  3916 instruct vsll2I(vecD dst, regF shift) %{
  3917   predicate(n->as_Vector()->length() == 2);
  3918   match(Set dst (LShiftVI dst shift));
  3919   format %{ "pslld   $dst,$shift\t! left shift packed2I" %}
  3920   ins_encode %{
  3921     __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  3922   %}
  3923   ins_pipe( pipe_slow );
  3924 %}
  3926 instruct vsll2I_imm(vecD dst, immI8 shift) %{
  3927   predicate(n->as_Vector()->length() == 2);
  3928   match(Set dst (LShiftVI dst shift));
  3929   format %{ "pslld   $dst,$shift\t! left shift packed2I" %}
  3930   ins_encode %{
  3931     __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  3932   %}
  3933   ins_pipe( pipe_slow );
  3934 %}
  3936 instruct vsll2I_reg(vecD dst, vecD src, regF shift) %{
  3937   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3938   match(Set dst (LShiftVI src shift));
  3939   format %{ "vpslld  $dst,$src,$shift\t! left shift packed2I" %}
  3940   ins_encode %{
  3941     bool vector256 = false;
  3942     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  3943   %}
  3944   ins_pipe( pipe_slow );
  3945 %}
  3947 instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  3948   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  3949   match(Set dst (LShiftVI src shift));
  3950   format %{ "vpslld  $dst,$src,$shift\t! left shift packed2I" %}
  3951   ins_encode %{
  3952     bool vector256 = false;
  3953     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  3954   %}
  3955   ins_pipe( pipe_slow );
  3956 %}
  3958 instruct vsll4I(vecX dst, regF shift) %{
  3959   predicate(n->as_Vector()->length() == 4);
  3960   match(Set dst (LShiftVI dst shift));
  3961   format %{ "pslld   $dst,$shift\t! left shift packed4I" %}
  3962   ins_encode %{
  3963     __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  3964   %}
  3965   ins_pipe( pipe_slow );
  3966 %}
  3968 instruct vsll4I_imm(vecX dst, immI8 shift) %{
  3969   predicate(n->as_Vector()->length() == 4);
  3970   match(Set dst (LShiftVI dst shift));
  3971   format %{ "pslld   $dst,$shift\t! left shift packed4I" %}
  3972   ins_encode %{
  3973     __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  3974   %}
  3975   ins_pipe( pipe_slow );
  3976 %}
  3978 instruct vsll4I_reg(vecX dst, vecX src, regF shift) %{
  3979   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3980   match(Set dst (LShiftVI src shift));
  3981   format %{ "vpslld  $dst,$src,$shift\t! left shift packed4I" %}
  3982   ins_encode %{
  3983     bool vector256 = false;
  3984     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  3985   %}
  3986   ins_pipe( pipe_slow );
  3987 %}
  3989 instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  3990   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  3991   match(Set dst (LShiftVI src shift));
  3992   format %{ "vpslld  $dst,$src,$shift\t! left shift packed4I" %}
  3993   ins_encode %{
  3994     bool vector256 = false;
  3995     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  3996   %}
  3997   ins_pipe( pipe_slow );
  3998 %}
  4000 instruct vsll8I_reg(vecY dst, vecY src, regF shift) %{
  4001   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  4002   match(Set dst (LShiftVI src shift));
  4003   format %{ "vpslld  $dst,$src,$shift\t! left shift packed8I" %}
  4004   ins_encode %{
  4005     bool vector256 = true;
  4006     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4007   %}
  4008   ins_pipe( pipe_slow );
  4009 %}
  4011 instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  4012   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  4013   match(Set dst (LShiftVI src shift));
  4014   format %{ "vpslld  $dst,$src,$shift\t! left shift packed8I" %}
  4015   ins_encode %{
  4016     bool vector256 = true;
  4017     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4018   %}
  4019   ins_pipe( pipe_slow );
  4020 %}
  4022 // Longs vector left shift
  4023 instruct vsll2L(vecX dst, regF shift) %{
  4024   predicate(n->as_Vector()->length() == 2);
  4025   match(Set dst (LShiftVL dst shift));
  4026   format %{ "psllq   $dst,$shift\t! left shift packed2L" %}
  4027   ins_encode %{
  4028     __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
  4029   %}
  4030   ins_pipe( pipe_slow );
  4031 %}
  4033 instruct vsll2L_imm(vecX dst, immI8 shift) %{
  4034   predicate(n->as_Vector()->length() == 2);
  4035   match(Set dst (LShiftVL dst shift));
  4036   format %{ "psllq   $dst,$shift\t! left shift packed2L" %}
  4037   ins_encode %{
  4038     __ psllq($dst$$XMMRegister, (int)$shift$$constant);
  4039   %}
  4040   ins_pipe( pipe_slow );
  4041 %}
  4043 instruct vsll2L_reg(vecX dst, vecX src, regF shift) %{
  4044   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  4045   match(Set dst (LShiftVL src shift));
  4046   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed2L" %}
  4047   ins_encode %{
  4048     bool vector256 = false;
  4049     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4050   %}
  4051   ins_pipe( pipe_slow );
  4052 %}
  4054 instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  4055   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  4056   match(Set dst (LShiftVL src shift));
  4057   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed2L" %}
  4058   ins_encode %{
  4059     bool vector256 = false;
  4060     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4061   %}
  4062   ins_pipe( pipe_slow );
  4063 %}
  4065 instruct vsll4L_reg(vecY dst, vecY src, regF shift) %{
  4066   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  4067   match(Set dst (LShiftVL src shift));
  4068   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed4L" %}
  4069   ins_encode %{
  4070     bool vector256 = true;
  4071     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4072   %}
  4073   ins_pipe( pipe_slow );
  4074 %}
  4076 instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  4077   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  4078   match(Set dst (LShiftVL src shift));
  4079   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed4L" %}
  4080   ins_encode %{
  4081     bool vector256 = true;
  4082     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4083   %}
  4084   ins_pipe( pipe_slow );
  4085 %}
  4087 // ----------------------- LogicalRightShift -----------------------------------
  4089 // Shorts/Chars vector logical right shift produces an incorrect Java result
  4090 // for negative data because Java code converts short values into ints with
  4091 // sign extension before a shift, so no match rules are provided for it here.
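// A minimal Java sketch of that semantic gap (illustrative only, not a match
// rule in this file); 's' and 'n' are hypothetical names:
//   short s = -1; int n = 3;
//   short scalar = (short)(s >>> n);            // s is sign-extended to int first: result -1
//   short packed = (short)((s & 0xFFFF) >>> n); // what a 16-bit psrlw would produce: 8191
// The two disagree whenever s < 0 and n > 0, so these shifts are not vectorized.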
  4093 // Integers vector logical right shift
  4094 instruct vsrl2I(vecD dst, regF shift) %{
  4095   predicate(n->as_Vector()->length() == 2);
  4096   match(Set dst (URShiftVI dst shift));
  4097   format %{ "psrld   $dst,$shift\t! logical right shift packed2I" %}
  4098   ins_encode %{
  4099     __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  4100   %}
  4101   ins_pipe( pipe_slow );
  4102 %}
  4104 instruct vsrl2I_imm(vecD dst, immI8 shift) %{
  4105   predicate(n->as_Vector()->length() == 2);
  4106   match(Set dst (URShiftVI dst shift));
  4107   format %{ "psrld   $dst,$shift\t! logical right shift packed2I" %}
  4108   ins_encode %{
  4109     __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  4110   %}
  4111   ins_pipe( pipe_slow );
  4112 %}
  4114 instruct vsrl2I_reg(vecD dst, vecD src, regF shift) %{
  4115   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  4116   match(Set dst (URShiftVI src shift));
  4117   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed2I" %}
  4118   ins_encode %{
  4119     bool vector256 = false;
  4120     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4121   %}
  4122   ins_pipe( pipe_slow );
  4123 %}
  4125 instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  4126   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  4127   match(Set dst (URShiftVI src shift));
  4128   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed2I" %}
  4129   ins_encode %{
  4130     bool vector256 = false;
  4131     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4132   %}
  4133   ins_pipe( pipe_slow );
  4134 %}
  4136 instruct vsrl4I(vecX dst, regF shift) %{
  4137   predicate(n->as_Vector()->length() == 4);
  4138   match(Set dst (URShiftVI dst shift));
  4139   format %{ "psrld   $dst,$shift\t! logical right shift packed4I" %}
  4140   ins_encode %{
  4141     __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  4142   %}
  4143   ins_pipe( pipe_slow );
  4144 %}
  4146 instruct vsrl4I_imm(vecX dst, immI8 shift) %{
  4147   predicate(n->as_Vector()->length() == 4);
  4148   match(Set dst (URShiftVI dst shift));
  4149   format %{ "psrld   $dst,$shift\t! logical right shift packed4I" %}
  4150   ins_encode %{
  4151     __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  4152   %}
  4153   ins_pipe( pipe_slow );
  4154 %}
  4156 instruct vsrl4I_reg(vecX dst, vecX src, regF shift) %{
  4157   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  4158   match(Set dst (URShiftVI src shift));
  4159   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed4I" %}
  4160   ins_encode %{
  4161     bool vector256 = false;
  4162     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4163   %}
  4164   ins_pipe( pipe_slow );
  4165 %}
  4167 instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  4168   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  4169   match(Set dst (URShiftVI src shift));
  4170   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed4I" %}
  4171   ins_encode %{
  4172     bool vector256 = false;
  4173     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4174   %}
  4175   ins_pipe( pipe_slow );
  4176 %}
  4178 instruct vsrl8I_reg(vecY dst, vecY src, regF shift) %{
  4179   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  4180   match(Set dst (URShiftVI src shift));
  4181   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed8I" %}
  4182   ins_encode %{
  4183     bool vector256 = true;
  4184     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4185   %}
  4186   ins_pipe( pipe_slow );
  4187 %}
  4189 instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  4190   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  4191   match(Set dst (URShiftVI src shift));
  4192   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed8I" %}
  4193   ins_encode %{
  4194     bool vector256 = true;
  4195     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4196   %}
  4197   ins_pipe( pipe_slow );
  4198 %}
  4200 // Longs vector logical right shift
  4201 instruct vsrl2L(vecX dst, regF shift) %{
  4202   predicate(n->as_Vector()->length() == 2);
  4203   match(Set dst (URShiftVL dst shift));
  4204   format %{ "psrlq   $dst,$shift\t! logical right shift packed2L" %}
  4205   ins_encode %{
  4206     __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
  4207   %}
  4208   ins_pipe( pipe_slow );
  4209 %}
  4211 instruct vsrl2L_imm(vecX dst, immI8 shift) %{
  4212   predicate(n->as_Vector()->length() == 2);
  4213   match(Set dst (URShiftVL dst shift));
  4214   format %{ "psrlq   $dst,$shift\t! logical right shift packed2L" %}
  4215   ins_encode %{
  4216     __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
  4217   %}
  4218   ins_pipe( pipe_slow );
  4219 %}
  4221 instruct vsrl2L_reg(vecX dst, vecX src, regF shift) %{
  4222   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  4223   match(Set dst (URShiftVL src shift));
  4224   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed2L" %}
  4225   ins_encode %{
  4226     bool vector256 = false;
  4227     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4228   %}
  4229   ins_pipe( pipe_slow );
  4230 %}
  4232 instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  4233   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  4234   match(Set dst (URShiftVL src shift));
  4235   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed2L" %}
  4236   ins_encode %{
  4237     bool vector256 = false;
  4238     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4239   %}
  4240   ins_pipe( pipe_slow );
  4241 %}
  4243 instruct vsrl4L_reg(vecY dst, vecY src, regF shift) %{
  4244   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  4245   match(Set dst (URShiftVL src shift));
  4246   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed4L" %}
  4247   ins_encode %{
  4248     bool vector256 = true;
  4249     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4250   %}
  4251   ins_pipe( pipe_slow );
  4252 %}
  4254 instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  4255   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  4256   match(Set dst (URShiftVL src shift));
  4257   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed4L" %}
  4258   ins_encode %{
  4259     bool vector256 = true;
  4260     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4261   %}
  4262   ins_pipe( pipe_slow );
  4263 %}
  4265 // ------------------- ArithmeticRightShift -----------------------------------
  4267 // Shorts/Chars vector arithmetic right shift
  4268 instruct vsra2S(vecS dst, regF shift) %{
  4269   predicate(n->as_Vector()->length() == 2);
  4270   match(Set dst (RShiftVS dst shift));
  4271   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed2S" %}
  4272   ins_encode %{
  4273     __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  4274   %}
  4275   ins_pipe( pipe_slow );
  4276 %}
  4278 instruct vsra2S_imm(vecS dst, immI8 shift) %{
  4279   predicate(n->as_Vector()->length() == 2);
  4280   match(Set dst (RShiftVS dst shift));
  4281   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed2S" %}
  4282   ins_encode %{
  4283     __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  4284   %}
  4285   ins_pipe( pipe_slow );
  4286 %}
  4288 instruct vsra2S_reg(vecS dst, vecS src, regF shift) %{
  4289   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  4290   match(Set dst (RShiftVS src shift));
  4291   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  4292   ins_encode %{
  4293     bool vector256 = false;
  4294     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4295   %}
  4296   ins_pipe( pipe_slow );
  4297 %}
  4299 instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  4300   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  4301   match(Set dst (RShiftVS src shift));
  4302   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  4303   ins_encode %{
  4304     bool vector256 = false;
  4305     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4306   %}
  4307   ins_pipe( pipe_slow );
  4308 %}
  4310 instruct vsra4S(vecD dst, regF shift) %{
  4311   predicate(n->as_Vector()->length() == 4);
  4312   match(Set dst (RShiftVS dst shift));
  4313   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
  4314   ins_encode %{
  4315     __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  4316   %}
  4317   ins_pipe( pipe_slow );
  4318 %}
  4320 instruct vsra4S_imm(vecD dst, immI8 shift) %{
  4321   predicate(n->as_Vector()->length() == 4);
  4322   match(Set dst (RShiftVS dst shift));
  4323   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
  4324   ins_encode %{
  4325     __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  4326   %}
  4327   ins_pipe( pipe_slow );
  4328 %}
  4330 instruct vsra4S_reg(vecD dst, vecD src, regF shift) %{
  4331   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  4332   match(Set dst (RShiftVS src shift));
  4333   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  4334   ins_encode %{
  4335     bool vector256 = false;
  4336     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4337   %}
  4338   ins_pipe( pipe_slow );
  4339 %}
  4341 instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  4342   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  4343   match(Set dst (RShiftVS src shift));
  4344   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  4345   ins_encode %{
  4346     bool vector256 = false;
  4347     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4348   %}
  4349   ins_pipe( pipe_slow );
  4350 %}
  4352 instruct vsra8S(vecX dst, regF shift) %{
  4353   predicate(n->as_Vector()->length() == 8);
  4354   match(Set dst (RShiftVS dst shift));
  4355   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
  4356   ins_encode %{
  4357     __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  4358   %}
  4359   ins_pipe( pipe_slow );
  4360 %}
  4362 instruct vsra8S_imm(vecX dst, immI8 shift) %{
  4363   predicate(n->as_Vector()->length() == 8);
  4364   match(Set dst (RShiftVS dst shift));
  4365   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
  4366   ins_encode %{
  4367     __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  4368   %}
  4369   ins_pipe( pipe_slow );
  4370 %}
  4372 instruct vsra8S_reg(vecX dst, vecX src, regF shift) %{
  4373   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  4374   match(Set dst (RShiftVS src shift));
  4375   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  4376   ins_encode %{
  4377     bool vector256 = false;
  4378     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4379   %}
  4380   ins_pipe( pipe_slow );
  4381 %}
  4383 instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  4384   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  4385   match(Set dst (RShiftVS src shift));
  4386   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  4387   ins_encode %{
  4388     bool vector256 = false;
  4389     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4390   %}
  4391   ins_pipe( pipe_slow );
  4392 %}
  4394 instruct vsra16S_reg(vecY dst, vecY src, regF shift) %{
  4395   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  4396   match(Set dst (RShiftVS src shift));
  4397   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  4398   ins_encode %{
  4399     bool vector256 = true;
  4400     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4401   %}
  4402   ins_pipe( pipe_slow );
  4403 %}
  4405 instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  4406   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  4407   match(Set dst (RShiftVS src shift));
  4408   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  4409   ins_encode %{
  4410     bool vector256 = true;
  4411     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4412   %}
  4413   ins_pipe( pipe_slow );
  4414 %}
  4416 // Integers vector arithmetic right shift
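// Illustrative only (assumed example, names and shift amount are made up): an int
// loop of the shape
//   static void sra(int[] a, int[] r) {
//     for (int i = 0; i < a.length; i++) r[i] = a[i] >> 3;
//   }
// is the kind of code the superword pass turns into RShiftVI nodes, which the
// psrad/vpsrad rules below then match.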
  4417 instruct vsra2I(vecD dst, regF shift) %{
  4418   predicate(n->as_Vector()->length() == 2);
  4419   match(Set dst (RShiftVI dst shift));
  4420   format %{ "psrad   $dst,$shift\t! arithmetic right shift packed2I" %}
  4421   ins_encode %{
  4422     __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  4423   %}
  4424   ins_pipe( pipe_slow );
  4425 %}
  4427 instruct vsra2I_imm(vecD dst, immI8 shift) %{
  4428   predicate(n->as_Vector()->length() == 2);
  4429   match(Set dst (RShiftVI dst shift));
  4430   format %{ "psrad   $dst,$shift\t! arithmetic right shift packed2I" %}
  4431   ins_encode %{
  4432     __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  4433   %}
  4434   ins_pipe( pipe_slow );
  4435 %}
  4437 instruct vsra2I_reg(vecD dst, vecD src, regF shift) %{
  4438   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  4439   match(Set dst (RShiftVI src shift));
  4440   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  4441   ins_encode %{
  4442     bool vector256 = false;
  4443     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4444   %}
  4445   ins_pipe( pipe_slow );
  4446 %}
  4448 instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  4449   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  4450   match(Set dst (RShiftVI src shift));
  4451   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  4452   ins_encode %{
  4453     bool vector256 = false;
  4454     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4455   %}
  4456   ins_pipe( pipe_slow );
  4457 %}
  4459 instruct vsra4I(vecX dst, regF shift) %{
  4460   predicate(n->as_Vector()->length() == 4);
  4461   match(Set dst (RShiftVI dst shift));
  4462   format %{ "psrad   $dst,$shift\t! arithmetic right shift packed4I" %}
  4463   ins_encode %{
  4464     __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  4465   %}
  4466   ins_pipe( pipe_slow );
  4467 %}
  4469 instruct vsra4I_imm(vecX dst, immI8 shift) %{
  4470   predicate(n->as_Vector()->length() == 4);
  4471   match(Set dst (RShiftVI dst shift));
  4472   format %{ "psrad   $dst,$shift\t! arithmetic right shift packed4I" %}
  4473   ins_encode %{
  4474     __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  4475   %}
  4476   ins_pipe( pipe_slow );
  4477 %}
  4479 instruct vsra4I_reg(vecX dst, vecX src, regF shift) %{
  4480   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  4481   match(Set dst (RShiftVI src shift));
  4482   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  4483   ins_encode %{
  4484     bool vector256 = false;
  4485     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4486   %}
  4487   ins_pipe( pipe_slow );
  4488 %}
  4490 instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  4491   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  4492   match(Set dst (RShiftVI src shift));
  4493   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  4494   ins_encode %{
  4495     bool vector256 = false;
  4496     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4497   %}
  4498   ins_pipe( pipe_slow );
  4499 %}
  4501 instruct vsra8I_reg(vecY dst, vecY src, regF shift) %{
  4502   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  4503   match(Set dst (RShiftVI src shift));
  4504   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  4505   ins_encode %{
  4506     bool vector256 = true;
  4507     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  4508   %}
  4509   ins_pipe( pipe_slow );
  4510 %}
  4512 instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  4513   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  4514   match(Set dst (RShiftVI src shift));
  4515   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  4516   ins_encode %{
  4517     bool vector256 = true;
  4518     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  4519   %}
  4520   ins_pipe( pipe_slow );
  4521 %}
  4523 // There are no vector arithmetic right shift instructions for longs (the ISA provides no packed 64-bit arithmetic shift), so RShiftVL has no rules in this file.
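// Illustrative note (an assumption about compiler behavior, not something this
// file states): a Java loop such as
//   for (int i = 0; i < a.length; i++) r[i] = a[i] >> 2;   // long[] a, r
// therefore finds no RShiftVL rule to match and is left in scalar form.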
  4526 // --------------------------------- AND --------------------------------------
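// Illustrative only (assumed example, names are made up): element-wise bitwise
// loops such as
//   static void and(int[] a, int[] b, int[] r) {
//     for (int i = 0; i < a.length; i++) r[i] = a[i] & b[i];
//   }
// vectorize into AndV nodes matched below; the OR and XOR sections that follow
// cover the analogous | and ^ loops.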
  4528 instruct vand4B(vecS dst, vecS src) %{
  4529   predicate(n->as_Vector()->length_in_bytes() == 4);
  4530   match(Set dst (AndV dst src));
  4531   format %{ "pand    $dst,$src\t! and vectors (4 bytes)" %}
  4532   ins_encode %{
  4533     __ pand($dst$$XMMRegister, $src$$XMMRegister);
  4534   %}
  4535   ins_pipe( pipe_slow );
  4536 %}
  4538 instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
  4539   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  4540   match(Set dst (AndV src1 src2));
  4541   format %{ "vpand   $dst,$src1,$src2\t! and vectors (4 bytes)" %}
  4542   ins_encode %{
  4543     bool vector256 = false;
  4544     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  4545   %}
  4546   ins_pipe( pipe_slow );
  4547 %}
  4549 instruct vand8B(vecD dst, vecD src) %{
  4550   predicate(n->as_Vector()->length_in_bytes() == 8);
  4551   match(Set dst (AndV dst src));
  4552   format %{ "pand    $dst,$src\t! and vectors (8 bytes)" %}
  4553   ins_encode %{
  4554     __ pand($dst$$XMMRegister, $src$$XMMRegister);
  4555   %}
  4556   ins_pipe( pipe_slow );
  4557 %}
  4559 instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
  4560   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  4561   match(Set dst (AndV src1 src2));
  4562   format %{ "vpand   $dst,$src1,$src2\t! and vectors (8 bytes)" %}
  4563   ins_encode %{
  4564     bool vector256 = false;
  4565     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  4566   %}
  4567   ins_pipe( pipe_slow );
  4568 %}
  4570 instruct vand16B(vecX dst, vecX src) %{
  4571   predicate(n->as_Vector()->length_in_bytes() == 16);
  4572   match(Set dst (AndV dst src));
  4573   format %{ "pand    $dst,$src\t! and vectors (16 bytes)" %}
  4574   ins_encode %{
  4575     __ pand($dst$$XMMRegister, $src$$XMMRegister);
  4576   %}
  4577   ins_pipe( pipe_slow );
  4578 %}
  4580 instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
  4581   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  4582   match(Set dst (AndV src1 src2));
  4583   format %{ "vpand   $dst,$src1,$src2\t! and vectors (16 bytes)" %}
  4584   ins_encode %{
  4585     bool vector256 = false;
  4586     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  4587   %}
  4588   ins_pipe( pipe_slow );
  4589 %}
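// The _mem variants below fold the LoadVector into the instruction's memory
// operand instead of requiring a separate vector load into a register.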
  4591 instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
  4592   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  4593   match(Set dst (AndV src (LoadVector mem)));
  4594   format %{ "vpand   $dst,$src,$mem\t! and vectors (16 bytes)" %}
  4595   ins_encode %{
  4596     bool vector256 = false;
  4597     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  4598   %}
  4599   ins_pipe( pipe_slow );
  4600 %}
  4602 instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
  4603   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  4604   match(Set dst (AndV src1 src2));
  4605   format %{ "vpand   $dst,$src1,$src2\t! and vectors (32 bytes)" %}
  4606   ins_encode %{
  4607     bool vector256 = true;
  4608     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  4609   %}
  4610   ins_pipe( pipe_slow );
  4611 %}
  4613 instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
  4614   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  4615   match(Set dst (AndV src (LoadVector mem)));
  4616   format %{ "vpand   $dst,$src,$mem\t! and vectors (32 bytes)" %}
  4617   ins_encode %{
  4618     bool vector256 = true;
  4619     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  4620   %}
  4621   ins_pipe( pipe_slow );
  4622 %}
  4624 // --------------------------------- OR ---------------------------------------
  4626 instruct vor4B(vecS dst, vecS src) %{
  4627   predicate(n->as_Vector()->length_in_bytes() == 4);
  4628   match(Set dst (OrV dst src));
  4629   format %{ "por     $dst,$src\t! or vectors (4 bytes)" %}
  4630   ins_encode %{
  4631     __ por($dst$$XMMRegister, $src$$XMMRegister);
  4632   %}
  4633   ins_pipe( pipe_slow );
  4634 %}
  4636 instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  4637   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  4638   match(Set dst (OrV src1 src2));
  4639   format %{ "vpor    $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  4640   ins_encode %{
  4641     bool vector256 = false;
  4642     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  4643   %}
  4644   ins_pipe( pipe_slow );
  4645 %}
  4647 instruct vor8B(vecD dst, vecD src) %{
  4648   predicate(n->as_Vector()->length_in_bytes() == 8);
  4649   match(Set dst (OrV dst src));
  4650   format %{ "por     $dst,$src\t! or vectors (8 bytes)" %}
  4651   ins_encode %{
  4652     __ por($dst$$XMMRegister, $src$$XMMRegister);
  4653   %}
  4654   ins_pipe( pipe_slow );
  4655 %}
  4657 instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  4658   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  4659   match(Set dst (OrV src1 src2));
  4660   format %{ "vpor    $dst,$src1,$src2\t! or vectors (8 bytes)" %}
  4661   ins_encode %{
  4662     bool vector256 = false;
  4663     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  4664   %}
  4665   ins_pipe( pipe_slow );
  4666 %}
  4668 instruct vor16B(vecX dst, vecX src) %{
  4669   predicate(n->as_Vector()->length_in_bytes() == 16);
  4670   match(Set dst (OrV dst src));
  4671   format %{ "por     $dst,$src\t! or vectors (16 bytes)" %}
  4672   ins_encode %{
  4673     __ por($dst$$XMMRegister, $src$$XMMRegister);
  4674   %}
  4675   ins_pipe( pipe_slow );
  4676 %}
  4678 instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  4679   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  4680   match(Set dst (OrV src1 src2));
  4681   format %{ "vpor    $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  4682   ins_encode %{
  4683     bool vector256 = false;
  4684     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  4685   %}
  4686   ins_pipe( pipe_slow );
  4687 %}
  4689 instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  4690   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  4691   match(Set dst (OrV src (LoadVector mem)));
  4692   format %{ "vpor    $dst,$src,$mem\t! or vectors (16 bytes)" %}
  4693   ins_encode %{
  4694     bool vector256 = false;
  4695     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  4696   %}
  4697   ins_pipe( pipe_slow );
  4698 %}
  4700 instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  4701   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  4702   match(Set dst (OrV src1 src2));
  4703   format %{ "vpor    $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  4704   ins_encode %{
  4705     bool vector256 = true;
  4706     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  4707   %}
  4708   ins_pipe( pipe_slow );
  4709 %}
  4711 instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  4712   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  4713   match(Set dst (OrV src (LoadVector mem)));
  4714   format %{ "vpor    $dst,$src,$mem\t! or vectors (32 bytes)" %}
  4715   ins_encode %{
  4716     bool vector256 = true;
  4717     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  4718   %}
  4719   ins_pipe( pipe_slow );
  4720 %}
  4722 // --------------------------------- XOR --------------------------------------
  4724 instruct vxor4B(vecS dst, vecS src) %{
  4725   predicate(n->as_Vector()->length_in_bytes() == 4);
  4726   match(Set dst (XorV dst src));
  4727   format %{ "pxor    $dst,$src\t! xor vectors (4 bytes)" %}
  4728   ins_encode %{
  4729     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  4730   %}
  4731   ins_pipe( pipe_slow );
  4732 %}
  4734 instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  4735   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  4736   match(Set dst (XorV src1 src2));
  4737   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  4738   ins_encode %{
  4739     bool vector256 = false;
  4740     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  4741   %}
  4742   ins_pipe( pipe_slow );
  4743 %}
  4745 instruct vxor8B(vecD dst, vecD src) %{
  4746   predicate(n->as_Vector()->length_in_bytes() == 8);
  4747   match(Set dst (XorV dst src));
  4748   format %{ "pxor    $dst,$src\t! xor vectors (8 bytes)" %}
  4749   ins_encode %{
  4750     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  4751   %}
  4752   ins_pipe( pipe_slow );
  4753 %}
  4755 instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  4756   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  4757   match(Set dst (XorV src1 src2));
  4758   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  4759   ins_encode %{
  4760     bool vector256 = false;
  4761     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  4762   %}
  4763   ins_pipe( pipe_slow );
  4764 %}
  4766 instruct vxor16B(vecX dst, vecX src) %{
  4767   predicate(n->as_Vector()->length_in_bytes() == 16);
  4768   match(Set dst (XorV dst src));
  4769   format %{ "pxor    $dst,$src\t! xor vectors (16 bytes)" %}
  4770   ins_encode %{
  4771     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  4772   %}
  4773   ins_pipe( pipe_slow );
  4774 %}
  4776 instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  4777   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  4778   match(Set dst (XorV src1 src2));
  4779   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  4780   ins_encode %{
  4781     bool vector256 = false;
  4782     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  4783   %}
  4784   ins_pipe( pipe_slow );
  4785 %}
  4787 instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  4788   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  4789   match(Set dst (XorV src (LoadVector mem)));
  4790   format %{ "vpxor   $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  4791   ins_encode %{
  4792     bool vector256 = false;
  4793     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  4794   %}
  4795   ins_pipe( pipe_slow );
  4796 %}
  4798 instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  4799   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  4800   match(Set dst (XorV src1 src2));
  4801   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  4802   ins_encode %{
  4803     bool vector256 = true;
  4804     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  4805   %}
  4806   ins_pipe( pipe_slow );
  4807 %}
  4809 instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  4810   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  4811   match(Set dst (XorV src (LoadVector mem)));
  4812   format %{ "vpxor   $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  4813   ins_encode %{
  4814     bool vector256 = true;
  4815     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  4816   %}
  4817   ins_pipe( pipe_slow );
  4818 %}
