src/cpu/x86/vm/x86.ad

author:      aoqi
date:        Thu, 27 Dec 2018 11:43:33 +0800
changeset:   9448:73d689add964
parent:      9333:2fccf735a116
parent:      8604:04d83ba48607
permissions: -rw-r--r--
description: Merge

aoqi@0 1 //
kevinw@9333 2 // Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved.
aoqi@0 3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
aoqi@0 4 //
aoqi@0 5 // This code is free software; you can redistribute it and/or modify it
aoqi@0 6 // under the terms of the GNU General Public License version 2 only, as
aoqi@0 7 // published by the Free Software Foundation.
aoqi@0 8 //
aoqi@0 9 // This code is distributed in the hope that it will be useful, but WITHOUT
aoqi@0 10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
aoqi@0 11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
aoqi@0 12 // version 2 for more details (a copy is included in the LICENSE file that
aoqi@0 13 // accompanied this code).
aoqi@0 14 //
aoqi@0 15 // You should have received a copy of the GNU General Public License version
aoqi@0 16 // 2 along with this work; if not, write to the Free Software Foundation,
aoqi@0 17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
aoqi@0 18 //
aoqi@0 19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
aoqi@0 20 // or visit www.oracle.com if you need additional information or have any
aoqi@0 21 // questions.
aoqi@0 22 //
aoqi@0 23 //
aoqi@0 24
aoqi@0 25 // X86 Common Architecture Description File
aoqi@0 26
aoqi@0 27 //----------REGISTER DEFINITION BLOCK------------------------------------------
aoqi@0 28 // This information is used by the matcher and the register allocator to
aoqi@0 29 // describe individual registers and classes of registers within the target
aoqi@0 30 // architecture.
aoqi@0 31
aoqi@0 32 register %{
aoqi@0 33 //----------Architecture Description Register Definitions----------------------
aoqi@0 34 // General Registers
aoqi@0 35 // "reg_def" name ( register save type, C convention save type,
aoqi@0 36 // ideal register type, encoding );
aoqi@0 37 // Register Save Types:
aoqi@0 38 //
aoqi@0 39 // NS = No-Save: The register allocator assumes that these registers
aoqi@0 40 // can be used without saving upon entry to the method, &
aoqi@0 41 // that they do not need to be saved at call sites.
aoqi@0 42 //
aoqi@0 43 // SOC = Save-On-Call: The register allocator assumes that these registers
aoqi@0 44 // can be used without saving upon entry to the method,
aoqi@0 45 // but that they must be saved at call sites.
aoqi@0 46 //
aoqi@0 47 // SOE = Save-On-Entry: The register allocator assumes that these registers
aoqi@0 48 // must be saved before using them upon entry to the
aoqi@0 49 // method, but they do not need to be saved at call
aoqi@0 50 // sites.
aoqi@0 51 //
aoqi@0 52 // AS = Always-Save: The register allocator assumes that these registers
aoqi@0 53 // must be saved before using them upon entry to the
aoqi@0 54 // method, & that they must be saved at call sites.
aoqi@0 55 //
aoqi@0 56 // Ideal Register Type is used to determine how to save & restore a
aoqi@0 57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
aoqi@0 58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
aoqi@0 59 //
aoqi@0 60 // The encoding number is the actual bit-pattern placed into the opcodes.
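// For example, in the definition of XMM0 below,
//   reg_def XMM0( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
// the register is save-on-call for both the register allocator and the C
// calling convention, is spilled/reloaded as a float (Op_RegF), and uses
// encoding 0 in emitted instructions.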
aoqi@0 61
aoqi@0 62 // XMM registers. 256-bit registers, i.e. 8 32-bit words each, labeled (a)-(h).
aoqi@0 63 // Word a in each register holds a Float, words ab hold a Double.
aoqi@0 64 // The whole registers are used in SSE4.2 version intrinsics,
aoqi@0 65 // array copy stubs and superword operations (see UseSSE42Intrinsics,
aoqi@0 66 // UseXMMForArrayCopy and UseSuperword flags).
aoqi@0 67 // XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
aoqi@0 68 // Linux ABI: No register preserved across function calls
aoqi@0 69 // XMM0-XMM7 might hold parameters
aoqi@0 70 // Windows ABI: XMM6-XMM15 preserved across function calls
aoqi@0 71 // XMM0-XMM3 might hold parameters
aoqi@0 72
aoqi@0 73 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
aoqi@0 74 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
aoqi@0 75 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
aoqi@0 76 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
aoqi@0 77 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
aoqi@0 78 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
aoqi@0 79 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
aoqi@0 80 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
aoqi@0 81
aoqi@0 82 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
aoqi@0 83 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
aoqi@0 84 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
aoqi@0 85 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
aoqi@0 86 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
aoqi@0 87 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
aoqi@0 88 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
aoqi@0 89 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
aoqi@0 90
aoqi@0 91 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
aoqi@0 92 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
aoqi@0 93 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
aoqi@0 94 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
aoqi@0 95 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
aoqi@0 96 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
aoqi@0 97 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
aoqi@0 98 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
aoqi@0 99
aoqi@0 100 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
aoqi@0 101 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
aoqi@0 102 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
aoqi@0 103 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
aoqi@0 104 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
aoqi@0 105 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
aoqi@0 106 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
aoqi@0 107 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
aoqi@0 108
aoqi@0 109 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
aoqi@0 110 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
aoqi@0 111 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
aoqi@0 112 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
aoqi@0 113 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
aoqi@0 114 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
aoqi@0 115 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
aoqi@0 116 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
aoqi@0 117
aoqi@0 118 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
aoqi@0 119 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
aoqi@0 120 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
aoqi@0 121 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
aoqi@0 122 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
aoqi@0 123 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
aoqi@0 124 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
aoqi@0 125 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
aoqi@0 126
aoqi@0 127 #ifdef _WIN64
aoqi@0 128
aoqi@0 129 reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
aoqi@0 130 reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
aoqi@0 131 reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
aoqi@0 132 reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
aoqi@0 133 reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
aoqi@0 134 reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
aoqi@0 135 reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
aoqi@0 136 reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));
aoqi@0 137
aoqi@0 138 reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
aoqi@0 139 reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
aoqi@0 140 reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
aoqi@0 141 reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
aoqi@0 142 reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
aoqi@0 143 reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
aoqi@0 144 reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
aoqi@0 145 reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));
aoqi@0 146
aoqi@0 147 reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
aoqi@0 148 reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
aoqi@0 149 reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
aoqi@0 150 reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
aoqi@0 151 reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
aoqi@0 152 reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
aoqi@0 153 reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
aoqi@0 154 reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));
aoqi@0 155
aoqi@0 156 reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
aoqi@0 157 reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
aoqi@0 158 reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
aoqi@0 159 reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
aoqi@0 160 reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
aoqi@0 161 reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
aoqi@0 162 reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
aoqi@0 163 reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));
aoqi@0 164
aoqi@0 165 reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
aoqi@0 166 reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
aoqi@0 167 reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
aoqi@0 168 reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
aoqi@0 169 reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
aoqi@0 170 reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
aoqi@0 171 reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
aoqi@0 172 reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));
aoqi@0 173
aoqi@0 174 reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
aoqi@0 175 reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
aoqi@0 176 reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
aoqi@0 177 reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
aoqi@0 178 reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
aoqi@0 179 reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
aoqi@0 180 reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
aoqi@0 181 reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));
aoqi@0 182
aoqi@0 183 reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
aoqi@0 184 reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
aoqi@0 185 reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
aoqi@0 186 reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
aoqi@0 187 reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
aoqi@0 188 reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
aoqi@0 189 reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
aoqi@0 190 reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));
aoqi@0 191
aoqi@0 192 reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
aoqi@0 193 reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
aoqi@0 194 reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
aoqi@0 195 reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
aoqi@0 196 reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
aoqi@0 197 reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
aoqi@0 198 reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
aoqi@0 199 reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));
aoqi@0 200
aoqi@0 201 reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
aoqi@0 202 reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
aoqi@0 203 reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
aoqi@0 204 reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
aoqi@0 205 reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
aoqi@0 206 reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
aoqi@0 207 reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
aoqi@0 208 reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));
aoqi@0 209
aoqi@0 210 reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
aoqi@0 211 reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
aoqi@0 212 reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
aoqi@0 213 reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
aoqi@0 214 reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
aoqi@0 215 reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
aoqi@0 216 reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
aoqi@0 217 reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));
aoqi@0 218
aoqi@0 219 #else // _WIN64
aoqi@0 220
aoqi@0 221 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
aoqi@0 222 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
aoqi@0 223 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
aoqi@0 224 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
aoqi@0 225 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
aoqi@0 226 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
aoqi@0 227 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
aoqi@0 228 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
aoqi@0 229
aoqi@0 230 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
aoqi@0 231 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
aoqi@0 232 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
aoqi@0 233 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
aoqi@0 234 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
aoqi@0 235 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
aoqi@0 236 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
aoqi@0 237 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
aoqi@0 238
aoqi@0 239 #ifdef _LP64
aoqi@0 240
aoqi@0 241 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
aoqi@0 242 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
aoqi@0 243 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
aoqi@0 244 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
aoqi@0 245 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
aoqi@0 246 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
aoqi@0 247 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
aoqi@0 248 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
aoqi@0 249
aoqi@0 250 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
aoqi@0 251 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
aoqi@0 252 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
aoqi@0 253 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
aoqi@0 254 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
aoqi@0 255 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
aoqi@0 256 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
aoqi@0 257 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
aoqi@0 258
aoqi@0 259 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
aoqi@0 260 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
aoqi@0 261 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
aoqi@0 262 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
aoqi@0 263 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
aoqi@0 264 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
aoqi@0 265 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
aoqi@0 266 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
aoqi@0 267
aoqi@0 268 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
aoqi@0 269 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
aoqi@0 270 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
aoqi@0 271 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
aoqi@0 272 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
aoqi@0 273 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
aoqi@0 274 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
aoqi@0 275 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
aoqi@0 276
aoqi@0 277 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
aoqi@0 278 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
aoqi@0 279 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
aoqi@0 280 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
aoqi@0 281 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
aoqi@0 282 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
aoqi@0 283 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
aoqi@0 284 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
aoqi@0 285
aoqi@0 286 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
aoqi@0 287 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
aoqi@0 288 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
aoqi@0 289 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
aoqi@0 290 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
aoqi@0 291 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
aoqi@0 292 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
aoqi@0 293 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
aoqi@0 294
aoqi@0 295 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
aoqi@0 296 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
aoqi@0 297 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
aoqi@0 298 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
aoqi@0 299 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
aoqi@0 300 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
aoqi@0 301 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
aoqi@0 302 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
aoqi@0 303
aoqi@0 304 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
aoqi@0 305 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
aoqi@0 306 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
aoqi@0 307 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
aoqi@0 308 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
aoqi@0 309 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
aoqi@0 310 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
aoqi@0 311 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
aoqi@0 312
aoqi@0 313 #endif // _LP64
aoqi@0 314
aoqi@0 315 #endif // _WIN64
aoqi@0 316
aoqi@0 317 #ifdef _LP64
aoqi@0 318 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
aoqi@0 319 #else
aoqi@0 320 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
aoqi@0 321 #endif // _LP64
aoqi@0 322
aoqi@0 323 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
aoqi@0 324 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
aoqi@0 325 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
aoqi@0 326 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
aoqi@0 327 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
aoqi@0 328 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
aoqi@0 329 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
aoqi@0 330 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
aoqi@0 331 #ifdef _LP64
aoqi@0 332 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
aoqi@0 333 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
aoqi@0 334 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
aoqi@0 335 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
aoqi@0 336 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
aoqi@0 337 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
aoqi@0 338 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
aoqi@0 339 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
aoqi@0 340 #endif
aoqi@0 341 );
aoqi@0 342
aoqi@0 343 // flags allocation class should be last.
aoqi@0 344 alloc_class chunk2(RFLAGS);
aoqi@0 345
aoqi@0 346 // Singleton class for condition codes
aoqi@0 347 reg_class int_flags(RFLAGS);
aoqi@0 348
aoqi@0 349 // Class for all float registers
aoqi@0 350 reg_class float_reg(XMM0,
aoqi@0 351 XMM1,
aoqi@0 352 XMM2,
aoqi@0 353 XMM3,
aoqi@0 354 XMM4,
aoqi@0 355 XMM5,
aoqi@0 356 XMM6,
aoqi@0 357 XMM7
aoqi@0 358 #ifdef _LP64
aoqi@0 359 ,XMM8,
aoqi@0 360 XMM9,
aoqi@0 361 XMM10,
aoqi@0 362 XMM11,
aoqi@0 363 XMM12,
aoqi@0 364 XMM13,
aoqi@0 365 XMM14,
aoqi@0 366 XMM15
aoqi@0 367 #endif
aoqi@0 368 );
aoqi@0 369
aoqi@0 370 // Class for all double registers
aoqi@0 371 reg_class double_reg(XMM0, XMM0b,
aoqi@0 372 XMM1, XMM1b,
aoqi@0 373 XMM2, XMM2b,
aoqi@0 374 XMM3, XMM3b,
aoqi@0 375 XMM4, XMM4b,
aoqi@0 376 XMM5, XMM5b,
aoqi@0 377 XMM6, XMM6b,
aoqi@0 378 XMM7, XMM7b
aoqi@0 379 #ifdef _LP64
aoqi@0 380 ,XMM8, XMM8b,
aoqi@0 381 XMM9, XMM9b,
aoqi@0 382 XMM10, XMM10b,
aoqi@0 383 XMM11, XMM11b,
aoqi@0 384 XMM12, XMM12b,
aoqi@0 385 XMM13, XMM13b,
aoqi@0 386 XMM14, XMM14b,
aoqi@0 387 XMM15, XMM15b
aoqi@0 388 #endif
aoqi@0 389 );
aoqi@0 390
aoqi@0 391 // Class for all 32bit vector registers
aoqi@0 392 reg_class vectors_reg(XMM0,
aoqi@0 393 XMM1,
aoqi@0 394 XMM2,
aoqi@0 395 XMM3,
aoqi@0 396 XMM4,
aoqi@0 397 XMM5,
aoqi@0 398 XMM6,
aoqi@0 399 XMM7
aoqi@0 400 #ifdef _LP64
aoqi@0 401 ,XMM8,
aoqi@0 402 XMM9,
aoqi@0 403 XMM10,
aoqi@0 404 XMM11,
aoqi@0 405 XMM12,
aoqi@0 406 XMM13,
aoqi@0 407 XMM14,
aoqi@0 408 XMM15
aoqi@0 409 #endif
aoqi@0 410 );
aoqi@0 411
aoqi@0 412 // Class for all 64bit vector registers
aoqi@0 413 reg_class vectord_reg(XMM0, XMM0b,
aoqi@0 414 XMM1, XMM1b,
aoqi@0 415 XMM2, XMM2b,
aoqi@0 416 XMM3, XMM3b,
aoqi@0 417 XMM4, XMM4b,
aoqi@0 418 XMM5, XMM5b,
aoqi@0 419 XMM6, XMM6b,
aoqi@0 420 XMM7, XMM7b
aoqi@0 421 #ifdef _LP64
aoqi@0 422 ,XMM8, XMM8b,
aoqi@0 423 XMM9, XMM9b,
aoqi@0 424 XMM10, XMM10b,
aoqi@0 425 XMM11, XMM11b,
aoqi@0 426 XMM12, XMM12b,
aoqi@0 427 XMM13, XMM13b,
aoqi@0 428 XMM14, XMM14b,
aoqi@0 429 XMM15, XMM15b
aoqi@0 430 #endif
aoqi@0 431 );
aoqi@0 432
aoqi@0 433 // Class for all 128bit vector registers
aoqi@0 434 reg_class vectorx_reg(XMM0, XMM0b, XMM0c, XMM0d,
aoqi@0 435 XMM1, XMM1b, XMM1c, XMM1d,
aoqi@0 436 XMM2, XMM2b, XMM2c, XMM2d,
aoqi@0 437 XMM3, XMM3b, XMM3c, XMM3d,
aoqi@0 438 XMM4, XMM4b, XMM4c, XMM4d,
aoqi@0 439 XMM5, XMM5b, XMM5c, XMM5d,
aoqi@0 440 XMM6, XMM6b, XMM6c, XMM6d,
aoqi@0 441 XMM7, XMM7b, XMM7c, XMM7d
aoqi@0 442 #ifdef _LP64
aoqi@0 443 ,XMM8, XMM8b, XMM8c, XMM8d,
aoqi@0 444 XMM9, XMM9b, XMM9c, XMM9d,
aoqi@0 445 XMM10, XMM10b, XMM10c, XMM10d,
aoqi@0 446 XMM11, XMM11b, XMM11c, XMM11d,
aoqi@0 447 XMM12, XMM12b, XMM12c, XMM12d,
aoqi@0 448 XMM13, XMM13b, XMM13c, XMM13d,
aoqi@0 449 XMM14, XMM14b, XMM14c, XMM14d,
aoqi@0 450 XMM15, XMM15b, XMM15c, XMM15d
aoqi@0 451 #endif
aoqi@0 452 );
aoqi@0 453
aoqi@0 454 // Class for all 256bit vector registers
aoqi@0 455 reg_class vectory_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
aoqi@0 456 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
aoqi@0 457 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
aoqi@0 458 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
aoqi@0 459 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
aoqi@0 460 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
aoqi@0 461 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
aoqi@0 462 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
aoqi@0 463 #ifdef _LP64
aoqi@0 464 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
aoqi@0 465 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
aoqi@0 466 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
aoqi@0 467 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
aoqi@0 468 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
aoqi@0 469 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
aoqi@0 470 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
aoqi@0 471 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
aoqi@0 472 #endif
aoqi@0 473 );
aoqi@0 474
aoqi@0 475 %}
aoqi@0 476
aoqi@0 477
aoqi@0 478 //----------SOURCE BLOCK-------------------------------------------------------
aoqi@0 479 // This is a block of C++ code which provides values, functions, and
aoqi@0 480 // definitions necessary in the rest of the architecture description
aoqi@0 481
aoqi@0 482 source_hpp %{
aoqi@0 483 // Header information of the source block.
aoqi@0 484 // Method declarations/definitions which are used outside
aoqi@0 485 // the ad-scope can conveniently be defined here.
aoqi@0 486 //
aoqi@0 487 // To keep related declarations/definitions/uses close together,
aoqi@0 488 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
aoqi@0 489
aoqi@0 490 class CallStubImpl {
aoqi@0 491
aoqi@0 492 //--------------------------------------------------------------
aoqi@0 493 //---< Used for optimization in Compile::shorten_branches >---
aoqi@0 494 //--------------------------------------------------------------
aoqi@0 495
aoqi@0 496 public:
aoqi@0 497 // Size of call trampoline stub.
aoqi@0 498 static uint size_call_trampoline() {
aoqi@0 499 return 0; // no call trampolines on this platform
aoqi@0 500 }
aoqi@0 501
aoqi@0 502 // number of relocations needed by a call trampoline stub
aoqi@0 503 static uint reloc_call_trampoline() {
aoqi@0 504 return 0; // no call trampolines on this platform
aoqi@0 505 }
aoqi@0 506 };
aoqi@0 507
aoqi@0 508 class HandlerImpl {
aoqi@0 509
aoqi@0 510 public:
aoqi@0 511
aoqi@0 512 static int emit_exception_handler(CodeBuffer &cbuf);
aoqi@0 513 static int emit_deopt_handler(CodeBuffer& cbuf);
aoqi@0 514
aoqi@0 515 static uint size_exception_handler() {
aoqi@0 516 // NativeCall instruction size is the same as NativeJump.
aoqi@0 517 // The exception handler starts out as a jump and can be patched to
aoqi@0 518 // a call by deoptimization. (4932387)
aoqi@0 519 // Note that this value is also credited (in output.cpp) to
aoqi@0 520 // the size of the code section.
aoqi@0 521 return NativeJump::instruction_size;
aoqi@0 522 }
aoqi@0 523
aoqi@0 524 #ifdef _LP64
aoqi@0 525 static uint size_deopt_handler() {
aoqi@0 526 // three 5 byte instructions
aoqi@0 527 return 15;
aoqi@0 528 }
aoqi@0 529 #else
aoqi@0 530 static uint size_deopt_handler() {
aoqi@0 531 // NativeCall instruction size is the same as NativeJump.
aoqi@0 532 // The exception handler starts out as a jump and can be patched to
aoqi@0 533 // a call by deoptimization. (4932387)
aoqi@0 534 // Note that this value is also credited (in output.cpp) to
aoqi@0 535 // the size of the code section.
aoqi@0 536 return 5 + NativeJump::instruction_size; // pushl(); jmp;
aoqi@0 537 }
aoqi@0 538 #endif
aoqi@0 539 };
aoqi@0 540
aoqi@0 541 %} // end source_hpp
aoqi@0 542
aoqi@0 543 source %{
aoqi@0 544
aoqi@0 545 // Emit exception handler code.
aoqi@0 546 // Stuff framesize into a register and call a VM stub routine.
aoqi@0 547 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
aoqi@0 548
aoqi@0 549 // Note that the code buffer's insts_mark is always relative to insts.
aoqi@0 550 // That's why we must use the macroassembler to generate a handler.
aoqi@0 551 MacroAssembler _masm(&cbuf);
aoqi@0 552 address base = __ start_a_stub(size_exception_handler());
vkempik@8427 553 if (base == NULL) {
vkempik@8427 554 ciEnv::current()->record_failure("CodeCache is full");
vkempik@8427 555 return 0; // CodeBuffer::expand failed
vkempik@8427 556 }
aoqi@0 557 int offset = __ offset();
aoqi@0 558 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
aoqi@0 559 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
aoqi@0 560 __ end_a_stub();
aoqi@0 561 return offset;
aoqi@0 562 }
aoqi@0 563
aoqi@0 564 // Emit deopt handler code.
aoqi@0 565 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
aoqi@0 566
aoqi@0 567 // Note that the code buffer's insts_mark is always relative to insts.
aoqi@0 568 // That's why we must use the macroassembler to generate a handler.
aoqi@0 569 MacroAssembler _masm(&cbuf);
aoqi@0 570 address base = __ start_a_stub(size_deopt_handler());
vkempik@8427 571 if (base == NULL) {
vkempik@8427 572 ciEnv::current()->record_failure("CodeCache is full");
vkempik@8427 573 return 0; // CodeBuffer::expand failed
vkempik@8427 574 }
aoqi@0 575 int offset = __ offset();
aoqi@0 576
aoqi@0 577 #ifdef _LP64
aoqi@0 578 address the_pc = (address) __ pc();
aoqi@0 579 Label next;
aoqi@0 580 // push a "the_pc" on the stack without destroying any registers
aoqi@0 581 // as they all may be live.
aoqi@0 582
aoqi@0 583 // push address of "next"
aoqi@0 584 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
aoqi@0 585 __ bind(next);
aoqi@0 586 // adjust it so it matches "the_pc"
aoqi@0 587 __ subptr(Address(rsp, 0), __ offset() - offset);
aoqi@0 588 #else
aoqi@0 589 InternalAddress here(__ pc());
aoqi@0 590 __ pushptr(here.addr());
aoqi@0 591 #endif
aoqi@0 592
aoqi@0 593 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
aoqi@0 594 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
aoqi@0 595 __ end_a_stub();
aoqi@0 596 return offset;
aoqi@0 597 }
aoqi@0 598
aoqi@0 599
aoqi@0 600 //=============================================================================
aoqi@0 601
aoqi@0 602 // Float masks come from different places depending on platform.
aoqi@0 603 #ifdef _LP64
aoqi@0 604 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
aoqi@0 605 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
aoqi@0 606 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
aoqi@0 607 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
aoqi@0 608 #else
aoqi@0 609 static address float_signmask() { return (address)float_signmask_pool; }
aoqi@0 610 static address float_signflip() { return (address)float_signflip_pool; }
aoqi@0 611 static address double_signmask() { return (address)double_signmask_pool; }
aoqi@0 612 static address double_signflip() { return (address)double_signflip_pool; }
aoqi@0 613 #endif
aoqi@0 614
aoqi@0 615
aoqi@0 616 const bool Matcher::match_rule_supported(int opcode) {
aoqi@0 617 if (!has_match_rule(opcode))
aoqi@0 618 return false;
aoqi@0 619
aoqi@0 620 switch (opcode) {
aoqi@0 621 case Op_PopCountI:
aoqi@0 622 case Op_PopCountL:
aoqi@0 623 if (!UsePopCountInstruction)
aoqi@0 624 return false;
aoqi@0 625 break;
aoqi@0 626 case Op_MulVI:
aoqi@0 627 if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
aoqi@0 628 return false;
aoqi@0 629 break;
aoqi@0 630 case Op_CompareAndSwapL:
aoqi@0 631 #ifdef _LP64
aoqi@0 632 case Op_CompareAndSwapP:
aoqi@0 633 #endif
aoqi@0 634 if (!VM_Version::supports_cx8())
aoqi@0 635 return false;
aoqi@0 636 break;
aoqi@0 637 }
aoqi@0 638
aoqi@0 639 return true; // Per default match rules are supported.
aoqi@0 640 }
aoqi@0 641
aoqi@0 642 // Max vector size in bytes. 0 if not supported.
aoqi@0 643 const int Matcher::vector_width_in_bytes(BasicType bt) {
aoqi@0 644 assert(is_java_primitive(bt), "only primitive type vectors");
aoqi@0 645 if (UseSSE < 2) return 0;
aoqi@0 646 // SSE2 supports 128bit vectors for all types.
aoqi@0 647 // AVX2 supports 256bit vectors for all types.
aoqi@0 648 int size = (UseAVX > 1) ? 32 : 16;
aoqi@0 649 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
aoqi@0 650 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
aoqi@0 651 size = 32;
aoqi@0 652 // Use flag to limit vector size.
aoqi@0 653 size = MIN2(size,(int)MaxVectorSize);
aoqi@0 654 // Minimum 2 values in vector (or 4 for bytes).
aoqi@0 655 switch (bt) {
aoqi@0 656 case T_DOUBLE:
aoqi@0 657 case T_LONG:
aoqi@0 658 if (size < 16) return 0;
aoqi@0 659 case T_FLOAT:
aoqi@0 660 case T_INT:
aoqi@0 661 if (size < 8) return 0;
aoqi@0 662 case T_BOOLEAN:
aoqi@0 663 case T_BYTE:
aoqi@0 664 case T_CHAR:
aoqi@0 665 case T_SHORT:
aoqi@0 666 if (size < 4) return 0;
aoqi@0 667 break;
aoqi@0 668 default:
aoqi@0 669 ShouldNotReachHere();
aoqi@0 670 }
aoqi@0 671 return size;
aoqi@0 672 }
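// For example, with UseAVX == 2 and MaxVectorSize >= 32 this returns 32 for
// T_INT (eight 4-byte lanes); with UseAVX == 1 it returns 16 for T_INT but
// still 32 for T_FLOAT and T_DOUBLE, per the AVX1 restriction noted above.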
aoqi@0 673
aoqi@0 674 // Limits on vector size (number of elements) loaded into vector.
aoqi@0 675 const int Matcher::max_vector_size(const BasicType bt) {
aoqi@0 676 return vector_width_in_bytes(bt)/type2aelembytes(bt);
aoqi@0 677 }
aoqi@0 678 const int Matcher::min_vector_size(const BasicType bt) {
aoqi@0 679 int max_size = max_vector_size(bt);
aoqi@0 680 // Min size which can be loaded into vector is 4 bytes.
aoqi@0 681 int size = (type2aelembytes(bt) == 1) ? 4 : 2;
aoqi@0 682 return MIN2(size,max_size);
aoqi@0 683 }
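// For example, min_vector_size(T_BYTE) is 4 (four 1-byte lanes, matching the
// 4-byte minimum vector load) and min_vector_size(T_INT) is 2 (two 4-byte
// lanes), in both cases capped by max_vector_size() for the platform.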
aoqi@0 684
aoqi@0 685 // Vector ideal reg corresponding to specified size in bytes
kevinw@9333 686 const uint Matcher::vector_ideal_reg(int size) {
aoqi@0 687 assert(MaxVectorSize >= size, "");
aoqi@0 688 switch(size) {
aoqi@0 689 case 4: return Op_VecS;
aoqi@0 690 case 8: return Op_VecD;
aoqi@0 691 case 16: return Op_VecX;
aoqi@0 692 case 32: return Op_VecY;
aoqi@0 693 }
aoqi@0 694 ShouldNotReachHere();
aoqi@0 695 return 0;
aoqi@0 696 }
aoqi@0 697
aoqi@0 698 // Only lowest bits of xmm reg are used for vector shift count.
kevinw@9333 699 const uint Matcher::vector_shift_count_ideal_reg(int size) {
aoqi@0 700 return Op_VecS;
aoqi@0 701 }
aoqi@0 702
aoqi@0 703 // x86 supports misaligned vectors store/load.
aoqi@0 704 const bool Matcher::misaligned_vectors_ok() {
aoqi@0 705 return !AlignVector; // can be changed by flag
aoqi@0 706 }
aoqi@0 707
aoqi@0 708 // x86 AES instructions are compatible with SunJCE expanded
aoqi@0 709 // keys, hence we do not need to pass the original key to stubs
aoqi@0 710 const bool Matcher::pass_original_key_for_aes() {
aoqi@0 711 return false;
aoqi@0 712 }
aoqi@0 713
aoqi@0 714 // Helper methods for MachSpillCopyNode::implementation().
aoqi@0 715 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
aoqi@0 716 int src_hi, int dst_hi, uint ireg, outputStream* st) {
aoqi@0 717 // In the 64-bit VM the size calculation is very complex, so the size is
aoqi@0 718 // obtained by emitting the instructions into a scratch buffer.
aoqi@0 719 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
aoqi@0 720 assert(ireg == Op_VecS || // 32bit vector
aoqi@0 721 (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
aoqi@0 722 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
aoqi@0 723 "no non-adjacent vector moves" );
aoqi@0 724 if (cbuf) {
aoqi@0 725 MacroAssembler _masm(cbuf);
aoqi@0 726 int offset = __ offset();
aoqi@0 727 switch (ireg) {
aoqi@0 728 case Op_VecS: // copy whole register
aoqi@0 729 case Op_VecD:
aoqi@0 730 case Op_VecX:
aoqi@0 731 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
aoqi@0 732 break;
aoqi@0 733 case Op_VecY:
aoqi@0 734 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
aoqi@0 735 break;
aoqi@0 736 default:
aoqi@0 737 ShouldNotReachHere();
aoqi@0 738 }
aoqi@0 739 int size = __ offset() - offset;
aoqi@0 740 #ifdef ASSERT
aoqi@0 741 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
aoqi@0 742 assert(!do_size || size == 4, "incorrect size calculation");
aoqi@0 743 #endif
aoqi@0 744 return size;
aoqi@0 745 #ifndef PRODUCT
aoqi@0 746 } else if (!do_size) {
aoqi@0 747 switch (ireg) {
aoqi@0 748 case Op_VecS:
aoqi@0 749 case Op_VecD:
aoqi@0 750 case Op_VecX:
aoqi@0 751 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
aoqi@0 752 break;
aoqi@0 753 case Op_VecY:
aoqi@0 754 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
aoqi@0 755 break;
aoqi@0 756 default:
aoqi@0 757 ShouldNotReachHere();
aoqi@0 758 }
aoqi@0 759 #endif
aoqi@0 760 }
aoqi@0 761 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
aoqi@0 762 return 4;
aoqi@0 763 }
aoqi@0 764
aoqi@0 765 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
aoqi@0 766 int stack_offset, int reg, uint ireg, outputStream* st) {
aoqi@0 767 // In the 64-bit VM the size calculation is very complex, so the size is
aoqi@0 768 // obtained by emitting the instructions into a scratch buffer.
aoqi@0 769 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
aoqi@0 770 if (cbuf) {
aoqi@0 771 MacroAssembler _masm(cbuf);
aoqi@0 772 int offset = __ offset();
aoqi@0 773 if (is_load) {
aoqi@0 774 switch (ireg) {
aoqi@0 775 case Op_VecS:
aoqi@0 776 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
aoqi@0 777 break;
aoqi@0 778 case Op_VecD:
aoqi@0 779 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
aoqi@0 780 break;
aoqi@0 781 case Op_VecX:
aoqi@0 782 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
aoqi@0 783 break;
aoqi@0 784 case Op_VecY:
aoqi@0 785 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
aoqi@0 786 break;
aoqi@0 787 default:
aoqi@0 788 ShouldNotReachHere();
aoqi@0 789 }
aoqi@0 790 } else { // store
aoqi@0 791 switch (ireg) {
aoqi@0 792 case Op_VecS:
aoqi@0 793 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
aoqi@0 794 break;
aoqi@0 795 case Op_VecD:
aoqi@0 796 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
aoqi@0 797 break;
aoqi@0 798 case Op_VecX:
aoqi@0 799 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
aoqi@0 800 break;
aoqi@0 801 case Op_VecY:
aoqi@0 802 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
aoqi@0 803 break;
aoqi@0 804 default:
aoqi@0 805 ShouldNotReachHere();
aoqi@0 806 }
aoqi@0 807 }
aoqi@0 808 int size = __ offset() - offset;
aoqi@0 809 #ifdef ASSERT
aoqi@0 810 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
aoqi@0 811 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
aoqi@0 812 assert(!do_size || size == (5+offset_size), "incorrect size calculation");
aoqi@0 813 #endif
aoqi@0 814 return size;
aoqi@0 815 #ifndef PRODUCT
aoqi@0 816 } else if (!do_size) {
aoqi@0 817 if (is_load) {
aoqi@0 818 switch (ireg) {
aoqi@0 819 case Op_VecS:
aoqi@0 820 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
aoqi@0 821 break;
aoqi@0 822 case Op_VecD:
aoqi@0 823 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
aoqi@0 824 break;
aoqi@0 825 case Op_VecX:
aoqi@0 826 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
aoqi@0 827 break;
aoqi@0 828 case Op_VecY:
aoqi@0 829 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
aoqi@0 830 break;
aoqi@0 831 default:
aoqi@0 832 ShouldNotReachHere();
aoqi@0 833 }
aoqi@0 834 } else { // store
aoqi@0 835 switch (ireg) {
aoqi@0 836 case Op_VecS:
aoqi@0 837 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
aoqi@0 838 break;
aoqi@0 839 case Op_VecD:
aoqi@0 840 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
aoqi@0 841 break;
aoqi@0 842 case Op_VecX:
aoqi@0 843 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
aoqi@0 844 break;
aoqi@0 845 case Op_VecY:
aoqi@0 846 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
aoqi@0 847 break;
aoqi@0 848 default:
aoqi@0 849 ShouldNotReachHere();
aoqi@0 850 }
aoqi@0 851 }
aoqi@0 852 #endif
aoqi@0 853 }
aoqi@0 854 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
aoqi@0 855 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
aoqi@0 856 return 5+offset_size;
aoqi@0 857 }
aoqi@0 858
aoqi@0 859 static inline jfloat replicate4_imm(int con, int width) {
aoqi@0 860 // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
aoqi@0 861 assert(width == 1 || width == 2, "only byte or short types here");
aoqi@0 862 int bit_width = width * 8;
aoqi@0 863 jint val = con;
aoqi@0 864 val &= (1 << bit_width) - 1; // mask off sign bits
aoqi@0 865 while(bit_width < 32) {
aoqi@0 866 val |= (val << bit_width);
aoqi@0 867 bit_width <<= 1;
aoqi@0 868 }
aoqi@0 869 jfloat fval = *((jfloat*) &val); // coerce to float type
aoqi@0 870 return fval;
aoqi@0 871 }
aoqi@0 872
aoqi@0 873 static inline jdouble replicate8_imm(int con, int width) {
aoqi@0 874 // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
aoqi@0 875 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
aoqi@0 876 int bit_width = width * 8;
aoqi@0 877 jlong val = con;
aoqi@0 878 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits
aoqi@0 879 while(bit_width < 64) {
aoqi@0 880 val |= (val << bit_width);
aoqi@0 881 bit_width <<= 1;
aoqi@0 882 }
aoqi@0 883 jdouble dval = *((jdouble*) &val); // coerce to double type
aoqi@0 884 return dval;
aoqi@0 885 }
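// For example, replicate4_imm(0x41, 1) masks the constant to 8 bits and
// doubles the pattern until it fills 32 bits (0x41 -> 0x4141 -> 0x41414141),
// returning a jfloat whose raw bits are 0x41414141; replicate8_imm performs
// the same widening up to 64 bits, so the value can be materialized from the
// constant table for vector replicate instructions.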
aoqi@0 886
aoqi@0 887 #ifndef PRODUCT
aoqi@0 888 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
aoqi@0 889 st->print("nop \t# %d bytes pad for loops and calls", _count);
aoqi@0 890 }
aoqi@0 891 #endif
aoqi@0 892
aoqi@0 893 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
aoqi@0 894 MacroAssembler _masm(&cbuf);
aoqi@0 895 __ nop(_count);
aoqi@0 896 }
aoqi@0 897
aoqi@0 898 uint MachNopNode::size(PhaseRegAlloc*) const {
aoqi@0 899 return _count;
aoqi@0 900 }
aoqi@0 901
aoqi@0 902 #ifndef PRODUCT
aoqi@0 903 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
aoqi@0 904 st->print("# breakpoint");
aoqi@0 905 }
aoqi@0 906 #endif
aoqi@0 907
aoqi@0 908 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
aoqi@0 909 MacroAssembler _masm(&cbuf);
aoqi@0 910 __ int3();
aoqi@0 911 }
aoqi@0 912
aoqi@0 913 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
aoqi@0 914 return MachNode::size(ra_);
aoqi@0 915 }
aoqi@0 916
aoqi@0 917 %}
aoqi@0 918
aoqi@0 919 encode %{
aoqi@0 920
aoqi@0 921 enc_class call_epilog %{
aoqi@0 922 if (VerifyStackAtCalls) {
aoqi@0 923 // Check that stack depth is unchanged: find majik cookie on stack
aoqi@0 924 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
aoqi@0 925 MacroAssembler _masm(&cbuf);
aoqi@0 926 Label L;
aoqi@0 927 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
aoqi@0 928 __ jccb(Assembler::equal, L);
aoqi@0 929 // Die if stack mismatch
aoqi@0 930 __ int3();
aoqi@0 931 __ bind(L);
aoqi@0 932 }
aoqi@0 933 %}
aoqi@0 934
aoqi@0 935 %}
aoqi@0 936
aoqi@0 937
aoqi@0 938 //----------OPERANDS-----------------------------------------------------------
aoqi@0 939 // Operand definitions must precede instruction definitions for correct parsing
aoqi@0 940 // in the ADLC because operands constitute user defined types which are used in
aoqi@0 941 // instruction definitions.
aoqi@0 942
aoqi@0 943 // Vectors
aoqi@0 944 operand vecS() %{
aoqi@0 945 constraint(ALLOC_IN_RC(vectors_reg));
aoqi@0 946 match(VecS);
aoqi@0 947
aoqi@0 948 format %{ %}
aoqi@0 949 interface(REG_INTER);
aoqi@0 950 %}
aoqi@0 951
aoqi@0 952 operand vecD() %{
aoqi@0 953 constraint(ALLOC_IN_RC(vectord_reg));
aoqi@0 954 match(VecD);
aoqi@0 955
aoqi@0 956 format %{ %}
aoqi@0 957 interface(REG_INTER);
aoqi@0 958 %}
aoqi@0 959
aoqi@0 960 operand vecX() %{
aoqi@0 961 constraint(ALLOC_IN_RC(vectorx_reg));
aoqi@0 962 match(VecX);
aoqi@0 963
aoqi@0 964 format %{ %}
aoqi@0 965 interface(REG_INTER);
aoqi@0 966 %}
aoqi@0 967
aoqi@0 968 operand vecY() %{
aoqi@0 969 constraint(ALLOC_IN_RC(vectory_reg));
aoqi@0 970 match(VecY);
aoqi@0 971
aoqi@0 972 format %{ %}
aoqi@0 973 interface(REG_INTER);
aoqi@0 974 %}
aoqi@0 975
aoqi@0 976
aoqi@0 977 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)
aoqi@0 978
aoqi@0 979 // ============================================================================
aoqi@0 980
aoqi@0 981 instruct ShouldNotReachHere() %{
aoqi@0 982 match(Halt);
aoqi@0 983 format %{ "int3\t# ShouldNotReachHere" %}
aoqi@0 984 ins_encode %{
aoqi@0 985 __ int3();
aoqi@0 986 %}
aoqi@0 987 ins_pipe(pipe_slow);
aoqi@0 988 %}
aoqi@0 989
aoqi@0 990 // ============================================================================
aoqi@0 991
aoqi@0 992 instruct addF_reg(regF dst, regF src) %{
aoqi@0 993 predicate((UseSSE>=1) && (UseAVX == 0));
aoqi@0 994 match(Set dst (AddF dst src));
aoqi@0 995
aoqi@0 996 format %{ "addss $dst, $src" %}
aoqi@0 997 ins_cost(150);
aoqi@0 998 ins_encode %{
aoqi@0 999 __ addss($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 1000 %}
aoqi@0 1001 ins_pipe(pipe_slow);
aoqi@0 1002 %}
aoqi@0 1003
aoqi@0 1004 instruct addF_mem(regF dst, memory src) %{
aoqi@0 1005 predicate((UseSSE>=1) && (UseAVX == 0));
aoqi@0 1006 match(Set dst (AddF dst (LoadF src)));
aoqi@0 1007
aoqi@0 1008 format %{ "addss $dst, $src" %}
aoqi@0 1009 ins_cost(150);
aoqi@0 1010 ins_encode %{
aoqi@0 1011 __ addss($dst$$XMMRegister, $src$$Address);
aoqi@0 1012 %}
aoqi@0 1013 ins_pipe(pipe_slow);
aoqi@0 1014 %}
aoqi@0 1015
aoqi@0 1016 instruct addF_imm(regF dst, immF con) %{
aoqi@0 1017 predicate((UseSSE>=1) && (UseAVX == 0));
aoqi@0 1018 match(Set dst (AddF dst con));
aoqi@0 1019 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
aoqi@0 1020 ins_cost(150);
aoqi@0 1021 ins_encode %{
aoqi@0 1022 __ addss($dst$$XMMRegister, $constantaddress($con));
aoqi@0 1023 %}
aoqi@0 1024 ins_pipe(pipe_slow);
aoqi@0 1025 %}
aoqi@0 1026
aoqi@0 1027 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
aoqi@0 1028 predicate(UseAVX > 0);
aoqi@0 1029 match(Set dst (AddF src1 src2));
aoqi@0 1030
aoqi@0 1031 format %{ "vaddss $dst, $src1, $src2" %}
aoqi@0 1032 ins_cost(150);
aoqi@0 1033 ins_encode %{
aoqi@0 1034 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
aoqi@0 1035 %}
aoqi@0 1036 ins_pipe(pipe_slow);
aoqi@0 1037 %}
aoqi@0 1038
aoqi@0 1039 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
aoqi@0 1040 predicate(UseAVX > 0);
aoqi@0 1041 match(Set dst (AddF src1 (LoadF src2)));
aoqi@0 1042
aoqi@0 1043 format %{ "vaddss $dst, $src1, $src2" %}
aoqi@0 1044 ins_cost(150);
aoqi@0 1045 ins_encode %{
aoqi@0 1046 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
aoqi@0 1047 %}
aoqi@0 1048 ins_pipe(pipe_slow);
aoqi@0 1049 %}
aoqi@0 1050
aoqi@0 1051 instruct addF_reg_imm(regF dst, regF src, immF con) %{
aoqi@0 1052 predicate(UseAVX > 0);
aoqi@0 1053 match(Set dst (AddF src con));
aoqi@0 1054
aoqi@0 1055 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
aoqi@0 1056 ins_cost(150);
aoqi@0 1057 ins_encode %{
aoqi@0 1058 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
aoqi@0 1059 %}
aoqi@0 1060 ins_pipe(pipe_slow);
aoqi@0 1061 %}
aoqi@0 1062
aoqi@0 1063 instruct addD_reg(regD dst, regD src) %{
aoqi@0 1064 predicate((UseSSE>=2) && (UseAVX == 0));
aoqi@0 1065 match(Set dst (AddD dst src));
aoqi@0 1066
aoqi@0 1067 format %{ "addsd $dst, $src" %}
aoqi@0 1068 ins_cost(150);
aoqi@0 1069 ins_encode %{
aoqi@0 1070 __ addsd($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 1071 %}
aoqi@0 1072 ins_pipe(pipe_slow);
aoqi@0 1073 %}
aoqi@0 1074
aoqi@0 1075 instruct addD_mem(regD dst, memory src) %{
aoqi@0 1076 predicate((UseSSE>=2) && (UseAVX == 0));
aoqi@0 1077 match(Set dst (AddD dst (LoadD src)));
aoqi@0 1078
aoqi@0 1079 format %{ "addsd $dst, $src" %}
aoqi@0 1080 ins_cost(150);
aoqi@0 1081 ins_encode %{
aoqi@0 1082 __ addsd($dst$$XMMRegister, $src$$Address);
aoqi@0 1083 %}
aoqi@0 1084 ins_pipe(pipe_slow);
aoqi@0 1085 %}
aoqi@0 1086
aoqi@0 1087 instruct addD_imm(regD dst, immD con) %{
aoqi@0 1088 predicate((UseSSE>=2) && (UseAVX == 0));
aoqi@0 1089 match(Set dst (AddD dst con));
aoqi@0 1090 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
aoqi@0 1091 ins_cost(150);
aoqi@0 1092 ins_encode %{
aoqi@0 1093 __ addsd($dst$$XMMRegister, $constantaddress($con));
aoqi@0 1094 %}
aoqi@0 1095 ins_pipe(pipe_slow);
aoqi@0 1096 %}
aoqi@0 1097
aoqi@0 1098 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
aoqi@0 1099 predicate(UseAVX > 0);
aoqi@0 1100 match(Set dst (AddD src1 src2));
aoqi@0 1101
aoqi@0 1102 format %{ "vaddsd $dst, $src1, $src2" %}
aoqi@0 1103 ins_cost(150);
aoqi@0 1104 ins_encode %{
aoqi@0 1105 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
aoqi@0 1106 %}
aoqi@0 1107 ins_pipe(pipe_slow);
aoqi@0 1108 %}
aoqi@0 1109
aoqi@0 1110 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
aoqi@0 1111 predicate(UseAVX > 0);
aoqi@0 1112 match(Set dst (AddD src1 (LoadD src2)));
aoqi@0 1113
aoqi@0 1114 format %{ "vaddsd $dst, $src1, $src2" %}
aoqi@0 1115 ins_cost(150);
aoqi@0 1116 ins_encode %{
aoqi@0 1117 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
aoqi@0 1118 %}
aoqi@0 1119 ins_pipe(pipe_slow);
aoqi@0 1120 %}
aoqi@0 1121
aoqi@0 1122 instruct addD_reg_imm(regD dst, regD src, immD con) %{
aoqi@0 1123 predicate(UseAVX > 0);
aoqi@0 1124 match(Set dst (AddD src con));
aoqi@0 1125
aoqi@0 1126 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
aoqi@0 1127 ins_cost(150);
aoqi@0 1128 ins_encode %{
aoqi@0 1129 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
aoqi@0 1130 %}
aoqi@0 1131 ins_pipe(pipe_slow);
aoqi@0 1132 %}
aoqi@0 1133
aoqi@0 1134 instruct subF_reg(regF dst, regF src) %{
aoqi@0 1135 predicate((UseSSE>=1) && (UseAVX == 0));
aoqi@0 1136 match(Set dst (SubF dst src));
aoqi@0 1137
aoqi@0 1138 format %{ "subss $dst, $src" %}
aoqi@0 1139 ins_cost(150);
aoqi@0 1140 ins_encode %{
aoqi@0 1141 __ subss($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 1142 %}
aoqi@0 1143 ins_pipe(pipe_slow);
aoqi@0 1144 %}
aoqi@0 1145
aoqi@0 1146 instruct subF_mem(regF dst, memory src) %{
aoqi@0 1147 predicate((UseSSE>=1) && (UseAVX == 0));
aoqi@0 1148 match(Set dst (SubF dst (LoadF src)));
aoqi@0 1149
aoqi@0 1150 format %{ "subss $dst, $src" %}
aoqi@0 1151 ins_cost(150);
aoqi@0 1152 ins_encode %{
aoqi@0 1153 __ subss($dst$$XMMRegister, $src$$Address);
aoqi@0 1154 %}
aoqi@0 1155 ins_pipe(pipe_slow);
aoqi@0 1156 %}
aoqi@0 1157
aoqi@0 1158 instruct subF_imm(regF dst, immF con) %{
aoqi@0 1159 predicate((UseSSE>=1) && (UseAVX == 0));
aoqi@0 1160 match(Set dst (SubF dst con));
aoqi@0 1161 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
aoqi@0 1162 ins_cost(150);
aoqi@0 1163 ins_encode %{
aoqi@0 1164 __ subss($dst$$XMMRegister, $constantaddress($con));
aoqi@0 1165 %}
aoqi@0 1166 ins_pipe(pipe_slow);
aoqi@0 1167 %}
aoqi@0 1168
aoqi@0 1169 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
aoqi@0 1170 predicate(UseAVX > 0);
aoqi@0 1171 match(Set dst (SubF src1 src2));
aoqi@0 1172
aoqi@0 1173 format %{ "vsubss $dst, $src1, $src2" %}
aoqi@0 1174 ins_cost(150);
aoqi@0 1175 ins_encode %{
aoqi@0 1176 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
aoqi@0 1177 %}
aoqi@0 1178 ins_pipe(pipe_slow);
aoqi@0 1179 %}
aoqi@0 1180
aoqi@0 1181 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
aoqi@0 1182 predicate(UseAVX > 0);
aoqi@0 1183 match(Set dst (SubF src1 (LoadF src2)));
aoqi@0 1184
aoqi@0 1185 format %{ "vsubss $dst, $src1, $src2" %}
aoqi@0 1186 ins_cost(150);
aoqi@0 1187 ins_encode %{
aoqi@0 1188 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
aoqi@0 1189 %}
aoqi@0 1190 ins_pipe(pipe_slow);
aoqi@0 1191 %}
aoqi@0 1192
aoqi@0 1193 instruct subF_reg_imm(regF dst, regF src, immF con) %{
aoqi@0 1194 predicate(UseAVX > 0);
aoqi@0 1195 match(Set dst (SubF src con));
aoqi@0 1196
aoqi@0 1197 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
aoqi@0 1198 ins_cost(150);
aoqi@0 1199 ins_encode %{
aoqi@0 1200 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
aoqi@0 1201 %}
aoqi@0 1202 ins_pipe(pipe_slow);
aoqi@0 1203 %}
aoqi@0 1204
aoqi@0 1205 instruct subD_reg(regD dst, regD src) %{
aoqi@0 1206 predicate((UseSSE>=2) && (UseAVX == 0));
aoqi@0 1207 match(Set dst (SubD dst src));
aoqi@0 1208
aoqi@0 1209 format %{ "subsd $dst, $src" %}
aoqi@0 1210 ins_cost(150);
aoqi@0 1211 ins_encode %{
aoqi@0 1212 __ subsd($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 1213 %}
aoqi@0 1214 ins_pipe(pipe_slow);
aoqi@0 1215 %}
aoqi@0 1216
aoqi@0 1217 instruct subD_mem(regD dst, memory src) %{
aoqi@0 1218 predicate((UseSSE>=2) && (UseAVX == 0));
aoqi@0 1219 match(Set dst (SubD dst (LoadD src)));
aoqi@0 1220
aoqi@0 1221 format %{ "subsd $dst, $src" %}
aoqi@0 1222 ins_cost(150);
aoqi@0 1223 ins_encode %{
aoqi@0 1224 __ subsd($dst$$XMMRegister, $src$$Address);
aoqi@0 1225 %}
aoqi@0 1226 ins_pipe(pipe_slow);
aoqi@0 1227 %}
aoqi@0 1228
aoqi@0 1229 instruct subD_imm(regD dst, immD con) %{
aoqi@0 1230 predicate((UseSSE>=2) && (UseAVX == 0));
aoqi@0 1231 match(Set dst (SubD dst con));
aoqi@0 1232 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
aoqi@0 1233 ins_cost(150);
aoqi@0 1234 ins_encode %{
aoqi@0 1235 __ subsd($dst$$XMMRegister, $constantaddress($con));
aoqi@0 1236 %}
aoqi@0 1237 ins_pipe(pipe_slow);
aoqi@0 1238 %}
aoqi@0 1239
aoqi@0 1240 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
aoqi@0 1241 predicate(UseAVX > 0);
aoqi@0 1242 match(Set dst (SubD src1 src2));
aoqi@0 1243
aoqi@0 1244 format %{ "vsubsd $dst, $src1, $src2" %}
aoqi@0 1245 ins_cost(150);
aoqi@0 1246 ins_encode %{
aoqi@0 1247 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
aoqi@0 1248 %}
aoqi@0 1249 ins_pipe(pipe_slow);
aoqi@0 1250 %}
aoqi@0 1251
aoqi@0 1252 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
aoqi@0 1253 predicate(UseAVX > 0);
aoqi@0 1254 match(Set dst (SubD src1 (LoadD src2)));
aoqi@0 1255
aoqi@0 1256 format %{ "vsubsd $dst, $src1, $src2" %}
aoqi@0 1257 ins_cost(150);
aoqi@0 1258 ins_encode %{
aoqi@0 1259 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
aoqi@0 1260 %}
aoqi@0 1261 ins_pipe(pipe_slow);
aoqi@0 1262 %}
aoqi@0 1263
aoqi@0 1264 instruct subD_reg_imm(regD dst, regD src, immD con) %{
aoqi@0 1265 predicate(UseAVX > 0);
aoqi@0 1266 match(Set dst (SubD src con));
aoqi@0 1267
aoqi@0 1268 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
aoqi@0 1269 ins_cost(150);
aoqi@0 1270 ins_encode %{
aoqi@0 1271 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
aoqi@0 1272 %}
aoqi@0 1273 ins_pipe(pipe_slow);
aoqi@0 1274 %}
aoqi@0 1275
aoqi@0 1276 instruct mulF_reg(regF dst, regF src) %{
aoqi@0 1277 predicate((UseSSE>=1) && (UseAVX == 0));
aoqi@0 1278 match(Set dst (MulF dst src));
aoqi@0 1279
aoqi@0 1280 format %{ "mulss $dst, $src" %}
aoqi@0 1281 ins_cost(150);
aoqi@0 1282 ins_encode %{
aoqi@0 1283 __ mulss($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 1284 %}
aoqi@0 1285 ins_pipe(pipe_slow);
aoqi@0 1286 %}
aoqi@0 1287
aoqi@0 1288 instruct mulF_mem(regF dst, memory src) %{
aoqi@0 1289 predicate((UseSSE>=1) && (UseAVX == 0));
aoqi@0 1290 match(Set dst (MulF dst (LoadF src)));
aoqi@0 1291
aoqi@0 1292 format %{ "mulss $dst, $src" %}
aoqi@0 1293 ins_cost(150);
aoqi@0 1294 ins_encode %{
aoqi@0 1295 __ mulss($dst$$XMMRegister, $src$$Address);
aoqi@0 1296 %}
aoqi@0 1297 ins_pipe(pipe_slow);
aoqi@0 1298 %}
aoqi@0 1299
aoqi@0 1300 instruct mulF_imm(regF dst, immF con) %{
aoqi@0 1301 predicate((UseSSE>=1) && (UseAVX == 0));
aoqi@0 1302 match(Set dst (MulF dst con));
aoqi@0 1303 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
aoqi@0 1304 ins_cost(150);
aoqi@0 1305 ins_encode %{
aoqi@0 1306 __ mulss($dst$$XMMRegister, $constantaddress($con));
aoqi@0 1307 %}
aoqi@0 1308 ins_pipe(pipe_slow);
aoqi@0 1309 %}
aoqi@0 1310
aoqi@0 1311 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
aoqi@0 1312 predicate(UseAVX > 0);
aoqi@0 1313 match(Set dst (MulF src1 src2));
aoqi@0 1314
aoqi@0 1315 format %{ "vmulss $dst, $src1, $src2" %}
aoqi@0 1316 ins_cost(150);
aoqi@0 1317 ins_encode %{
aoqi@0 1318 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
aoqi@0 1319 %}
aoqi@0 1320 ins_pipe(pipe_slow);
aoqi@0 1321 %}
aoqi@0 1322
aoqi@0 1323 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
aoqi@0 1324 predicate(UseAVX > 0);
aoqi@0 1325 match(Set dst (MulF src1 (LoadF src2)));
aoqi@0 1326
aoqi@0 1327 format %{ "vmulss $dst, $src1, $src2" %}
aoqi@0 1328 ins_cost(150);
aoqi@0 1329 ins_encode %{
aoqi@0 1330 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
aoqi@0 1331 %}
aoqi@0 1332 ins_pipe(pipe_slow);
aoqi@0 1333 %}
aoqi@0 1334
aoqi@0 1335 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
aoqi@0 1336 predicate(UseAVX > 0);
aoqi@0 1337 match(Set dst (MulF src con));
aoqi@0 1338
aoqi@0 1339 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
aoqi@0 1340 ins_cost(150);
aoqi@0 1341 ins_encode %{
aoqi@0 1342 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
aoqi@0 1343 %}
aoqi@0 1344 ins_pipe(pipe_slow);
aoqi@0 1345 %}
aoqi@0 1346
aoqi@0 1347 instruct mulD_reg(regD dst, regD src) %{
aoqi@0 1348 predicate((UseSSE>=2) && (UseAVX == 0));
aoqi@0 1349 match(Set dst (MulD dst src));
aoqi@0 1350
aoqi@0 1351 format %{ "mulsd $dst, $src" %}
aoqi@0 1352 ins_cost(150);
aoqi@0 1353 ins_encode %{
aoqi@0 1354 __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 1355 %}
aoqi@0 1356 ins_pipe(pipe_slow);
aoqi@0 1357 %}
aoqi@0 1358
aoqi@0 1359 instruct mulD_mem(regD dst, memory src) %{
aoqi@0 1360 predicate((UseSSE>=2) && (UseAVX == 0));
aoqi@0 1361 match(Set dst (MulD dst (LoadD src)));
aoqi@0 1362
aoqi@0 1363 format %{ "mulsd $dst, $src" %}
aoqi@0 1364 ins_cost(150);
aoqi@0 1365 ins_encode %{
aoqi@0 1366 __ mulsd($dst$$XMMRegister, $src$$Address);
aoqi@0 1367 %}
aoqi@0 1368 ins_pipe(pipe_slow);
aoqi@0 1369 %}
aoqi@0 1370
aoqi@0 1371 instruct mulD_imm(regD dst, immD con) %{
aoqi@0 1372 predicate((UseSSE>=2) && (UseAVX == 0));
aoqi@0 1373 match(Set dst (MulD dst con));
aoqi@0 1374 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
aoqi@0 1375 ins_cost(150);
aoqi@0 1376 ins_encode %{
aoqi@0 1377 __ mulsd($dst$$XMMRegister, $constantaddress($con));
aoqi@0 1378 %}
aoqi@0 1379 ins_pipe(pipe_slow);
aoqi@0 1380 %}
aoqi@0 1381
aoqi@0 1382 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
aoqi@0 1383 predicate(UseAVX > 0);
aoqi@0 1384 match(Set dst (MulD src1 src2));
aoqi@0 1385
aoqi@0 1386 format %{ "vmulsd $dst, $src1, $src2" %}
aoqi@0 1387 ins_cost(150);
aoqi@0 1388 ins_encode %{
aoqi@0 1389 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
aoqi@0 1390 %}
aoqi@0 1391 ins_pipe(pipe_slow);
aoqi@0 1392 %}
aoqi@0 1393
aoqi@0 1394 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
aoqi@0 1395 predicate(UseAVX > 0);
aoqi@0 1396 match(Set dst (MulD src1 (LoadD src2)));
aoqi@0 1397
aoqi@0 1398 format %{ "vmulsd $dst, $src1, $src2" %}
aoqi@0 1399 ins_cost(150);
aoqi@0 1400 ins_encode %{
aoqi@0 1401 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
aoqi@0 1402 %}
aoqi@0 1403 ins_pipe(pipe_slow);
aoqi@0 1404 %}
aoqi@0 1405
aoqi@0 1406 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
aoqi@0 1407 predicate(UseAVX > 0);
aoqi@0 1408 match(Set dst (MulD src con));
aoqi@0 1409
aoqi@0 1410 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
aoqi@0 1411 ins_cost(150);
aoqi@0 1412 ins_encode %{
aoqi@0 1413 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
aoqi@0 1414 %}
aoqi@0 1415 ins_pipe(pipe_slow);
aoqi@0 1416 %}
aoqi@0 1417
aoqi@0 1418 instruct divF_reg(regF dst, regF src) %{
aoqi@0 1419 predicate((UseSSE>=1) && (UseAVX == 0));
aoqi@0 1420 match(Set dst (DivF dst src));
aoqi@0 1421
aoqi@0 1422 format %{ "divss $dst, $src" %}
aoqi@0 1423 ins_cost(150);
aoqi@0 1424 ins_encode %{
aoqi@0 1425 __ divss($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 1426 %}
aoqi@0 1427 ins_pipe(pipe_slow);
aoqi@0 1428 %}
aoqi@0 1429
aoqi@0 1430 instruct divF_mem(regF dst, memory src) %{
aoqi@0 1431 predicate((UseSSE>=1) && (UseAVX == 0));
aoqi@0 1432 match(Set dst (DivF dst (LoadF src)));
aoqi@0 1433
aoqi@0 1434 format %{ "divss $dst, $src" %}
aoqi@0 1435 ins_cost(150);
aoqi@0 1436 ins_encode %{
aoqi@0 1437 __ divss($dst$$XMMRegister, $src$$Address);
aoqi@0 1438 %}
aoqi@0 1439 ins_pipe(pipe_slow);
aoqi@0 1440 %}
aoqi@0 1441
aoqi@0 1442 instruct divF_imm(regF dst, immF con) %{
aoqi@0 1443 predicate((UseSSE>=1) && (UseAVX == 0));
aoqi@0 1444 match(Set dst (DivF dst con));
aoqi@0 1445 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
aoqi@0 1446 ins_cost(150);
aoqi@0 1447 ins_encode %{
aoqi@0 1448 __ divss($dst$$XMMRegister, $constantaddress($con));
aoqi@0 1449 %}
aoqi@0 1450 ins_pipe(pipe_slow);
aoqi@0 1451 %}
aoqi@0 1452
aoqi@0 1453 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
aoqi@0 1454 predicate(UseAVX > 0);
aoqi@0 1455 match(Set dst (DivF src1 src2));
aoqi@0 1456
aoqi@0 1457 format %{ "vdivss $dst, $src1, $src2" %}
aoqi@0 1458 ins_cost(150);
aoqi@0 1459 ins_encode %{
aoqi@0 1460 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
aoqi@0 1461 %}
aoqi@0 1462 ins_pipe(pipe_slow);
aoqi@0 1463 %}
aoqi@0 1464
aoqi@0 1465 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
aoqi@0 1466 predicate(UseAVX > 0);
aoqi@0 1467 match(Set dst (DivF src1 (LoadF src2)));
aoqi@0 1468
aoqi@0 1469 format %{ "vdivss $dst, $src1, $src2" %}
aoqi@0 1470 ins_cost(150);
aoqi@0 1471 ins_encode %{
aoqi@0 1472 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
aoqi@0 1473 %}
aoqi@0 1474 ins_pipe(pipe_slow);
aoqi@0 1475 %}
aoqi@0 1476
aoqi@0 1477 instruct divF_reg_imm(regF dst, regF src, immF con) %{
aoqi@0 1478 predicate(UseAVX > 0);
aoqi@0 1479 match(Set dst (DivF src con));
aoqi@0 1480
aoqi@0 1481 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
aoqi@0 1482 ins_cost(150);
aoqi@0 1483 ins_encode %{
aoqi@0 1484 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
aoqi@0 1485 %}
aoqi@0 1486 ins_pipe(pipe_slow);
aoqi@0 1487 %}
aoqi@0 1488
aoqi@0 1489 instruct divD_reg(regD dst, regD src) %{
aoqi@0 1490 predicate((UseSSE>=2) && (UseAVX == 0));
aoqi@0 1491 match(Set dst (DivD dst src));
aoqi@0 1492
aoqi@0 1493 format %{ "divsd $dst, $src" %}
aoqi@0 1494 ins_cost(150);
aoqi@0 1495 ins_encode %{
aoqi@0 1496 __ divsd($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 1497 %}
aoqi@0 1498 ins_pipe(pipe_slow);
aoqi@0 1499 %}
aoqi@0 1500
aoqi@0 1501 instruct divD_mem(regD dst, memory src) %{
aoqi@0 1502 predicate((UseSSE>=2) && (UseAVX == 0));
aoqi@0 1503 match(Set dst (DivD dst (LoadD src)));
aoqi@0 1504
aoqi@0 1505 format %{ "divsd $dst, $src" %}
aoqi@0 1506 ins_cost(150);
aoqi@0 1507 ins_encode %{
aoqi@0 1508 __ divsd($dst$$XMMRegister, $src$$Address);
aoqi@0 1509 %}
aoqi@0 1510 ins_pipe(pipe_slow);
aoqi@0 1511 %}
aoqi@0 1512
aoqi@0 1513 instruct divD_imm(regD dst, immD con) %{
aoqi@0 1514 predicate((UseSSE>=2) && (UseAVX == 0));
aoqi@0 1515 match(Set dst (DivD dst con));
aoqi@0 1516 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
aoqi@0 1517 ins_cost(150);
aoqi@0 1518 ins_encode %{
aoqi@0 1519 __ divsd($dst$$XMMRegister, $constantaddress($con));
aoqi@0 1520 %}
aoqi@0 1521 ins_pipe(pipe_slow);
aoqi@0 1522 %}
aoqi@0 1523
aoqi@0 1524 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
aoqi@0 1525 predicate(UseAVX > 0);
aoqi@0 1526 match(Set dst (DivD src1 src2));
aoqi@0 1527
aoqi@0 1528 format %{ "vdivsd $dst, $src1, $src2" %}
aoqi@0 1529 ins_cost(150);
aoqi@0 1530 ins_encode %{
aoqi@0 1531 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
aoqi@0 1532 %}
aoqi@0 1533 ins_pipe(pipe_slow);
aoqi@0 1534 %}
aoqi@0 1535
aoqi@0 1536 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
aoqi@0 1537 predicate(UseAVX > 0);
aoqi@0 1538 match(Set dst (DivD src1 (LoadD src2)));
aoqi@0 1539
aoqi@0 1540 format %{ "vdivsd $dst, $src1, $src2" %}
aoqi@0 1541 ins_cost(150);
aoqi@0 1542 ins_encode %{
aoqi@0 1543 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
aoqi@0 1544 %}
aoqi@0 1545 ins_pipe(pipe_slow);
aoqi@0 1546 %}
aoqi@0 1547
aoqi@0 1548 instruct divD_reg_imm(regD dst, regD src, immD con) %{
aoqi@0 1549 predicate(UseAVX > 0);
aoqi@0 1550 match(Set dst (DivD src con));
aoqi@0 1551
aoqi@0 1552 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
aoqi@0 1553 ins_cost(150);
aoqi@0 1554 ins_encode %{
aoqi@0 1555 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
aoqi@0 1556 %}
aoqi@0 1557 ins_pipe(pipe_slow);
aoqi@0 1558 %}
aoqi@0 1559
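// Absolute value and negation are implemented with bit masks rather than
// arithmetic: abs clears the IEEE sign bit by ANDing with 0x7fffffff (float)
// or 0x7fffffffffffffff (double), and neg flips it by XORing with 0x80000000
// or 0x8000000000000000.  float_signmask()/float_signflip() and their double
// counterparts supply the addresses of those in-memory mask constants.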
aoqi@0 1560 instruct absF_reg(regF dst) %{
aoqi@0 1561 predicate((UseSSE>=1) && (UseAVX == 0));
aoqi@0 1562 match(Set dst (AbsF dst));
aoqi@0 1563 ins_cost(150);
aoqi@0 1564 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
aoqi@0 1565 ins_encode %{
aoqi@0 1566 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
aoqi@0 1567 %}
aoqi@0 1568 ins_pipe(pipe_slow);
aoqi@0 1569 %}
aoqi@0 1570
aoqi@0 1571 instruct absF_reg_reg(regF dst, regF src) %{
aoqi@0 1572 predicate(UseAVX > 0);
aoqi@0 1573 match(Set dst (AbsF src));
aoqi@0 1574 ins_cost(150);
aoqi@0 1575 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
aoqi@0 1576 ins_encode %{
aoqi@0 1577 bool vector256 = false;
aoqi@0 1578 __ vandps($dst$$XMMRegister, $src$$XMMRegister,
aoqi@0 1579 ExternalAddress(float_signmask()), vector256);
aoqi@0 1580 %}
aoqi@0 1581 ins_pipe(pipe_slow);
aoqi@0 1582 %}
aoqi@0 1583
aoqi@0 1584 instruct absD_reg(regD dst) %{
aoqi@0 1585 predicate((UseSSE>=2) && (UseAVX == 0));
aoqi@0 1586 match(Set dst (AbsD dst));
aoqi@0 1587 ins_cost(150);
aoqi@0 1588 format %{ "andpd $dst, [0x7fffffffffffffff]\t"
aoqi@0 1589 "# abs double by sign masking" %}
aoqi@0 1590 ins_encode %{
aoqi@0 1591 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
aoqi@0 1592 %}
aoqi@0 1593 ins_pipe(pipe_slow);
aoqi@0 1594 %}
aoqi@0 1595
aoqi@0 1596 instruct absD_reg_reg(regD dst, regD src) %{
aoqi@0 1597 predicate(UseAVX > 0);
aoqi@0 1598 match(Set dst (AbsD src));
aoqi@0 1599 ins_cost(150);
aoqi@0 1600 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
aoqi@0 1601 "# abs double by sign masking" %}
aoqi@0 1602 ins_encode %{
aoqi@0 1603 bool vector256 = false;
aoqi@0 1604 __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
aoqi@0 1605 ExternalAddress(double_signmask()), vector256);
aoqi@0 1606 %}
aoqi@0 1607 ins_pipe(pipe_slow);
aoqi@0 1608 %}
aoqi@0 1609
aoqi@0 1610 instruct negF_reg(regF dst) %{
aoqi@0 1611 predicate((UseSSE>=1) && (UseAVX == 0));
aoqi@0 1612 match(Set dst (NegF dst));
aoqi@0 1613 ins_cost(150);
aoqi@0 1614 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
aoqi@0 1615 ins_encode %{
aoqi@0 1616 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
aoqi@0 1617 %}
aoqi@0 1618 ins_pipe(pipe_slow);
aoqi@0 1619 %}
aoqi@0 1620
aoqi@0 1621 instruct negF_reg_reg(regF dst, regF src) %{
aoqi@0 1622 predicate(UseAVX > 0);
aoqi@0 1623 match(Set dst (NegF src));
aoqi@0 1624 ins_cost(150);
aoqi@0 1625 format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
aoqi@0 1626 ins_encode %{
aoqi@0 1627 bool vector256 = false;
aoqi@0 1628 __ vxorps($dst$$XMMRegister, $src$$XMMRegister,
aoqi@0 1629 ExternalAddress(float_signflip()), vector256);
aoqi@0 1630 %}
aoqi@0 1631 ins_pipe(pipe_slow);
aoqi@0 1632 %}
aoqi@0 1633
aoqi@0 1634 instruct negD_reg(regD dst) %{
aoqi@0 1635 predicate((UseSSE>=2) && (UseAVX == 0));
aoqi@0 1636 match(Set dst (NegD dst));
aoqi@0 1637 ins_cost(150);
aoqi@0 1638 format %{ "xorpd $dst, [0x8000000000000000]\t"
aoqi@0 1639 "# neg double by sign flipping" %}
aoqi@0 1640 ins_encode %{
aoqi@0 1641 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
aoqi@0 1642 %}
aoqi@0 1643 ins_pipe(pipe_slow);
aoqi@0 1644 %}
aoqi@0 1645
aoqi@0 1646 instruct negD_reg_reg(regD dst, regD src) %{
aoqi@0 1647 predicate(UseAVX > 0);
aoqi@0 1648 match(Set dst (NegD src));
aoqi@0 1649 ins_cost(150);
aoqi@0 1650 format %{ "vxorpd $dst, $src, [0x8000000000000000]\t"
aoqi@0 1651 "# neg double by sign flipping" %}
aoqi@0 1652 ins_encode %{
aoqi@0 1653 bool vector256 = false;
aoqi@0 1654 __ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
aoqi@0 1655 ExternalAddress(double_signflip()), vector256);
aoqi@0 1656 %}
aoqi@0 1657 ins_pipe(pipe_slow);
aoqi@0 1658 %}
aoqi@0 1659
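// Java exposes only Math.sqrt(double), so a single-precision square root
// reaches the matcher as ConvD2F(SqrtD(ConvF2D src)).  That whole composition
// can be emitted as one sqrtss, since rounding the double-precision square
// root of a float back to float yields the correctly rounded float result.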
aoqi@0 1660 instruct sqrtF_reg(regF dst, regF src) %{
aoqi@0 1661 predicate(UseSSE>=1);
aoqi@0 1662 match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
aoqi@0 1663
aoqi@0 1664 format %{ "sqrtss $dst, $src" %}
aoqi@0 1665 ins_cost(150);
aoqi@0 1666 ins_encode %{
aoqi@0 1667 __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 1668 %}
aoqi@0 1669 ins_pipe(pipe_slow);
aoqi@0 1670 %}
aoqi@0 1671
aoqi@0 1672 instruct sqrtF_mem(regF dst, memory src) %{
aoqi@0 1673 predicate(UseSSE>=1);
aoqi@0 1674 match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
aoqi@0 1675
aoqi@0 1676 format %{ "sqrtss $dst, $src" %}
aoqi@0 1677 ins_cost(150);
aoqi@0 1678 ins_encode %{
aoqi@0 1679 __ sqrtss($dst$$XMMRegister, $src$$Address);
aoqi@0 1680 %}
aoqi@0 1681 ins_pipe(pipe_slow);
aoqi@0 1682 %}
aoqi@0 1683
aoqi@0 1684 instruct sqrtF_imm(regF dst, immF con) %{
aoqi@0 1685 predicate(UseSSE>=1);
aoqi@0 1686 match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
aoqi@0 1687 format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
aoqi@0 1688 ins_cost(150);
aoqi@0 1689 ins_encode %{
aoqi@0 1690 __ sqrtss($dst$$XMMRegister, $constantaddress($con));
aoqi@0 1691 %}
aoqi@0 1692 ins_pipe(pipe_slow);
aoqi@0 1693 %}
aoqi@0 1694
aoqi@0 1695 instruct sqrtD_reg(regD dst, regD src) %{
aoqi@0 1696 predicate(UseSSE>=2);
aoqi@0 1697 match(Set dst (SqrtD src));
aoqi@0 1698
aoqi@0 1699 format %{ "sqrtsd $dst, $src" %}
aoqi@0 1700 ins_cost(150);
aoqi@0 1701 ins_encode %{
aoqi@0 1702 __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 1703 %}
aoqi@0 1704 ins_pipe(pipe_slow);
aoqi@0 1705 %}
aoqi@0 1706
aoqi@0 1707 instruct sqrtD_mem(regD dst, memory src) %{
aoqi@0 1708 predicate(UseSSE>=2);
aoqi@0 1709 match(Set dst (SqrtD (LoadD src)));
aoqi@0 1710
aoqi@0 1711 format %{ "sqrtsd $dst, $src" %}
aoqi@0 1712 ins_cost(150);
aoqi@0 1713 ins_encode %{
aoqi@0 1714 __ sqrtsd($dst$$XMMRegister, $src$$Address);
aoqi@0 1715 %}
aoqi@0 1716 ins_pipe(pipe_slow);
aoqi@0 1717 %}
aoqi@0 1718
aoqi@0 1719 instruct sqrtD_imm(regD dst, immD con) %{
aoqi@0 1720 predicate(UseSSE>=2);
aoqi@0 1721 match(Set dst (SqrtD con));
aoqi@0 1722 format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
aoqi@0 1723 ins_cost(150);
aoqi@0 1724 ins_encode %{
aoqi@0 1725 __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
aoqi@0 1726 %}
aoqi@0 1727 ins_pipe(pipe_slow);
aoqi@0 1728 %}
aoqi@0 1729
aoqi@0 1730
aoqi@0 1731 // ====================VECTOR INSTRUCTIONS=====================================
aoqi@0 1732
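// The rules in this section back C2's SuperWord auto-vectorizer.  Vector
// values are typed by size: vecS (4 bytes), vecD (8), vecX (16) and vecY (32).
// As a purely illustrative (hypothetical) example, a loop such as
//
//   for (int i = 0; i < a.length; i++) { a[i] += b[i]; }
//
// is widened into LoadVector/AddVI/StoreVector ideal nodes, which the
// patterns below match to movdqu/paddd/movdqu (or their VEX forms on AVX).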
aoqi@0 1733 // Load vectors (4 bytes long)
aoqi@0 1734 instruct loadV4(vecS dst, memory mem) %{
aoqi@0 1735 predicate(n->as_LoadVector()->memory_size() == 4);
aoqi@0 1736 match(Set dst (LoadVector mem));
aoqi@0 1737 ins_cost(125);
aoqi@0 1738 format %{ "movd $dst,$mem\t! load vector (4 bytes)" %}
aoqi@0 1739 ins_encode %{
aoqi@0 1740 __ movdl($dst$$XMMRegister, $mem$$Address);
aoqi@0 1741 %}
aoqi@0 1742 ins_pipe( pipe_slow );
aoqi@0 1743 %}
aoqi@0 1744
aoqi@0 1745 // Load vectors (8 bytes long)
aoqi@0 1746 instruct loadV8(vecD dst, memory mem) %{
aoqi@0 1747 predicate(n->as_LoadVector()->memory_size() == 8);
aoqi@0 1748 match(Set dst (LoadVector mem));
aoqi@0 1749 ins_cost(125);
aoqi@0 1750 format %{ "movq $dst,$mem\t! load vector (8 bytes)" %}
aoqi@0 1751 ins_encode %{
aoqi@0 1752 __ movq($dst$$XMMRegister, $mem$$Address);
aoqi@0 1753 %}
aoqi@0 1754 ins_pipe( pipe_slow );
aoqi@0 1755 %}
aoqi@0 1756
aoqi@0 1757 // Load vectors (16 bytes long)
aoqi@0 1758 instruct loadV16(vecX dst, memory mem) %{
aoqi@0 1759 predicate(n->as_LoadVector()->memory_size() == 16);
aoqi@0 1760 match(Set dst (LoadVector mem));
aoqi@0 1761 ins_cost(125);
aoqi@0 1762 format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %}
aoqi@0 1763 ins_encode %{
aoqi@0 1764 __ movdqu($dst$$XMMRegister, $mem$$Address);
aoqi@0 1765 %}
aoqi@0 1766 ins_pipe( pipe_slow );
aoqi@0 1767 %}
aoqi@0 1768
aoqi@0 1769 // Load vectors (32 bytes long)
aoqi@0 1770 instruct loadV32(vecY dst, memory mem) %{
aoqi@0 1771 predicate(n->as_LoadVector()->memory_size() == 32);
aoqi@0 1772 match(Set dst (LoadVector mem));
aoqi@0 1773 ins_cost(125);
aoqi@0 1774 format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
aoqi@0 1775 ins_encode %{
aoqi@0 1776 __ vmovdqu($dst$$XMMRegister, $mem$$Address);
aoqi@0 1777 %}
aoqi@0 1778 ins_pipe( pipe_slow );
aoqi@0 1779 %}
aoqi@0 1780
aoqi@0 1781 // Store vectors
aoqi@0 1782 instruct storeV4(memory mem, vecS src) %{
aoqi@0 1783 predicate(n->as_StoreVector()->memory_size() == 4);
aoqi@0 1784 match(Set mem (StoreVector mem src));
aoqi@0 1785 ins_cost(145);
aoqi@0 1786 format %{ "movd $mem,$src\t! store vector (4 bytes)" %}
aoqi@0 1787 ins_encode %{
aoqi@0 1788 __ movdl($mem$$Address, $src$$XMMRegister);
aoqi@0 1789 %}
aoqi@0 1790 ins_pipe( pipe_slow );
aoqi@0 1791 %}
aoqi@0 1792
aoqi@0 1793 instruct storeV8(memory mem, vecD src) %{
aoqi@0 1794 predicate(n->as_StoreVector()->memory_size() == 8);
aoqi@0 1795 match(Set mem (StoreVector mem src));
aoqi@0 1796 ins_cost(145);
aoqi@0 1797 format %{ "movq $mem,$src\t! store vector (8 bytes)" %}
aoqi@0 1798 ins_encode %{
aoqi@0 1799 __ movq($mem$$Address, $src$$XMMRegister);
aoqi@0 1800 %}
aoqi@0 1801 ins_pipe( pipe_slow );
aoqi@0 1802 %}
aoqi@0 1803
aoqi@0 1804 instruct storeV16(memory mem, vecX src) %{
aoqi@0 1805 predicate(n->as_StoreVector()->memory_size() == 16);
aoqi@0 1806 match(Set mem (StoreVector mem src));
aoqi@0 1807 ins_cost(145);
aoqi@0 1808 format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %}
aoqi@0 1809 ins_encode %{
aoqi@0 1810 __ movdqu($mem$$Address, $src$$XMMRegister);
aoqi@0 1811 %}
aoqi@0 1812 ins_pipe( pipe_slow );
aoqi@0 1813 %}
aoqi@0 1814
aoqi@0 1815 instruct storeV32(memory mem, vecY src) %{
aoqi@0 1816 predicate(n->as_StoreVector()->memory_size() == 32);
aoqi@0 1817 match(Set mem (StoreVector mem src));
aoqi@0 1818 ins_cost(145);
aoqi@0 1819 format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
aoqi@0 1820 ins_encode %{
aoqi@0 1821 __ vmovdqu($mem$$Address, $src$$XMMRegister);
aoqi@0 1822 %}
aoqi@0 1823 ins_pipe( pipe_slow );
aoqi@0 1824 %}
aoqi@0 1825
aoqi@0 1826 // Replicate byte scalar to be vector
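// The broadcast is built by repeated widening: movd places the byte in the
// low dword, punpcklbw with itself duplicates the byte into a 16-bit word,
// pshuflw 0x00 spreads that word across the low 64 bits, punpcklqdq copies
// the low quadword into the high one, and (for 256-bit vectors) vinserti128h
// mirrors the low 128-bit lane into the upper lane.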
aoqi@0 1827 instruct Repl4B(vecS dst, rRegI src) %{
aoqi@0 1828 predicate(n->as_Vector()->length() == 4);
aoqi@0 1829 match(Set dst (ReplicateB src));
aoqi@0 1830 format %{ "movd $dst,$src\n\t"
aoqi@0 1831 "punpcklbw $dst,$dst\n\t"
aoqi@0 1832 "pshuflw $dst,$dst,0x00\t! replicate4B" %}
aoqi@0 1833 ins_encode %{
aoqi@0 1834 __ movdl($dst$$XMMRegister, $src$$Register);
aoqi@0 1835 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 1836 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
aoqi@0 1837 %}
aoqi@0 1838 ins_pipe( pipe_slow );
aoqi@0 1839 %}
aoqi@0 1840
aoqi@0 1841 instruct Repl8B(vecD dst, rRegI src) %{
aoqi@0 1842 predicate(n->as_Vector()->length() == 8);
aoqi@0 1843 match(Set dst (ReplicateB src));
aoqi@0 1844 format %{ "movd $dst,$src\n\t"
aoqi@0 1845 "punpcklbw $dst,$dst\n\t"
aoqi@0 1846 "pshuflw $dst,$dst,0x00\t! replicate8B" %}
aoqi@0 1847 ins_encode %{
aoqi@0 1848 __ movdl($dst$$XMMRegister, $src$$Register);
aoqi@0 1849 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 1850 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
aoqi@0 1851 %}
aoqi@0 1852 ins_pipe( pipe_slow );
aoqi@0 1853 %}
aoqi@0 1854
aoqi@0 1855 instruct Repl16B(vecX dst, rRegI src) %{
aoqi@0 1856 predicate(n->as_Vector()->length() == 16);
aoqi@0 1857 match(Set dst (ReplicateB src));
aoqi@0 1858 format %{ "movd $dst,$src\n\t"
aoqi@0 1859 "punpcklbw $dst,$dst\n\t"
aoqi@0 1860 "pshuflw $dst,$dst,0x00\n\t"
aoqi@0 1861 "punpcklqdq $dst,$dst\t! replicate16B" %}
aoqi@0 1862 ins_encode %{
aoqi@0 1863 __ movdl($dst$$XMMRegister, $src$$Register);
aoqi@0 1864 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 1865 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
aoqi@0 1866 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 1867 %}
aoqi@0 1868 ins_pipe( pipe_slow );
aoqi@0 1869 %}
aoqi@0 1870
aoqi@0 1871 instruct Repl32B(vecY dst, rRegI src) %{
aoqi@0 1872 predicate(n->as_Vector()->length() == 32);
aoqi@0 1873 match(Set dst (ReplicateB src));
aoqi@0 1874 format %{ "movd $dst,$src\n\t"
aoqi@0 1875 "punpcklbw $dst,$dst\n\t"
aoqi@0 1876 "pshuflw $dst,$dst,0x00\n\t"
aoqi@0 1877 "punpcklqdq $dst,$dst\n\t"
aoqi@0 1878 "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
aoqi@0 1879 ins_encode %{
aoqi@0 1880 __ movdl($dst$$XMMRegister, $src$$Register);
aoqi@0 1881 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 1882 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
aoqi@0 1883 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 1884 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 1885 %}
aoqi@0 1886 ins_pipe( pipe_slow );
aoqi@0 1887 %}
aoqi@0 1888
aoqi@0 1889 // Replicate byte scalar immediate to be vector by loading from const table.
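// The replicate4_imm/replicate8_imm helpers build the table entry by
// repeating the low 'width' bytes of the immediate across 32 or 64 bits;
// as an illustrative value, replicate8_imm(0x41, 1) should yield
// 0x4141414141414141.  The entry is then widened to the full vector with
// punpcklqdq / vinserti128h as above.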
aoqi@0 1890 instruct Repl4B_imm(vecS dst, immI con) %{
aoqi@0 1891 predicate(n->as_Vector()->length() == 4);
aoqi@0 1892 match(Set dst (ReplicateB con));
aoqi@0 1893 format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %}
aoqi@0 1894 ins_encode %{
aoqi@0 1895 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
aoqi@0 1896 %}
aoqi@0 1897 ins_pipe( pipe_slow );
aoqi@0 1898 %}
aoqi@0 1899
aoqi@0 1900 instruct Repl8B_imm(vecD dst, immI con) %{
aoqi@0 1901 predicate(n->as_Vector()->length() == 8);
aoqi@0 1902 match(Set dst (ReplicateB con));
aoqi@0 1903 format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %}
aoqi@0 1904 ins_encode %{
aoqi@0 1905 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
aoqi@0 1906 %}
aoqi@0 1907 ins_pipe( pipe_slow );
aoqi@0 1908 %}
aoqi@0 1909
aoqi@0 1910 instruct Repl16B_imm(vecX dst, immI con) %{
aoqi@0 1911 predicate(n->as_Vector()->length() == 16);
aoqi@0 1912 match(Set dst (ReplicateB con));
aoqi@0 1913 format %{ "movq $dst,[$constantaddress]\n\t"
aoqi@0 1914 "punpcklqdq $dst,$dst\t! replicate16B($con)" %}
aoqi@0 1915 ins_encode %{
aoqi@0 1916 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
aoqi@0 1917 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 1918 %}
aoqi@0 1919 ins_pipe( pipe_slow );
aoqi@0 1920 %}
aoqi@0 1921
aoqi@0 1922 instruct Repl32B_imm(vecY dst, immI con) %{
aoqi@0 1923 predicate(n->as_Vector()->length() == 32);
aoqi@0 1924 match(Set dst (ReplicateB con));
aoqi@0 1925 format %{ "movq $dst,[$constantaddress]\n\t"
aoqi@0 1926 "punpcklqdq $dst,$dst\n\t"
aoqi@0 1927 "vinserti128h $dst,$dst,$dst\t! replicate32B($con)" %}

aoqi@0 1928 ins_encode %{
aoqi@0 1929 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
aoqi@0 1930 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 1931 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 1932 %}
aoqi@0 1933 ins_pipe( pipe_slow );
aoqi@0 1934 %}
aoqi@0 1935
aoqi@0 1936 // Replicate byte scalar zero to be vector
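// XORing a register with itself is the standard idiom for zeroing it; the
// 'zero' immediate operand only drives matching and is never materialized.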
aoqi@0 1937 instruct Repl4B_zero(vecS dst, immI0 zero) %{
aoqi@0 1938 predicate(n->as_Vector()->length() == 4);
aoqi@0 1939 match(Set dst (ReplicateB zero));
aoqi@0 1940 format %{ "pxor $dst,$dst\t! replicate4B zero" %}
aoqi@0 1941 ins_encode %{
aoqi@0 1942 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 1943 %}
aoqi@0 1944 ins_pipe( fpu_reg_reg );
aoqi@0 1945 %}
aoqi@0 1946
aoqi@0 1947 instruct Repl8B_zero(vecD dst, immI0 zero) %{
aoqi@0 1948 predicate(n->as_Vector()->length() == 8);
aoqi@0 1949 match(Set dst (ReplicateB zero));
aoqi@0 1950 format %{ "pxor $dst,$dst\t! replicate8B zero" %}
aoqi@0 1951 ins_encode %{
aoqi@0 1952 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 1953 %}
aoqi@0 1954 ins_pipe( fpu_reg_reg );
aoqi@0 1955 %}
aoqi@0 1956
aoqi@0 1957 instruct Repl16B_zero(vecX dst, immI0 zero) %{
aoqi@0 1958 predicate(n->as_Vector()->length() == 16);
aoqi@0 1959 match(Set dst (ReplicateB zero));
aoqi@0 1960 format %{ "pxor $dst,$dst\t! replicate16B zero" %}
aoqi@0 1961 ins_encode %{
aoqi@0 1962 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 1963 %}
aoqi@0 1964 ins_pipe( fpu_reg_reg );
aoqi@0 1965 %}
aoqi@0 1966
aoqi@0 1967 instruct Repl32B_zero(vecY dst, immI0 zero) %{
aoqi@0 1968 predicate(n->as_Vector()->length() == 32);
aoqi@0 1969 match(Set dst (ReplicateB zero));
aoqi@0 1970 format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %}
aoqi@0 1971 ins_encode %{
aoqi@0 1972 // Clear the register with vpxor; AVX1 has no 256-bit vpxor (AVX2 adds it), so the vpxor macro is expected to fall back to vxorpd in that case.
aoqi@0 1973 bool vector256 = true;
aoqi@0 1974 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
aoqi@0 1975 %}
aoqi@0 1976 ins_pipe( fpu_reg_reg );
aoqi@0 1977 %}
aoqi@0 1978
aoqi@0 1979 // Replicate char/short (2 byte) scalar to be vector
aoqi@0 1980 instruct Repl2S(vecS dst, rRegI src) %{
aoqi@0 1981 predicate(n->as_Vector()->length() == 2);
aoqi@0 1982 match(Set dst (ReplicateS src));
aoqi@0 1983 format %{ "movd $dst,$src\n\t"
aoqi@0 1984 "pshuflw $dst,$dst,0x00\t! replicate2S" %}
aoqi@0 1985 ins_encode %{
aoqi@0 1986 __ movdl($dst$$XMMRegister, $src$$Register);
aoqi@0 1987 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
aoqi@0 1988 %}
aoqi@0 1989 ins_pipe( fpu_reg_reg );
aoqi@0 1990 %}
aoqi@0 1991
aoqi@0 1992 instruct Repl4S(vecD dst, rRegI src) %{
aoqi@0 1993 predicate(n->as_Vector()->length() == 4);
aoqi@0 1994 match(Set dst (ReplicateS src));
aoqi@0 1995 format %{ "movd $dst,$src\n\t"
aoqi@0 1996 "pshuflw $dst,$dst,0x00\t! replicate4S" %}
aoqi@0 1997 ins_encode %{
aoqi@0 1998 __ movdl($dst$$XMMRegister, $src$$Register);
aoqi@0 1999 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
aoqi@0 2000 %}
aoqi@0 2001 ins_pipe( fpu_reg_reg );
aoqi@0 2002 %}
aoqi@0 2003
aoqi@0 2004 instruct Repl8S(vecX dst, rRegI src) %{
aoqi@0 2005 predicate(n->as_Vector()->length() == 8);
aoqi@0 2006 match(Set dst (ReplicateS src));
aoqi@0 2007 format %{ "movd $dst,$src\n\t"
aoqi@0 2008 "pshuflw $dst,$dst,0x00\n\t"
aoqi@0 2009 "punpcklqdq $dst,$dst\t! replicate8S" %}
aoqi@0 2010 ins_encode %{
aoqi@0 2011 __ movdl($dst$$XMMRegister, $src$$Register);
aoqi@0 2012 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
aoqi@0 2013 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2014 %}
aoqi@0 2015 ins_pipe( pipe_slow );
aoqi@0 2016 %}
aoqi@0 2017
aoqi@0 2018 instruct Repl16S(vecY dst, rRegI src) %{
aoqi@0 2019 predicate(n->as_Vector()->length() == 16);
aoqi@0 2020 match(Set dst (ReplicateS src));
aoqi@0 2021 format %{ "movd $dst,$src\n\t"
aoqi@0 2022 "pshuflw $dst,$dst,0x00\n\t"
aoqi@0 2023 "punpcklqdq $dst,$dst\n\t"
aoqi@0 2024 "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
aoqi@0 2025 ins_encode %{
aoqi@0 2026 __ movdl($dst$$XMMRegister, $src$$Register);
aoqi@0 2027 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
aoqi@0 2028 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2029 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2030 %}
aoqi@0 2031 ins_pipe( pipe_slow );
aoqi@0 2032 %}
aoqi@0 2033
aoqi@0 2034 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
aoqi@0 2035 instruct Repl2S_imm(vecS dst, immI con) %{
aoqi@0 2036 predicate(n->as_Vector()->length() == 2);
aoqi@0 2037 match(Set dst (ReplicateS con));
aoqi@0 2038 format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %}
aoqi@0 2039 ins_encode %{
aoqi@0 2040 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
aoqi@0 2041 %}
aoqi@0 2042 ins_pipe( fpu_reg_reg );
aoqi@0 2043 %}
aoqi@0 2044
aoqi@0 2045 instruct Repl4S_imm(vecD dst, immI con) %{
aoqi@0 2046 predicate(n->as_Vector()->length() == 4);
aoqi@0 2047 match(Set dst (ReplicateS con));
aoqi@0 2048 format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %}
aoqi@0 2049 ins_encode %{
aoqi@0 2050 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
aoqi@0 2051 %}
aoqi@0 2052 ins_pipe( fpu_reg_reg );
aoqi@0 2053 %}
aoqi@0 2054
aoqi@0 2055 instruct Repl8S_imm(vecX dst, immI con) %{
aoqi@0 2056 predicate(n->as_Vector()->length() == 8);
aoqi@0 2057 match(Set dst (ReplicateS con));
aoqi@0 2058 format %{ "movq $dst,[$constantaddress]\n\t"
aoqi@0 2059 "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
aoqi@0 2060 ins_encode %{
aoqi@0 2061 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
aoqi@0 2062 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2063 %}
aoqi@0 2064 ins_pipe( pipe_slow );
aoqi@0 2065 %}
aoqi@0 2066
aoqi@0 2067 instruct Repl16S_imm(vecY dst, immI con) %{
aoqi@0 2068 predicate(n->as_Vector()->length() == 16);
aoqi@0 2069 match(Set dst (ReplicateS con));
aoqi@0 2070 format %{ "movq $dst,[$constantaddress]\n\t"
aoqi@0 2071 "punpcklqdq $dst,$dst\n\t"
aoqi@0 2072 "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %}
aoqi@0 2073 ins_encode %{
aoqi@0 2074 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
aoqi@0 2075 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2076 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2077 %}
aoqi@0 2078 ins_pipe( pipe_slow );
aoqi@0 2079 %}
aoqi@0 2080
aoqi@0 2081 // Replicate char/short (2 byte) scalar zero to be vector
aoqi@0 2082 instruct Repl2S_zero(vecS dst, immI0 zero) %{
aoqi@0 2083 predicate(n->as_Vector()->length() == 2);
aoqi@0 2084 match(Set dst (ReplicateS zero));
aoqi@0 2085 format %{ "pxor $dst,$dst\t! replicate2S zero" %}
aoqi@0 2086 ins_encode %{
aoqi@0 2087 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2088 %}
aoqi@0 2089 ins_pipe( fpu_reg_reg );
aoqi@0 2090 %}
aoqi@0 2091
aoqi@0 2092 instruct Repl4S_zero(vecD dst, immI0 zero) %{
aoqi@0 2093 predicate(n->as_Vector()->length() == 4);
aoqi@0 2094 match(Set dst (ReplicateS zero));
aoqi@0 2095 format %{ "pxor $dst,$dst\t! replicate4S zero" %}
aoqi@0 2096 ins_encode %{
aoqi@0 2097 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2098 %}
aoqi@0 2099 ins_pipe( fpu_reg_reg );
aoqi@0 2100 %}
aoqi@0 2101
aoqi@0 2102 instruct Repl8S_zero(vecX dst, immI0 zero) %{
aoqi@0 2103 predicate(n->as_Vector()->length() == 8);
aoqi@0 2104 match(Set dst (ReplicateS zero));
aoqi@0 2105 format %{ "pxor $dst,$dst\t! replicate8S zero" %}
aoqi@0 2106 ins_encode %{
aoqi@0 2107 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2108 %}
aoqi@0 2109 ins_pipe( fpu_reg_reg );
aoqi@0 2110 %}
aoqi@0 2111
aoqi@0 2112 instruct Repl16S_zero(vecY dst, immI0 zero) %{
aoqi@0 2113 predicate(n->as_Vector()->length() == 16);
aoqi@0 2114 match(Set dst (ReplicateS zero));
aoqi@0 2115 format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %}
aoqi@0 2116 ins_encode %{
aoqi@0 2117 // Clear the register with vpxor; AVX1 has no 256-bit vpxor (AVX2 adds it), so the vpxor macro is expected to fall back to vxorpd in that case.
aoqi@0 2118 bool vector256 = true;
aoqi@0 2119 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
aoqi@0 2120 %}
aoqi@0 2121 ins_pipe( fpu_reg_reg );
aoqi@0 2122 %}
aoqi@0 2123
aoqi@0 2124 // Replicate integer (4 byte) scalar to be vector
aoqi@0 2125 instruct Repl2I(vecD dst, rRegI src) %{
aoqi@0 2126 predicate(n->as_Vector()->length() == 2);
aoqi@0 2127 match(Set dst (ReplicateI src));
aoqi@0 2128 format %{ "movd $dst,$src\n\t"
aoqi@0 2129 "pshufd $dst,$dst,0x00\t! replicate2I" %}
aoqi@0 2130 ins_encode %{
aoqi@0 2131 __ movdl($dst$$XMMRegister, $src$$Register);
aoqi@0 2132 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
aoqi@0 2133 %}
aoqi@0 2134 ins_pipe( fpu_reg_reg );
aoqi@0 2135 %}
aoqi@0 2136
aoqi@0 2137 instruct Repl4I(vecX dst, rRegI src) %{
aoqi@0 2138 predicate(n->as_Vector()->length() == 4);
aoqi@0 2139 match(Set dst (ReplicateI src));
aoqi@0 2140 format %{ "movd $dst,$src\n\t"
aoqi@0 2141 "pshufd $dst,$dst,0x00\t! replicate4I" %}
aoqi@0 2142 ins_encode %{
aoqi@0 2143 __ movdl($dst$$XMMRegister, $src$$Register);
aoqi@0 2144 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
aoqi@0 2145 %}
aoqi@0 2146 ins_pipe( pipe_slow );
aoqi@0 2147 %}
aoqi@0 2148
aoqi@0 2149 instruct Repl8I(vecY dst, rRegI src) %{
aoqi@0 2150 predicate(n->as_Vector()->length() == 8);
aoqi@0 2151 match(Set dst (ReplicateI src));
aoqi@0 2152 format %{ "movd $dst,$src\n\t"
aoqi@0 2153 "pshufd $dst,$dst,0x00\n\t"
aoqi@0 2154 "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
aoqi@0 2155 ins_encode %{
aoqi@0 2156 __ movdl($dst$$XMMRegister, $src$$Register);
aoqi@0 2157 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
aoqi@0 2158 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2159 %}
aoqi@0 2160 ins_pipe( pipe_slow );
aoqi@0 2161 %}
aoqi@0 2162
aoqi@0 2163 // Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
aoqi@0 2164 instruct Repl2I_imm(vecD dst, immI con) %{
aoqi@0 2165 predicate(n->as_Vector()->length() == 2);
aoqi@0 2166 match(Set dst (ReplicateI con));
aoqi@0 2167 format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %}
aoqi@0 2168 ins_encode %{
aoqi@0 2169 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
aoqi@0 2170 %}
aoqi@0 2171 ins_pipe( fpu_reg_reg );
aoqi@0 2172 %}
aoqi@0 2173
aoqi@0 2174 instruct Repl4I_imm(vecX dst, immI con) %{
aoqi@0 2175 predicate(n->as_Vector()->length() == 4);
aoqi@0 2176 match(Set dst (ReplicateI con));
aoqi@0 2177 format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
aoqi@0 2178 "punpcklqdq $dst,$dst" %}
aoqi@0 2179 ins_encode %{
aoqi@0 2180 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
aoqi@0 2181 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2182 %}
aoqi@0 2183 ins_pipe( pipe_slow );
aoqi@0 2184 %}
aoqi@0 2185
aoqi@0 2186 instruct Repl8I_imm(vecY dst, immI con) %{
aoqi@0 2187 predicate(n->as_Vector()->length() == 8);
aoqi@0 2188 match(Set dst (ReplicateI con));
aoqi@0 2189 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
aoqi@0 2190 "punpcklqdq $dst,$dst\n\t"
aoqi@0 2191 "vinserti128h $dst,$dst,$dst" %}
aoqi@0 2192 ins_encode %{
aoqi@0 2193 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
aoqi@0 2194 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2195 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2196 %}
aoqi@0 2197 ins_pipe( pipe_slow );
aoqi@0 2198 %}
aoqi@0 2199
aoqi@0 2200 // An integer can be loaded into an XMM register directly from memory.
aoqi@0 2201 instruct Repl2I_mem(vecD dst, memory mem) %{
aoqi@0 2202 predicate(n->as_Vector()->length() == 2);
aoqi@0 2203 match(Set dst (ReplicateI (LoadI mem)));
aoqi@0 2204 format %{ "movd $dst,$mem\n\t"
aoqi@0 2205 "pshufd $dst,$dst,0x00\t! replicate2I" %}
aoqi@0 2206 ins_encode %{
aoqi@0 2207 __ movdl($dst$$XMMRegister, $mem$$Address);
aoqi@0 2208 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
aoqi@0 2209 %}
aoqi@0 2210 ins_pipe( fpu_reg_reg );
aoqi@0 2211 %}
aoqi@0 2212
aoqi@0 2213 instruct Repl4I_mem(vecX dst, memory mem) %{
aoqi@0 2214 predicate(n->as_Vector()->length() == 4);
aoqi@0 2215 match(Set dst (ReplicateI (LoadI mem)));
aoqi@0 2216 format %{ "movd $dst,$mem\n\t"
aoqi@0 2217 "pshufd $dst,$dst,0x00\t! replicate4I" %}
aoqi@0 2218 ins_encode %{
aoqi@0 2219 __ movdl($dst$$XMMRegister, $mem$$Address);
aoqi@0 2220 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
aoqi@0 2221 %}
aoqi@0 2222 ins_pipe( pipe_slow );
aoqi@0 2223 %}
aoqi@0 2224
aoqi@0 2225 instruct Repl8I_mem(vecY dst, memory mem) %{
aoqi@0 2226 predicate(n->as_Vector()->length() == 8);
aoqi@0 2227 match(Set dst (ReplicateI (LoadI mem)));
aoqi@0 2228 format %{ "movd $dst,$mem\n\t"
aoqi@0 2229 "pshufd $dst,$dst,0x00\n\t"
aoqi@0 2230 "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
aoqi@0 2231 ins_encode %{
aoqi@0 2232 __ movdl($dst$$XMMRegister, $mem$$Address);
aoqi@0 2233 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
aoqi@0 2234 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2235 %}
aoqi@0 2236 ins_pipe( pipe_slow );
aoqi@0 2237 %}
aoqi@0 2238
aoqi@0 2239 // Replicate integer (4 byte) scalar zero to be vector
aoqi@0 2240 instruct Repl2I_zero(vecD dst, immI0 zero) %{
aoqi@0 2241 predicate(n->as_Vector()->length() == 2);
aoqi@0 2242 match(Set dst (ReplicateI zero));
aoqi@0 2243 format %{ "pxor $dst,$dst\t! replicate2I" %}
aoqi@0 2244 ins_encode %{
aoqi@0 2245 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2246 %}
aoqi@0 2247 ins_pipe( fpu_reg_reg );
aoqi@0 2248 %}
aoqi@0 2249
aoqi@0 2250 instruct Repl4I_zero(vecX dst, immI0 zero) %{
aoqi@0 2251 predicate(n->as_Vector()->length() == 4);
aoqi@0 2252 match(Set dst (ReplicateI zero));
aoqi@0 2253 format %{ "pxor $dst,$dst\t! replicate4I zero" %}
aoqi@0 2254 ins_encode %{
aoqi@0 2255 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2256 %}
aoqi@0 2257 ins_pipe( fpu_reg_reg );
aoqi@0 2258 %}
aoqi@0 2259
aoqi@0 2260 instruct Repl8I_zero(vecY dst, immI0 zero) %{
aoqi@0 2261 predicate(n->as_Vector()->length() == 8);
aoqi@0 2262 match(Set dst (ReplicateI zero));
aoqi@0 2263 format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %}
aoqi@0 2264 ins_encode %{
aoqi@0 2265 // Clear the register with vpxor; AVX1 has no 256-bit vpxor (AVX2 adds it), so the vpxor macro is expected to fall back to vxorpd in that case.
aoqi@0 2266 bool vector256 = true;
aoqi@0 2267 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
aoqi@0 2268 %}
aoqi@0 2269 ins_pipe( fpu_reg_reg );
aoqi@0 2270 %}
aoqi@0 2271
aoqi@0 2272 // Replicate long (8 byte) scalar to be vector
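// On 64-bit (_LP64) the long arrives in a single GPR and movdq moves it into
// the XMM register directly.  On 32-bit it lives in a register pair, so the
// low and high halves are moved separately and merged with punpckldq before
// the quadword broadcast.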
aoqi@0 2273 #ifdef _LP64
aoqi@0 2274 instruct Repl2L(vecX dst, rRegL src) %{
aoqi@0 2275 predicate(n->as_Vector()->length() == 2);
aoqi@0 2276 match(Set dst (ReplicateL src));
aoqi@0 2277 format %{ "movdq $dst,$src\n\t"
aoqi@0 2278 "punpcklqdq $dst,$dst\t! replicate2L" %}
aoqi@0 2279 ins_encode %{
aoqi@0 2280 __ movdq($dst$$XMMRegister, $src$$Register);
aoqi@0 2281 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2282 %}
aoqi@0 2283 ins_pipe( pipe_slow );
aoqi@0 2284 %}
aoqi@0 2285
aoqi@0 2286 instruct Repl4L(vecY dst, rRegL src) %{
aoqi@0 2287 predicate(n->as_Vector()->length() == 4);
aoqi@0 2288 match(Set dst (ReplicateL src));
aoqi@0 2289 format %{ "movdq $dst,$src\n\t"
aoqi@0 2290 "punpcklqdq $dst,$dst\n\t"
aoqi@0 2291 "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
aoqi@0 2292 ins_encode %{
aoqi@0 2293 __ movdq($dst$$XMMRegister, $src$$Register);
aoqi@0 2294 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2295 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2296 %}
aoqi@0 2297 ins_pipe( pipe_slow );
aoqi@0 2298 %}
aoqi@0 2299 #else // _LP64
aoqi@0 2300 instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
aoqi@0 2301 predicate(n->as_Vector()->length() == 2);
aoqi@0 2302 match(Set dst (ReplicateL src));
aoqi@0 2303 effect(TEMP dst, USE src, TEMP tmp);
aoqi@0 2304 format %{ "movdl $dst,$src.lo\n\t"
aoqi@0 2305 "movdl $tmp,$src.hi\n\t"
aoqi@0 2306 "punpckldq $dst,$tmp\n\t"
aoqi@0 2307 "punpcklqdq $dst,$dst\t! replicate2L"%}
aoqi@0 2308 ins_encode %{
aoqi@0 2309 __ movdl($dst$$XMMRegister, $src$$Register);
aoqi@0 2310 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
aoqi@0 2311 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
aoqi@0 2312 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2313 %}
aoqi@0 2314 ins_pipe( pipe_slow );
aoqi@0 2315 %}
aoqi@0 2316
aoqi@0 2317 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
aoqi@0 2318 predicate(n->as_Vector()->length() == 4);
aoqi@0 2319 match(Set dst (ReplicateL src));
aoqi@0 2320 effect(TEMP dst, USE src, TEMP tmp);
aoqi@0 2321 format %{ "movdl $dst,$src.lo\n\t"
aoqi@0 2322 "movdl $tmp,$src.hi\n\t"
aoqi@0 2323 "punpckldq $dst,$tmp\n\t"
aoqi@0 2324 "punpcklqdq $dst,$dst\n\t"
aoqi@0 2325 "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
aoqi@0 2326 ins_encode %{
aoqi@0 2327 __ movdl($dst$$XMMRegister, $src$$Register);
aoqi@0 2328 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
aoqi@0 2329 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
aoqi@0 2330 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2331 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2332 %}
aoqi@0 2333 ins_pipe( pipe_slow );
aoqi@0 2334 %}
aoqi@0 2335 #endif // _LP64
aoqi@0 2336
aoqi@0 2337 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
aoqi@0 2338 instruct Repl2L_imm(vecX dst, immL con) %{
aoqi@0 2339 predicate(n->as_Vector()->length() == 2);
aoqi@0 2340 match(Set dst (ReplicateL con));
aoqi@0 2341 format %{ "movq $dst,[$constantaddress]\n\t"
aoqi@0 2342 "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
aoqi@0 2343 ins_encode %{
aoqi@0 2344 __ movq($dst$$XMMRegister, $constantaddress($con));
aoqi@0 2345 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2346 %}
aoqi@0 2347 ins_pipe( pipe_slow );
aoqi@0 2348 %}
aoqi@0 2349
aoqi@0 2350 instruct Repl4L_imm(vecY dst, immL con) %{
aoqi@0 2351 predicate(n->as_Vector()->length() == 4);
aoqi@0 2352 match(Set dst (ReplicateL con));
aoqi@0 2353 format %{ "movq $dst,[$constantaddress]\n\t"
aoqi@0 2354 "punpcklqdq $dst,$dst\n\t"
aoqi@0 2355 "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %}
aoqi@0 2356 ins_encode %{
aoqi@0 2357 __ movq($dst$$XMMRegister, $constantaddress($con));
aoqi@0 2358 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2359 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2360 %}
aoqi@0 2361 ins_pipe( pipe_slow );
aoqi@0 2362 %}
aoqi@0 2363
aoqi@0 2364 // A long can be loaded into an XMM register directly from memory.
aoqi@0 2365 instruct Repl2L_mem(vecX dst, memory mem) %{
aoqi@0 2366 predicate(n->as_Vector()->length() == 2);
aoqi@0 2367 match(Set dst (ReplicateL (LoadL mem)));
aoqi@0 2368 format %{ "movq $dst,$mem\n\t"
aoqi@0 2369 "punpcklqdq $dst,$dst\t! replicate2L" %}
aoqi@0 2370 ins_encode %{
aoqi@0 2371 __ movq($dst$$XMMRegister, $mem$$Address);
aoqi@0 2372 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2373 %}
aoqi@0 2374 ins_pipe( pipe_slow );
aoqi@0 2375 %}
aoqi@0 2376
aoqi@0 2377 instruct Repl4L_mem(vecY dst, memory mem) %{
aoqi@0 2378 predicate(n->as_Vector()->length() == 4);
aoqi@0 2379 match(Set dst (ReplicateL (LoadL mem)));
aoqi@0 2380 format %{ "movq $dst,$mem\n\t"
aoqi@0 2381 "punpcklqdq $dst,$dst\n\t"
aoqi@0 2382 "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
aoqi@0 2383 ins_encode %{
aoqi@0 2384 __ movq($dst$$XMMRegister, $mem$$Address);
aoqi@0 2385 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2386 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2387 %}
aoqi@0 2388 ins_pipe( pipe_slow );
aoqi@0 2389 %}
aoqi@0 2390
aoqi@0 2391 // Replicate long (8 byte) scalar zero to be vector
aoqi@0 2392 instruct Repl2L_zero(vecX dst, immL0 zero) %{
aoqi@0 2393 predicate(n->as_Vector()->length() == 2);
aoqi@0 2394 match(Set dst (ReplicateL zero));
aoqi@0 2395 format %{ "pxor $dst,$dst\t! replicate2L zero" %}
aoqi@0 2396 ins_encode %{
aoqi@0 2397 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2398 %}
aoqi@0 2399 ins_pipe( fpu_reg_reg );
aoqi@0 2400 %}
aoqi@0 2401
aoqi@0 2402 instruct Repl4L_zero(vecY dst, immL0 zero) %{
aoqi@0 2403 predicate(n->as_Vector()->length() == 4);
aoqi@0 2404 match(Set dst (ReplicateL zero));
aoqi@0 2405 format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %}
aoqi@0 2406 ins_encode %{
aoqi@0 2407 // Clear the register with vpxor; AVX1 has no 256-bit vpxor (AVX2 adds it), so the vpxor macro is expected to fall back to vxorpd in that case.
aoqi@0 2408 bool vector256 = true;
aoqi@0 2409 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
aoqi@0 2410 %}
aoqi@0 2411 ins_pipe( fpu_reg_reg );
aoqi@0 2412 %}
aoqi@0 2413
aoqi@0 2414 // Replicate float (4 byte) scalar to be vector
aoqi@0 2415 instruct Repl2F(vecD dst, regF src) %{
aoqi@0 2416 predicate(n->as_Vector()->length() == 2);
aoqi@0 2417 match(Set dst (ReplicateF src));
aoqi@0 2418 format %{ "pshufd $dst,$src,0x00\t! replicate2F" %}
aoqi@0 2419 ins_encode %{
aoqi@0 2420 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
aoqi@0 2421 %}
aoqi@0 2422 ins_pipe( fpu_reg_reg );
aoqi@0 2423 %}
aoqi@0 2424
aoqi@0 2425 instruct Repl4F(vecX dst, regF src) %{
aoqi@0 2426 predicate(n->as_Vector()->length() == 4);
aoqi@0 2427 match(Set dst (ReplicateF src));
aoqi@0 2428 format %{ "pshufd $dst,$src,0x00\t! replicate4F" %}
aoqi@0 2429 ins_encode %{
aoqi@0 2430 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
aoqi@0 2431 %}
aoqi@0 2432 ins_pipe( pipe_slow );
aoqi@0 2433 %}
aoqi@0 2434
aoqi@0 2435 instruct Repl8F(vecY dst, regF src) %{
aoqi@0 2436 predicate(n->as_Vector()->length() == 8);
aoqi@0 2437 match(Set dst (ReplicateF src));
aoqi@0 2438 format %{ "pshufd $dst,$src,0x00\n\t"
aoqi@0 2439 "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
aoqi@0 2440 ins_encode %{
aoqi@0 2441 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
aoqi@0 2442 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2443 %}
aoqi@0 2444 ins_pipe( pipe_slow );
aoqi@0 2445 %}
aoqi@0 2446
aoqi@0 2447 // Replicate float (4 byte) scalar zero to be vector
aoqi@0 2448 instruct Repl2F_zero(vecD dst, immF0 zero) %{
aoqi@0 2449 predicate(n->as_Vector()->length() == 2);
aoqi@0 2450 match(Set dst (ReplicateF zero));
aoqi@0 2451 format %{ "xorps $dst,$dst\t! replicate2F zero" %}
aoqi@0 2452 ins_encode %{
aoqi@0 2453 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2454 %}
aoqi@0 2455 ins_pipe( fpu_reg_reg );
aoqi@0 2456 %}
aoqi@0 2457
aoqi@0 2458 instruct Repl4F_zero(vecX dst, immF0 zero) %{
aoqi@0 2459 predicate(n->as_Vector()->length() == 4);
aoqi@0 2460 match(Set dst (ReplicateF zero));
aoqi@0 2461 format %{ "xorps $dst,$dst\t! replicate4F zero" %}
aoqi@0 2462 ins_encode %{
aoqi@0 2463 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2464 %}
aoqi@0 2465 ins_pipe( fpu_reg_reg );
aoqi@0 2466 %}
aoqi@0 2467
aoqi@0 2468 instruct Repl8F_zero(vecY dst, immF0 zero) %{
aoqi@0 2469 predicate(n->as_Vector()->length() == 8);
aoqi@0 2470 match(Set dst (ReplicateF zero));
aoqi@0 2471 format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %}
aoqi@0 2472 ins_encode %{
aoqi@0 2473 bool vector256 = true;
aoqi@0 2474 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
aoqi@0 2475 %}
aoqi@0 2476 ins_pipe( fpu_reg_reg );
aoqi@0 2477 %}
aoqi@0 2478
aoqi@0 2479 // Replicate double (8 bytes) scalar to be vector
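// pshufd with selector 0x44 (dwords 0,1,0,1) copies the low 64-bit lane into
// the high one, broadcasting the double across the XMM register.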
aoqi@0 2480 instruct Repl2D(vecX dst, regD src) %{
aoqi@0 2481 predicate(n->as_Vector()->length() == 2);
aoqi@0 2482 match(Set dst (ReplicateD src));
aoqi@0 2483 format %{ "pshufd $dst,$src,0x44\t! replicate2D" %}
aoqi@0 2484 ins_encode %{
aoqi@0 2485 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
aoqi@0 2486 %}
aoqi@0 2487 ins_pipe( pipe_slow );
aoqi@0 2488 %}
aoqi@0 2489
aoqi@0 2490 instruct Repl4D(vecY dst, regD src) %{
aoqi@0 2491 predicate(n->as_Vector()->length() == 4);
aoqi@0 2492 match(Set dst (ReplicateD src));
aoqi@0 2493 format %{ "pshufd $dst,$src,0x44\n\t"
aoqi@0 2494 "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
aoqi@0 2495 ins_encode %{
aoqi@0 2496 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
aoqi@0 2497 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2498 %}
aoqi@0 2499 ins_pipe( pipe_slow );
aoqi@0 2500 %}
aoqi@0 2501
aoqi@0 2502 // Replicate double (8 byte) scalar zero to be vector
aoqi@0 2503 instruct Repl2D_zero(vecX dst, immD0 zero) %{
aoqi@0 2504 predicate(n->as_Vector()->length() == 2);
aoqi@0 2505 match(Set dst (ReplicateD zero));
aoqi@0 2506 format %{ "xorpd $dst,$dst\t! replicate2D zero" %}
aoqi@0 2507 ins_encode %{
aoqi@0 2508 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2509 %}
aoqi@0 2510 ins_pipe( fpu_reg_reg );
aoqi@0 2511 %}
aoqi@0 2512
aoqi@0 2513 instruct Repl4D_zero(vecY dst, immD0 zero) %{
aoqi@0 2514 predicate(n->as_Vector()->length() == 4);
aoqi@0 2515 match(Set dst (ReplicateD zero));
aoqi@0 2516 format %{ "vxorpd $dst,$dst,$dst\t! replicate4D zero" %}
aoqi@0 2517 ins_encode %{
aoqi@0 2518 bool vector256 = true;
aoqi@0 2519 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
aoqi@0 2520 %}
aoqi@0 2521 ins_pipe( fpu_reg_reg );
aoqi@0 2522 %}
aoqi@0 2523
aoqi@0 2524 // ====================VECTOR ARITHMETIC=======================================
aoqi@0 2525
aoqi@0 2526 // --------------------------------- ADD --------------------------------------
aoqi@0 2527
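// Each element width follows the same scheme: a destructive SSE form
// (paddb/paddw/paddd on dst), a three-operand AVX form (vpaddb/vpaddw/vpaddd),
// and a _mem form that folds the load of the second operand.  The 256-bit
// integer variants require AVX2, hence the UseAVX > 1 predicates.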
aoqi@0 2528 // Bytes vector add
aoqi@0 2529 instruct vadd4B(vecS dst, vecS src) %{
aoqi@0 2530 predicate(n->as_Vector()->length() == 4);
aoqi@0 2531 match(Set dst (AddVB dst src));
aoqi@0 2532 format %{ "paddb $dst,$src\t! add packed4B" %}
aoqi@0 2533 ins_encode %{
aoqi@0 2534 __ paddb($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 2535 %}
aoqi@0 2536 ins_pipe( pipe_slow );
aoqi@0 2537 %}
aoqi@0 2538
aoqi@0 2539 instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
aoqi@0 2540 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 2541 match(Set dst (AddVB src1 src2));
aoqi@0 2542 format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
aoqi@0 2543 ins_encode %{
aoqi@0 2544 bool vector256 = false;
aoqi@0 2545 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 2546 %}
aoqi@0 2547 ins_pipe( pipe_slow );
aoqi@0 2548 %}
aoqi@0 2549
aoqi@0 2550 instruct vadd8B(vecD dst, vecD src) %{
aoqi@0 2551 predicate(n->as_Vector()->length() == 8);
aoqi@0 2552 match(Set dst (AddVB dst src));
aoqi@0 2553 format %{ "paddb $dst,$src\t! add packed8B" %}
aoqi@0 2554 ins_encode %{
aoqi@0 2555 __ paddb($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 2556 %}
aoqi@0 2557 ins_pipe( pipe_slow );
aoqi@0 2558 %}
aoqi@0 2559
aoqi@0 2560 instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
aoqi@0 2561 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
aoqi@0 2562 match(Set dst (AddVB src1 src2));
aoqi@0 2563 format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
aoqi@0 2564 ins_encode %{
aoqi@0 2565 bool vector256 = false;
aoqi@0 2566 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 2567 %}
aoqi@0 2568 ins_pipe( pipe_slow );
aoqi@0 2569 %}
aoqi@0 2570
aoqi@0 2571 instruct vadd16B(vecX dst, vecX src) %{
aoqi@0 2572 predicate(n->as_Vector()->length() == 16);
aoqi@0 2573 match(Set dst (AddVB dst src));
aoqi@0 2574 format %{ "paddb $dst,$src\t! add packed16B" %}
aoqi@0 2575 ins_encode %{
aoqi@0 2576 __ paddb($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 2577 %}
aoqi@0 2578 ins_pipe( pipe_slow );
aoqi@0 2579 %}
aoqi@0 2580
aoqi@0 2581 instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
aoqi@0 2582 predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
aoqi@0 2583 match(Set dst (AddVB src1 src2));
aoqi@0 2584 format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
aoqi@0 2585 ins_encode %{
aoqi@0 2586 bool vector256 = false;
aoqi@0 2587 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 2588 %}
aoqi@0 2589 ins_pipe( pipe_slow );
aoqi@0 2590 %}
aoqi@0 2591
aoqi@0 2592 instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
aoqi@0 2593 predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
aoqi@0 2594 match(Set dst (AddVB src (LoadVector mem)));
aoqi@0 2595 format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
aoqi@0 2596 ins_encode %{
aoqi@0 2597 bool vector256 = false;
aoqi@0 2598 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 2599 %}
aoqi@0 2600 ins_pipe( pipe_slow );
aoqi@0 2601 %}
aoqi@0 2602
aoqi@0 2603 instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
aoqi@0 2604 predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
aoqi@0 2605 match(Set dst (AddVB src1 src2));
aoqi@0 2606 format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
aoqi@0 2607 ins_encode %{
aoqi@0 2608 bool vector256 = true;
aoqi@0 2609 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 2610 %}
aoqi@0 2611 ins_pipe( pipe_slow );
aoqi@0 2612 %}
aoqi@0 2613
aoqi@0 2614 instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
aoqi@0 2615 predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
aoqi@0 2616 match(Set dst (AddVB src (LoadVector mem)));
aoqi@0 2617 format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
aoqi@0 2618 ins_encode %{
aoqi@0 2619 bool vector256 = true;
aoqi@0 2620 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 2621 %}
aoqi@0 2622 ins_pipe( pipe_slow );
aoqi@0 2623 %}
aoqi@0 2624
aoqi@0 2625 // Shorts/Chars vector add
aoqi@0 2626 instruct vadd2S(vecS dst, vecS src) %{
aoqi@0 2627 predicate(n->as_Vector()->length() == 2);
aoqi@0 2628 match(Set dst (AddVS dst src));
aoqi@0 2629 format %{ "paddw $dst,$src\t! add packed2S" %}
aoqi@0 2630 ins_encode %{
aoqi@0 2631 __ paddw($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 2632 %}
aoqi@0 2633 ins_pipe( pipe_slow );
aoqi@0 2634 %}
aoqi@0 2635
aoqi@0 2636 instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
aoqi@0 2637 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 2638 match(Set dst (AddVS src1 src2));
aoqi@0 2639 format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
aoqi@0 2640 ins_encode %{
aoqi@0 2641 bool vector256 = false;
aoqi@0 2642 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 2643 %}
aoqi@0 2644 ins_pipe( pipe_slow );
aoqi@0 2645 %}
aoqi@0 2646
aoqi@0 2647 instruct vadd4S(vecD dst, vecD src) %{
aoqi@0 2648 predicate(n->as_Vector()->length() == 4);
aoqi@0 2649 match(Set dst (AddVS dst src));
aoqi@0 2650 format %{ "paddw $dst,$src\t! add packed4S" %}
aoqi@0 2651 ins_encode %{
aoqi@0 2652 __ paddw($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 2653 %}
aoqi@0 2654 ins_pipe( pipe_slow );
aoqi@0 2655 %}
aoqi@0 2656
aoqi@0 2657 instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
aoqi@0 2658 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 2659 match(Set dst (AddVS src1 src2));
aoqi@0 2660 format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
aoqi@0 2661 ins_encode %{
aoqi@0 2662 bool vector256 = false;
aoqi@0 2663 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 2664 %}
aoqi@0 2665 ins_pipe( pipe_slow );
aoqi@0 2666 %}
aoqi@0 2667
aoqi@0 2668 instruct vadd8S(vecX dst, vecX src) %{
aoqi@0 2669 predicate(n->as_Vector()->length() == 8);
aoqi@0 2670 match(Set dst (AddVS dst src));
aoqi@0 2671 format %{ "paddw $dst,$src\t! add packed8S" %}
aoqi@0 2672 ins_encode %{
aoqi@0 2673 __ paddw($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 2674 %}
aoqi@0 2675 ins_pipe( pipe_slow );
aoqi@0 2676 %}
aoqi@0 2677
aoqi@0 2678 instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
aoqi@0 2679 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
aoqi@0 2680 match(Set dst (AddVS src1 src2));
aoqi@0 2681 format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
aoqi@0 2682 ins_encode %{
aoqi@0 2683 bool vector256 = false;
aoqi@0 2684 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 2685 %}
aoqi@0 2686 ins_pipe( pipe_slow );
aoqi@0 2687 %}
aoqi@0 2688
aoqi@0 2689 instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
aoqi@0 2690 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
aoqi@0 2691 match(Set dst (AddVS src (LoadVector mem)));
aoqi@0 2692 format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
aoqi@0 2693 ins_encode %{
aoqi@0 2694 bool vector256 = false;
aoqi@0 2695 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 2696 %}
aoqi@0 2697 ins_pipe( pipe_slow );
aoqi@0 2698 %}
aoqi@0 2699
aoqi@0 2700 instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
aoqi@0 2701 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
aoqi@0 2702 match(Set dst (AddVS src1 src2));
aoqi@0 2703 format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
aoqi@0 2704 ins_encode %{
aoqi@0 2705 bool vector256 = true;
aoqi@0 2706 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 2707 %}
aoqi@0 2708 ins_pipe( pipe_slow );
aoqi@0 2709 %}
aoqi@0 2710
aoqi@0 2711 instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
aoqi@0 2712 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
aoqi@0 2713 match(Set dst (AddVS src (LoadVector mem)));
aoqi@0 2714 format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
aoqi@0 2715 ins_encode %{
aoqi@0 2716 bool vector256 = true;
aoqi@0 2717 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 2718 %}
aoqi@0 2719 ins_pipe( pipe_slow );
aoqi@0 2720 %}
aoqi@0 2721
aoqi@0 2722 // Integers vector add
aoqi@0 2723 instruct vadd2I(vecD dst, vecD src) %{
aoqi@0 2724 predicate(n->as_Vector()->length() == 2);
aoqi@0 2725 match(Set dst (AddVI dst src));
aoqi@0 2726 format %{ "paddd $dst,$src\t! add packed2I" %}
aoqi@0 2727 ins_encode %{
aoqi@0 2728 __ paddd($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 2729 %}
aoqi@0 2730 ins_pipe( pipe_slow );
aoqi@0 2731 %}
aoqi@0 2732
aoqi@0 2733 instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
aoqi@0 2734 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 2735 match(Set dst (AddVI src1 src2));
aoqi@0 2736 format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %}
aoqi@0 2737 ins_encode %{
aoqi@0 2738 bool vector256 = false;
aoqi@0 2739 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 2740 %}
aoqi@0 2741 ins_pipe( pipe_slow );
aoqi@0 2742 %}
aoqi@0 2743
aoqi@0 2744 instruct vadd4I(vecX dst, vecX src) %{
aoqi@0 2745 predicate(n->as_Vector()->length() == 4);
aoqi@0 2746 match(Set dst (AddVI dst src));
aoqi@0 2747 format %{ "paddd $dst,$src\t! add packed4I" %}
aoqi@0 2748 ins_encode %{
aoqi@0 2749 __ paddd($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 2750 %}
aoqi@0 2751 ins_pipe( pipe_slow );
aoqi@0 2752 %}
aoqi@0 2753
aoqi@0 2754 instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
aoqi@0 2755 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 2756 match(Set dst (AddVI src1 src2));
aoqi@0 2757 format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
aoqi@0 2758 ins_encode %{
aoqi@0 2759 bool vector256 = false;
aoqi@0 2760 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 2761 %}
aoqi@0 2762 ins_pipe( pipe_slow );
aoqi@0 2763 %}
aoqi@0 2764
aoqi@0 2765 instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
aoqi@0 2766 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 2767 match(Set dst (AddVI src (LoadVector mem)));
aoqi@0 2768 format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %}
aoqi@0 2769 ins_encode %{
aoqi@0 2770 bool vector256 = false;
aoqi@0 2771 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 2772 %}
aoqi@0 2773 ins_pipe( pipe_slow );
aoqi@0 2774 %}
aoqi@0 2775
aoqi@0 2776 instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
aoqi@0 2777 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
aoqi@0 2778 match(Set dst (AddVI src1 src2));
aoqi@0 2779 format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %}
aoqi@0 2780 ins_encode %{
aoqi@0 2781 bool vector256 = true;
aoqi@0 2782 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 2783 %}
aoqi@0 2784 ins_pipe( pipe_slow );
aoqi@0 2785 %}
aoqi@0 2786
aoqi@0 2787 instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
aoqi@0 2788 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
aoqi@0 2789 match(Set dst (AddVI src (LoadVector mem)));
aoqi@0 2790 format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %}
aoqi@0 2791 ins_encode %{
aoqi@0 2792 bool vector256 = true;
aoqi@0 2793 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 2794 %}
aoqi@0 2795 ins_pipe( pipe_slow );
aoqi@0 2796 %}
aoqi@0 2797
aoqi@0 2798 // Longs vector add
aoqi@0 2799 instruct vadd2L(vecX dst, vecX src) %{
aoqi@0 2800 predicate(n->as_Vector()->length() == 2);
aoqi@0 2801 match(Set dst (AddVL dst src));
aoqi@0 2802 format %{ "paddq $dst,$src\t! add packed2L" %}
aoqi@0 2803 ins_encode %{
aoqi@0 2804 __ paddq($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 2805 %}
aoqi@0 2806 ins_pipe( pipe_slow );
aoqi@0 2807 %}
aoqi@0 2808
aoqi@0 2809 instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
aoqi@0 2810 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 2811 match(Set dst (AddVL src1 src2));
aoqi@0 2812 format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
aoqi@0 2813 ins_encode %{
aoqi@0 2814 bool vector256 = false;
aoqi@0 2815 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 2816 %}
aoqi@0 2817 ins_pipe( pipe_slow );
aoqi@0 2818 %}
aoqi@0 2819
aoqi@0 2820 instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
aoqi@0 2821 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 2822 match(Set dst (AddVL src (LoadVector mem)));
aoqi@0 2823 format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %}
aoqi@0 2824 ins_encode %{
aoqi@0 2825 bool vector256 = false;
aoqi@0 2826 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 2827 %}
aoqi@0 2828 ins_pipe( pipe_slow );
aoqi@0 2829 %}
aoqi@0 2830
aoqi@0 2831 instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
aoqi@0 2832 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
aoqi@0 2833 match(Set dst (AddVL src1 src2));
aoqi@0 2834 format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
aoqi@0 2835 ins_encode %{
aoqi@0 2836 bool vector256 = true;
aoqi@0 2837 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 2838 %}
aoqi@0 2839 ins_pipe( pipe_slow );
aoqi@0 2840 %}
aoqi@0 2841
aoqi@0 2842 instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
aoqi@0 2843 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
aoqi@0 2844 match(Set dst (AddVL src (LoadVector mem)));
aoqi@0 2845 format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %}
aoqi@0 2846 ins_encode %{
aoqi@0 2847 bool vector256 = true;
aoqi@0 2848 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 2849 %}
aoqi@0 2850 ins_pipe( pipe_slow );
aoqi@0 2851 %}
aoqi@0 2852
aoqi@0 2853 // Floats vector add
aoqi@0 2854 instruct vadd2F(vecD dst, vecD src) %{
aoqi@0 2855 predicate(n->as_Vector()->length() == 2);
aoqi@0 2856 match(Set dst (AddVF dst src));
aoqi@0 2857 format %{ "addps $dst,$src\t! add packed2F" %}
aoqi@0 2858 ins_encode %{
aoqi@0 2859 __ addps($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 2860 %}
aoqi@0 2861 ins_pipe( pipe_slow );
aoqi@0 2862 %}
aoqi@0 2863
aoqi@0 2864 instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
aoqi@0 2865 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 2866 match(Set dst (AddVF src1 src2));
aoqi@0 2867 format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
aoqi@0 2868 ins_encode %{
aoqi@0 2869 bool vector256 = false;
aoqi@0 2870 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 2871 %}
aoqi@0 2872 ins_pipe( pipe_slow );
aoqi@0 2873 %}
aoqi@0 2874
aoqi@0 2875 instruct vadd4F(vecX dst, vecX src) %{
aoqi@0 2876 predicate(n->as_Vector()->length() == 4);
aoqi@0 2877 match(Set dst (AddVF dst src));
aoqi@0 2878 format %{ "addps $dst,$src\t! add packed4F" %}
aoqi@0 2879 ins_encode %{
aoqi@0 2880 __ addps($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 2881 %}
aoqi@0 2882 ins_pipe( pipe_slow );
aoqi@0 2883 %}
aoqi@0 2884
aoqi@0 2885 instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
aoqi@0 2886 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 2887 match(Set dst (AddVF src1 src2));
aoqi@0 2888 format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
aoqi@0 2889 ins_encode %{
aoqi@0 2890 bool vector256 = false;
aoqi@0 2891 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 2892 %}
aoqi@0 2893 ins_pipe( pipe_slow );
aoqi@0 2894 %}
aoqi@0 2895
aoqi@0 2896 instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
aoqi@0 2897 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 2898 match(Set dst (AddVF src (LoadVector mem)));
aoqi@0 2899 format %{ "vaddps $dst,$src,$mem\t! add packed4F" %}
aoqi@0 2900 ins_encode %{
aoqi@0 2901 bool vector256 = false;
aoqi@0 2902 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 2903 %}
aoqi@0 2904 ins_pipe( pipe_slow );
aoqi@0 2905 %}
aoqi@0 2906
aoqi@0 2907 instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
aoqi@0 2908 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
aoqi@0 2909 match(Set dst (AddVF src1 src2));
aoqi@0 2910 format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
aoqi@0 2911 ins_encode %{
aoqi@0 2912 bool vector256 = true;
aoqi@0 2913 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 2914 %}
aoqi@0 2915 ins_pipe( pipe_slow );
aoqi@0 2916 %}
aoqi@0 2917
aoqi@0 2918 instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
aoqi@0 2919 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
aoqi@0 2920 match(Set dst (AddVF src (LoadVector mem)));
aoqi@0 2921 format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
aoqi@0 2922 ins_encode %{
aoqi@0 2923 bool vector256 = true;
aoqi@0 2924 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 2925 %}
aoqi@0 2926 ins_pipe( pipe_slow );
aoqi@0 2927 %}
aoqi@0 2928
aoqi@0 2929 // Doubles vector add
aoqi@0 2930 instruct vadd2D(vecX dst, vecX src) %{
aoqi@0 2931 predicate(n->as_Vector()->length() == 2);
aoqi@0 2932 match(Set dst (AddVD dst src));
aoqi@0 2933 format %{ "addpd $dst,$src\t! add packed2D" %}
aoqi@0 2934 ins_encode %{
aoqi@0 2935 __ addpd($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 2936 %}
aoqi@0 2937 ins_pipe( pipe_slow );
aoqi@0 2938 %}
aoqi@0 2939
aoqi@0 2940 instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
aoqi@0 2941 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 2942 match(Set dst (AddVD src1 src2));
aoqi@0 2943 format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
aoqi@0 2944 ins_encode %{
aoqi@0 2945 bool vector256 = false;
aoqi@0 2946 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 2947 %}
aoqi@0 2948 ins_pipe( pipe_slow );
aoqi@0 2949 %}
aoqi@0 2950
aoqi@0 2951 instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
aoqi@0 2952 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 2953 match(Set dst (AddVD src (LoadVector mem)));
aoqi@0 2954 format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %}
aoqi@0 2955 ins_encode %{
aoqi@0 2956 bool vector256 = false;
aoqi@0 2957 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 2958 %}
aoqi@0 2959 ins_pipe( pipe_slow );
aoqi@0 2960 %}
aoqi@0 2961
aoqi@0 2962 instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
aoqi@0 2963 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 2964 match(Set dst (AddVD src1 src2));
aoqi@0 2965 format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
aoqi@0 2966 ins_encode %{
aoqi@0 2967 bool vector256 = true;
aoqi@0 2968 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 2969 %}
aoqi@0 2970 ins_pipe( pipe_slow );
aoqi@0 2971 %}
aoqi@0 2972
aoqi@0 2973 instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
aoqi@0 2974 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 2975 match(Set dst (AddVD src (LoadVector mem)));
aoqi@0 2976 format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
aoqi@0 2977 ins_encode %{
aoqi@0 2978 bool vector256 = true;
aoqi@0 2979 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 2980 %}
aoqi@0 2981 ins_pipe( pipe_slow );
aoqi@0 2982 %}
aoqi@0 2983
aoqi@0 2984 // --------------------------------- SUB --------------------------------------
aoqi@0 2985
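// The subtraction rules mirror the add rules above: psub*/subps/subpd for the
// destructive SSE forms, vpsub*/vsubps/vsubpd for the three-operand AVX forms,
// with the same UseAVX requirements per vector width.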
aoqi@0 2986 // Bytes vector sub
aoqi@0 2987 instruct vsub4B(vecS dst, vecS src) %{
aoqi@0 2988 predicate(n->as_Vector()->length() == 4);
aoqi@0 2989 match(Set dst (SubVB dst src));
aoqi@0 2990 format %{ "psubb $dst,$src\t! sub packed4B" %}
aoqi@0 2991 ins_encode %{
aoqi@0 2992 __ psubb($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 2993 %}
aoqi@0 2994 ins_pipe( pipe_slow );
aoqi@0 2995 %}
aoqi@0 2996
aoqi@0 2997 instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
aoqi@0 2998 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 2999 match(Set dst (SubVB src1 src2));
aoqi@0 3000 format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
aoqi@0 3001 ins_encode %{
aoqi@0 3002 bool vector256 = false;
aoqi@0 3003 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 3004 %}
aoqi@0 3005 ins_pipe( pipe_slow );
aoqi@0 3006 %}
aoqi@0 3007
aoqi@0 3008 instruct vsub8B(vecD dst, vecD src) %{
aoqi@0 3009 predicate(n->as_Vector()->length() == 8);
aoqi@0 3010 match(Set dst (SubVB dst src));
aoqi@0 3011 format %{ "psubb $dst,$src\t! sub packed8B" %}
aoqi@0 3012 ins_encode %{
aoqi@0 3013 __ psubb($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 3014 %}
aoqi@0 3015 ins_pipe( pipe_slow );
aoqi@0 3016 %}
aoqi@0 3017
aoqi@0 3018 instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
aoqi@0 3019 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
aoqi@0 3020 match(Set dst (SubVB src1 src2));
aoqi@0 3021 format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
aoqi@0 3022 ins_encode %{
aoqi@0 3023 bool vector256 = false;
aoqi@0 3024 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 3025 %}
aoqi@0 3026 ins_pipe( pipe_slow );
aoqi@0 3027 %}
aoqi@0 3028
aoqi@0 3029 instruct vsub16B(vecX dst, vecX src) %{
aoqi@0 3030 predicate(n->as_Vector()->length() == 16);
aoqi@0 3031 match(Set dst (SubVB dst src));
aoqi@0 3032 format %{ "psubb $dst,$src\t! sub packed16B" %}
aoqi@0 3033 ins_encode %{
aoqi@0 3034 __ psubb($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 3035 %}
aoqi@0 3036 ins_pipe( pipe_slow );
aoqi@0 3037 %}
aoqi@0 3038
aoqi@0 3039 instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
aoqi@0 3040 predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
aoqi@0 3041 match(Set dst (SubVB src1 src2));
aoqi@0 3042 format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
aoqi@0 3043 ins_encode %{
aoqi@0 3044 bool vector256 = false;
aoqi@0 3045 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 3046 %}
aoqi@0 3047 ins_pipe( pipe_slow );
aoqi@0 3048 %}
aoqi@0 3049
aoqi@0 3050 instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
aoqi@0 3051 predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
aoqi@0 3052 match(Set dst (SubVB src (LoadVector mem)));
aoqi@0 3053 format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
aoqi@0 3054 ins_encode %{
aoqi@0 3055 bool vector256 = false;
aoqi@0 3056 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 3057 %}
aoqi@0 3058 ins_pipe( pipe_slow );
aoqi@0 3059 %}
aoqi@0 3060
aoqi@0 3061 instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
aoqi@0 3062 predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
aoqi@0 3063 match(Set dst (SubVB src1 src2));
aoqi@0 3064 format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
aoqi@0 3065 ins_encode %{
aoqi@0 3066 bool vector256 = true;
aoqi@0 3067 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 3068 %}
aoqi@0 3069 ins_pipe( pipe_slow );
aoqi@0 3070 %}
aoqi@0 3071
aoqi@0 3072 instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
aoqi@0 3073 predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
aoqi@0 3074 match(Set dst (SubVB src (LoadVector mem)));
aoqi@0 3075 format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
aoqi@0 3076 ins_encode %{
aoqi@0 3077 bool vector256 = true;
aoqi@0 3078 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 3079 %}
aoqi@0 3080 ins_pipe( pipe_slow );
aoqi@0 3081 %}
aoqi@0 3082
aoqi@0 3083 // Shorts/Chars vector sub
aoqi@0 3084 instruct vsub2S(vecS dst, vecS src) %{
aoqi@0 3085 predicate(n->as_Vector()->length() == 2);
aoqi@0 3086 match(Set dst (SubVS dst src));
aoqi@0 3087 format %{ "psubw $dst,$src\t! sub packed2S" %}
aoqi@0 3088 ins_encode %{
aoqi@0 3089 __ psubw($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 3090 %}
aoqi@0 3091 ins_pipe( pipe_slow );
aoqi@0 3092 %}
aoqi@0 3093
aoqi@0 3094 instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
aoqi@0 3095 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 3096 match(Set dst (SubVS src1 src2));
aoqi@0 3097 format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
aoqi@0 3098 ins_encode %{
aoqi@0 3099 bool vector256 = false;
aoqi@0 3100 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 3101 %}
aoqi@0 3102 ins_pipe( pipe_slow );
aoqi@0 3103 %}
aoqi@0 3104
aoqi@0 3105 instruct vsub4S(vecD dst, vecD src) %{
aoqi@0 3106 predicate(n->as_Vector()->length() == 4);
aoqi@0 3107 match(Set dst (SubVS dst src));
aoqi@0 3108 format %{ "psubw $dst,$src\t! sub packed4S" %}
aoqi@0 3109 ins_encode %{
aoqi@0 3110 __ psubw($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 3111 %}
aoqi@0 3112 ins_pipe( pipe_slow );
aoqi@0 3113 %}
aoqi@0 3114
aoqi@0 3115 instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
aoqi@0 3116 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 3117 match(Set dst (SubVS src1 src2));
aoqi@0 3118 format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
aoqi@0 3119 ins_encode %{
aoqi@0 3120 bool vector256 = false;
aoqi@0 3121 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 3122 %}
aoqi@0 3123 ins_pipe( pipe_slow );
aoqi@0 3124 %}
aoqi@0 3125
aoqi@0 3126 instruct vsub8S(vecX dst, vecX src) %{
aoqi@0 3127 predicate(n->as_Vector()->length() == 8);
aoqi@0 3128 match(Set dst (SubVS dst src));
aoqi@0 3129 format %{ "psubw $dst,$src\t! sub packed8S" %}
aoqi@0 3130 ins_encode %{
aoqi@0 3131 __ psubw($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 3132 %}
aoqi@0 3133 ins_pipe( pipe_slow );
aoqi@0 3134 %}
aoqi@0 3135
aoqi@0 3136 instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
aoqi@0 3137 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
aoqi@0 3138 match(Set dst (SubVS src1 src2));
aoqi@0 3139 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
aoqi@0 3140 ins_encode %{
aoqi@0 3141 bool vector256 = false;
aoqi@0 3142 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 3143 %}
aoqi@0 3144 ins_pipe( pipe_slow );
aoqi@0 3145 %}
aoqi@0 3146
aoqi@0 3147 instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
aoqi@0 3148 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
aoqi@0 3149 match(Set dst (SubVS src (LoadVector mem)));
aoqi@0 3150 format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
aoqi@0 3151 ins_encode %{
aoqi@0 3152 bool vector256 = false;
aoqi@0 3153 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 3154 %}
aoqi@0 3155 ins_pipe( pipe_slow );
aoqi@0 3156 %}
aoqi@0 3157
aoqi@0 3158 instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
aoqi@0 3159 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
aoqi@0 3160 match(Set dst (SubVS src1 src2));
aoqi@0 3161 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
aoqi@0 3162 ins_encode %{
aoqi@0 3163 bool vector256 = true;
aoqi@0 3164 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 3165 %}
aoqi@0 3166 ins_pipe( pipe_slow );
aoqi@0 3167 %}
aoqi@0 3168
aoqi@0 3169 instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
aoqi@0 3170 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
aoqi@0 3171 match(Set dst (SubVS src (LoadVector mem)));
aoqi@0 3172 format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
aoqi@0 3173 ins_encode %{
aoqi@0 3174 bool vector256 = true;
aoqi@0 3175 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 3176 %}
aoqi@0 3177 ins_pipe( pipe_slow );
aoqi@0 3178 %}
aoqi@0 3179
aoqi@0 3180 // Integers vector sub
aoqi@0 3181 instruct vsub2I(vecD dst, vecD src) %{
aoqi@0 3182 predicate(n->as_Vector()->length() == 2);
aoqi@0 3183 match(Set dst (SubVI dst src));
aoqi@0 3184 format %{ "psubd $dst,$src\t! sub packed2I" %}
aoqi@0 3185 ins_encode %{
aoqi@0 3186 __ psubd($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 3187 %}
aoqi@0 3188 ins_pipe( pipe_slow );
aoqi@0 3189 %}
aoqi@0 3190
aoqi@0 3191 instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
aoqi@0 3192 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 3193 match(Set dst (SubVI src1 src2));
aoqi@0 3194 format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %}
aoqi@0 3195 ins_encode %{
aoqi@0 3196 bool vector256 = false;
aoqi@0 3197 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 3198 %}
aoqi@0 3199 ins_pipe( pipe_slow );
aoqi@0 3200 %}
aoqi@0 3201
aoqi@0 3202 instruct vsub4I(vecX dst, vecX src) %{
aoqi@0 3203 predicate(n->as_Vector()->length() == 4);
aoqi@0 3204 match(Set dst (SubVI dst src));
aoqi@0 3205 format %{ "psubd $dst,$src\t! sub packed4I" %}
aoqi@0 3206 ins_encode %{
aoqi@0 3207 __ psubd($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 3208 %}
aoqi@0 3209 ins_pipe( pipe_slow );
aoqi@0 3210 %}
aoqi@0 3211
aoqi@0 3212 instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
aoqi@0 3213 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 3214 match(Set dst (SubVI src1 src2));
aoqi@0 3215 format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %}
aoqi@0 3216 ins_encode %{
aoqi@0 3217 bool vector256 = false;
aoqi@0 3218 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 3219 %}
aoqi@0 3220 ins_pipe( pipe_slow );
aoqi@0 3221 %}
aoqi@0 3222
aoqi@0 3223 instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
aoqi@0 3224 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 3225 match(Set dst (SubVI src (LoadVector mem)));
aoqi@0 3226 format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %}
aoqi@0 3227 ins_encode %{
aoqi@0 3228 bool vector256 = false;
aoqi@0 3229 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 3230 %}
aoqi@0 3231 ins_pipe( pipe_slow );
aoqi@0 3232 %}
aoqi@0 3233
aoqi@0 3234 instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
aoqi@0 3235 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
aoqi@0 3236 match(Set dst (SubVI src1 src2));
aoqi@0 3237 format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %}
aoqi@0 3238 ins_encode %{
aoqi@0 3239 bool vector256 = true;
aoqi@0 3240 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 3241 %}
aoqi@0 3242 ins_pipe( pipe_slow );
aoqi@0 3243 %}
aoqi@0 3244
aoqi@0 3245 instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
aoqi@0 3246 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
aoqi@0 3247 match(Set dst (SubVI src (LoadVector mem)));
aoqi@0 3248 format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %}
aoqi@0 3249 ins_encode %{
aoqi@0 3250 bool vector256 = true;
aoqi@0 3251 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 3252 %}
aoqi@0 3253 ins_pipe( pipe_slow );
aoqi@0 3254 %}
aoqi@0 3255
aoqi@0 3256 // Longs vector sub
aoqi@0 3257 instruct vsub2L(vecX dst, vecX src) %{
aoqi@0 3258 predicate(n->as_Vector()->length() == 2);
aoqi@0 3259 match(Set dst (SubVL dst src));
aoqi@0 3260 format %{ "psubq $dst,$src\t! sub packed2L" %}
aoqi@0 3261 ins_encode %{
aoqi@0 3262 __ psubq($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 3263 %}
aoqi@0 3264 ins_pipe( pipe_slow );
aoqi@0 3265 %}
aoqi@0 3266
aoqi@0 3267 instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
aoqi@0 3268 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 3269 match(Set dst (SubVL src1 src2));
aoqi@0 3270 format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %}
aoqi@0 3271 ins_encode %{
aoqi@0 3272 bool vector256 = false;
aoqi@0 3273 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 3274 %}
aoqi@0 3275 ins_pipe( pipe_slow );
aoqi@0 3276 %}
aoqi@0 3277
aoqi@0 3278 instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
aoqi@0 3279 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 3280 match(Set dst (SubVL src (LoadVector mem)));
aoqi@0 3281 format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %}
aoqi@0 3282 ins_encode %{
aoqi@0 3283 bool vector256 = false;
aoqi@0 3284 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 3285 %}
aoqi@0 3286 ins_pipe( pipe_slow );
aoqi@0 3287 %}
aoqi@0 3288
aoqi@0 3289 instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
aoqi@0 3290 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
aoqi@0 3291 match(Set dst (SubVL src1 src2));
aoqi@0 3292 format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %}
aoqi@0 3293 ins_encode %{
aoqi@0 3294 bool vector256 = true;
aoqi@0 3295 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 3296 %}
aoqi@0 3297 ins_pipe( pipe_slow );
aoqi@0 3298 %}
aoqi@0 3299
aoqi@0 3300 instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
aoqi@0 3301 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
aoqi@0 3302 match(Set dst (SubVL src (LoadVector mem)));
aoqi@0 3303 format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %}
aoqi@0 3304 ins_encode %{
aoqi@0 3305 bool vector256 = true;
aoqi@0 3306 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 3307 %}
aoqi@0 3308 ins_pipe( pipe_slow );
aoqi@0 3309 %}
aoqi@0 3310
aoqi@0 3311 // Floats vector sub
aoqi@0 3312 instruct vsub2F(vecD dst, vecD src) %{
aoqi@0 3313 predicate(n->as_Vector()->length() == 2);
aoqi@0 3314 match(Set dst (SubVF dst src));
aoqi@0 3315 format %{ "subps $dst,$src\t! sub packed2F" %}
aoqi@0 3316 ins_encode %{
aoqi@0 3317 __ subps($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 3318 %}
aoqi@0 3319 ins_pipe( pipe_slow );
aoqi@0 3320 %}
aoqi@0 3321
aoqi@0 3322 instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
aoqi@0 3323 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 3324 match(Set dst (SubVF src1 src2));
aoqi@0 3325 format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %}
aoqi@0 3326 ins_encode %{
aoqi@0 3327 bool vector256 = false;
aoqi@0 3328 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 3329 %}
aoqi@0 3330 ins_pipe( pipe_slow );
aoqi@0 3331 %}
aoqi@0 3332
aoqi@0 3333 instruct vsub4F(vecX dst, vecX src) %{
aoqi@0 3334 predicate(n->as_Vector()->length() == 4);
aoqi@0 3335 match(Set dst (SubVF dst src));
aoqi@0 3336 format %{ "subps $dst,$src\t! sub packed4F" %}
aoqi@0 3337 ins_encode %{
aoqi@0 3338 __ subps($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 3339 %}
aoqi@0 3340 ins_pipe( pipe_slow );
aoqi@0 3341 %}
aoqi@0 3342
aoqi@0 3343 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
aoqi@0 3344 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 3345 match(Set dst (SubVF src1 src2));
aoqi@0 3346 format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %}
aoqi@0 3347 ins_encode %{
aoqi@0 3348 bool vector256 = false;
aoqi@0 3349 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 3350 %}
aoqi@0 3351 ins_pipe( pipe_slow );
aoqi@0 3352 %}
aoqi@0 3353
aoqi@0 3354 instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
aoqi@0 3355 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 3356 match(Set dst (SubVF src (LoadVector mem)));
aoqi@0 3357 format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %}
aoqi@0 3358 ins_encode %{
aoqi@0 3359 bool vector256 = false;
aoqi@0 3360 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 3361 %}
aoqi@0 3362 ins_pipe( pipe_slow );
aoqi@0 3363 %}
aoqi@0 3364
aoqi@0 3365 instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
aoqi@0 3366 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
aoqi@0 3367 match(Set dst (SubVF src1 src2));
aoqi@0 3368 format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %}
aoqi@0 3369 ins_encode %{
aoqi@0 3370 bool vector256 = true;
aoqi@0 3371 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 3372 %}
aoqi@0 3373 ins_pipe( pipe_slow );
aoqi@0 3374 %}
aoqi@0 3375
aoqi@0 3376 instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
aoqi@0 3377 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
aoqi@0 3378 match(Set dst (SubVF src (LoadVector mem)));
aoqi@0 3379 format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %}
aoqi@0 3380 ins_encode %{
aoqi@0 3381 bool vector256 = true;
aoqi@0 3382 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 3383 %}
aoqi@0 3384 ins_pipe( pipe_slow );
aoqi@0 3385 %}
aoqi@0 3386
aoqi@0 3387 // Doubles vector sub
aoqi@0 3388 instruct vsub2D(vecX dst, vecX src) %{
aoqi@0 3389 predicate(n->as_Vector()->length() == 2);
aoqi@0 3390 match(Set dst (SubVD dst src));
aoqi@0 3391 format %{ "subpd $dst,$src\t! sub packed2D" %}
aoqi@0 3392 ins_encode %{
aoqi@0 3393 __ subpd($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 3394 %}
aoqi@0 3395 ins_pipe( pipe_slow );
aoqi@0 3396 %}
aoqi@0 3397
aoqi@0 3398 instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
aoqi@0 3399 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 3400 match(Set dst (SubVD src1 src2));
aoqi@0 3401 format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %}
aoqi@0 3402 ins_encode %{
aoqi@0 3403 bool vector256 = false;
aoqi@0 3404 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 3405 %}
aoqi@0 3406 ins_pipe( pipe_slow );
aoqi@0 3407 %}
aoqi@0 3408
aoqi@0 3409 instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
aoqi@0 3410 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 3411 match(Set dst (SubVD src (LoadVector mem)));
aoqi@0 3412 format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %}
aoqi@0 3413 ins_encode %{
aoqi@0 3414 bool vector256 = false;
aoqi@0 3415 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 3416 %}
aoqi@0 3417 ins_pipe( pipe_slow );
aoqi@0 3418 %}
aoqi@0 3419
aoqi@0 3420 instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
aoqi@0 3421 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 3422 match(Set dst (SubVD src1 src2));
aoqi@0 3423 format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %}
aoqi@0 3424 ins_encode %{
aoqi@0 3425 bool vector256 = true;
aoqi@0 3426 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 3427 %}
aoqi@0 3428 ins_pipe( pipe_slow );
aoqi@0 3429 %}
aoqi@0 3430
aoqi@0 3431 instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
aoqi@0 3432 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 3433 match(Set dst (SubVD src (LoadVector mem)));
aoqi@0 3434 format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %}
aoqi@0 3435 ins_encode %{
aoqi@0 3436 bool vector256 = true;
aoqi@0 3437 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 3438 %}
aoqi@0 3439 ins_pipe( pipe_slow );
aoqi@0 3440 %}
aoqi@0 3441
aoqi@0 3442 // --------------------------------- MUL --------------------------------------
aoqi@0 3443
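// Only short/char and int element multiplies are provided: pmullw/vpmullw for
// 16-bit elements and pmulld/vpmulld for 32-bit elements.  pmulld is an
// SSE4.1 instruction, hence the UseSSE > 3 predicate on the non-AVX int
// forms; there is no packed long multiply rule here.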
aoqi@0 3444 // Shorts/Chars vector mul
aoqi@0 3445 instruct vmul2S(vecS dst, vecS src) %{
aoqi@0 3446 predicate(n->as_Vector()->length() == 2);
aoqi@0 3447 match(Set dst (MulVS dst src));
aoqi@0 3448 format %{ "pmullw $dst,$src\t! mul packed2S" %}
aoqi@0 3449 ins_encode %{
aoqi@0 3450 __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 3451 %}
aoqi@0 3452 ins_pipe( pipe_slow );
aoqi@0 3453 %}
aoqi@0 3454
aoqi@0 3455 instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
aoqi@0 3456 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 3457 match(Set dst (MulVS src1 src2));
aoqi@0 3458 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
aoqi@0 3459 ins_encode %{
aoqi@0 3460 bool vector256 = false;
aoqi@0 3461 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 3462 %}
aoqi@0 3463 ins_pipe( pipe_slow );
aoqi@0 3464 %}
aoqi@0 3465
aoqi@0 3466 instruct vmul4S(vecD dst, vecD src) %{
aoqi@0 3467 predicate(n->as_Vector()->length() == 4);
aoqi@0 3468 match(Set dst (MulVS dst src));
aoqi@0 3469 format %{ "pmullw $dst,$src\t! mul packed4S" %}
aoqi@0 3470 ins_encode %{
aoqi@0 3471 __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 3472 %}
aoqi@0 3473 ins_pipe( pipe_slow );
aoqi@0 3474 %}
aoqi@0 3475
aoqi@0 3476 instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
aoqi@0 3477 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 3478 match(Set dst (MulVS src1 src2));
aoqi@0 3479 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
aoqi@0 3480 ins_encode %{
aoqi@0 3481 bool vector256 = false;
aoqi@0 3482 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 3483 %}
aoqi@0 3484 ins_pipe( pipe_slow );
aoqi@0 3485 %}
aoqi@0 3486
aoqi@0 3487 instruct vmul8S(vecX dst, vecX src) %{
aoqi@0 3488 predicate(n->as_Vector()->length() == 8);
aoqi@0 3489 match(Set dst (MulVS dst src));
aoqi@0 3490 format %{ "pmullw $dst,$src\t! mul packed8S" %}
aoqi@0 3491 ins_encode %{
aoqi@0 3492 __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 3493 %}
aoqi@0 3494 ins_pipe( pipe_slow );
aoqi@0 3495 %}
aoqi@0 3496
aoqi@0 3497 instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
aoqi@0 3498 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
aoqi@0 3499 match(Set dst (MulVS src1 src2));
aoqi@0 3500 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
aoqi@0 3501 ins_encode %{
aoqi@0 3502 bool vector256 = false;
aoqi@0 3503 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 3504 %}
aoqi@0 3505 ins_pipe( pipe_slow );
aoqi@0 3506 %}
aoqi@0 3507
aoqi@0 3508 instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
aoqi@0 3509 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
aoqi@0 3510 match(Set dst (MulVS src (LoadVector mem)));
aoqi@0 3511 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
aoqi@0 3512 ins_encode %{
aoqi@0 3513 bool vector256 = false;
aoqi@0 3514 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 3515 %}
aoqi@0 3516 ins_pipe( pipe_slow );
aoqi@0 3517 %}
aoqi@0 3518
aoqi@0 3519 instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
aoqi@0 3520 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
aoqi@0 3521 match(Set dst (MulVS src1 src2));
aoqi@0 3522 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
aoqi@0 3523 ins_encode %{
aoqi@0 3524 bool vector256 = true;
aoqi@0 3525 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 3526 %}
aoqi@0 3527 ins_pipe( pipe_slow );
aoqi@0 3528 %}
aoqi@0 3529
aoqi@0 3530 instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
aoqi@0 3531 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
aoqi@0 3532 match(Set dst (MulVS src (LoadVector mem)));
aoqi@0 3533 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
aoqi@0 3534 ins_encode %{
aoqi@0 3535 bool vector256 = true;
aoqi@0 3536 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 3537 %}
aoqi@0 3538 ins_pipe( pipe_slow );
aoqi@0 3539 %}
aoqi@0 3540
aoqi@0 3541 // Integers vector mul (sse4_1)
aoqi@0 3542 instruct vmul2I(vecD dst, vecD src) %{
aoqi@0 3543 predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
aoqi@0 3544 match(Set dst (MulVI dst src));
aoqi@0 3545 format %{ "pmulld $dst,$src\t! mul packed2I" %}
aoqi@0 3546 ins_encode %{
aoqi@0 3547 __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 3548 %}
aoqi@0 3549 ins_pipe( pipe_slow );
aoqi@0 3550 %}
aoqi@0 3551
aoqi@0 3552 instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
aoqi@0 3553 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 3554 match(Set dst (MulVI src1 src2));
aoqi@0 3555 format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
aoqi@0 3556 ins_encode %{
aoqi@0 3557 bool vector256 = false;
aoqi@0 3558 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 3559 %}
aoqi@0 3560 ins_pipe( pipe_slow );
aoqi@0 3561 %}
aoqi@0 3562
aoqi@0 3563 instruct vmul4I(vecX dst, vecX src) %{
aoqi@0 3564 predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
aoqi@0 3565 match(Set dst (MulVI dst src));
aoqi@0 3566 format %{ "pmulld $dst,$src\t! mul packed4I" %}
aoqi@0 3567 ins_encode %{
aoqi@0 3568 __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 3569 %}
aoqi@0 3570 ins_pipe( pipe_slow );
aoqi@0 3571 %}
aoqi@0 3572
aoqi@0 3573 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
aoqi@0 3574 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 3575 match(Set dst (MulVI src1 src2));
aoqi@0 3576 format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
aoqi@0 3577 ins_encode %{
aoqi@0 3578 bool vector256 = false;
aoqi@0 3579 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 3580 %}
aoqi@0 3581 ins_pipe( pipe_slow );
aoqi@0 3582 %}
aoqi@0 3583
aoqi@0 3584 instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
aoqi@0 3585 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 3586 match(Set dst (MulVI src (LoadVector mem)));
aoqi@0 3587 format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
aoqi@0 3588 ins_encode %{
aoqi@0 3589 bool vector256 = false;
aoqi@0 3590 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 3591 %}
aoqi@0 3592 ins_pipe( pipe_slow );
aoqi@0 3593 %}
aoqi@0 3594
aoqi@0 3595 instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
aoqi@0 3596 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
aoqi@0 3597 match(Set dst (MulVI src1 src2));
aoqi@0 3598 format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
aoqi@0 3599 ins_encode %{
aoqi@0 3600 bool vector256 = true;
aoqi@0 3601 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 3602 %}
aoqi@0 3603 ins_pipe( pipe_slow );
aoqi@0 3604 %}
aoqi@0 3605
aoqi@0 3606 instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
aoqi@0 3607 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
aoqi@0 3608 match(Set dst (MulVI src (LoadVector mem)));
aoqi@0 3609 format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
aoqi@0 3610 ins_encode %{
aoqi@0 3611 bool vector256 = true;
aoqi@0 3612 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 3613 %}
aoqi@0 3614 ins_pipe( pipe_slow );
aoqi@0 3615 %}
aoqi@0 3616
aoqi@0 3617 // Floats vector mul
aoqi@0 3618 instruct vmul2F(vecD dst, vecD src) %{
aoqi@0 3619 predicate(n->as_Vector()->length() == 2);
aoqi@0 3620 match(Set dst (MulVF dst src));
aoqi@0 3621 format %{ "mulps $dst,$src\t! mul packed2F" %}
aoqi@0 3622 ins_encode %{
aoqi@0 3623 __ mulps($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 3624 %}
aoqi@0 3625 ins_pipe( pipe_slow );
aoqi@0 3626 %}
aoqi@0 3627
aoqi@0 3628 instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
aoqi@0 3629 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 3630 match(Set dst (MulVF src1 src2));
aoqi@0 3631 format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %}
aoqi@0 3632 ins_encode %{
aoqi@0 3633 bool vector256 = false;
aoqi@0 3634 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 3635 %}
aoqi@0 3636 ins_pipe( pipe_slow );
aoqi@0 3637 %}
aoqi@0 3638
aoqi@0 3639 instruct vmul4F(vecX dst, vecX src) %{
aoqi@0 3640 predicate(n->as_Vector()->length() == 4);
aoqi@0 3641 match(Set dst (MulVF dst src));
aoqi@0 3642 format %{ "mulps $dst,$src\t! mul packed4F" %}
aoqi@0 3643 ins_encode %{
aoqi@0 3644 __ mulps($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 3645 %}
aoqi@0 3646 ins_pipe( pipe_slow );
aoqi@0 3647 %}
aoqi@0 3648
aoqi@0 3649 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
aoqi@0 3650 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 3651 match(Set dst (MulVF src1 src2));
aoqi@0 3652 format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %}
aoqi@0 3653 ins_encode %{
aoqi@0 3654 bool vector256 = false;
aoqi@0 3655 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 3656 %}
aoqi@0 3657 ins_pipe( pipe_slow );
aoqi@0 3658 %}
aoqi@0 3659
aoqi@0 3660 instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
aoqi@0 3661 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 3662 match(Set dst (MulVF src (LoadVector mem)));
aoqi@0 3663 format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %}
aoqi@0 3664 ins_encode %{
aoqi@0 3665 bool vector256 = false;
aoqi@0 3666 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 3667 %}
aoqi@0 3668 ins_pipe( pipe_slow );
aoqi@0 3669 %}
aoqi@0 3670
aoqi@0 3671 instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
aoqi@0 3672 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
aoqi@0 3673 match(Set dst (MulVF src1 src2));
aoqi@0 3674 format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %}
aoqi@0 3675 ins_encode %{
aoqi@0 3676 bool vector256 = true;
aoqi@0 3677 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 3678 %}
aoqi@0 3679 ins_pipe( pipe_slow );
aoqi@0 3680 %}
aoqi@0 3681
aoqi@0 3682 instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
aoqi@0 3683 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
aoqi@0 3684 match(Set dst (MulVF src (LoadVector mem)));
aoqi@0 3685 format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %}
aoqi@0 3686 ins_encode %{
aoqi@0 3687 bool vector256 = true;
aoqi@0 3688 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 3689 %}
aoqi@0 3690 ins_pipe( pipe_slow );
aoqi@0 3691 %}
aoqi@0 3692
aoqi@0 3693 // Doubles vector mul
aoqi@0 3694 instruct vmul2D(vecX dst, vecX src) %{
aoqi@0 3695 predicate(n->as_Vector()->length() == 2);
aoqi@0 3696 match(Set dst (MulVD dst src));
aoqi@0 3697 format %{ "mulpd $dst,$src\t! mul packed2D" %}
aoqi@0 3698 ins_encode %{
aoqi@0 3699 __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 3700 %}
aoqi@0 3701 ins_pipe( pipe_slow );
aoqi@0 3702 %}
aoqi@0 3703
aoqi@0 3704 instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
aoqi@0 3705 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 3706 match(Set dst (MulVD src1 src2));
aoqi@0 3707 format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %}
aoqi@0 3708 ins_encode %{
aoqi@0 3709 bool vector256 = false;
aoqi@0 3710 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 3711 %}
aoqi@0 3712 ins_pipe( pipe_slow );
aoqi@0 3713 %}
aoqi@0 3714
aoqi@0 3715 instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
aoqi@0 3716 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 3717 match(Set dst (MulVD src (LoadVector mem)));
aoqi@0 3718 format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %}
aoqi@0 3719 ins_encode %{
aoqi@0 3720 bool vector256 = false;
aoqi@0 3721 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 3722 %}
aoqi@0 3723 ins_pipe( pipe_slow );
aoqi@0 3724 %}
aoqi@0 3725
aoqi@0 3726 instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
aoqi@0 3727 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 3728 match(Set dst (MulVD src1 src2));
aoqi@0 3729 format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %}
aoqi@0 3730 ins_encode %{
aoqi@0 3731 bool vector256 = true;
aoqi@0 3732 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 3733 %}
aoqi@0 3734 ins_pipe( pipe_slow );
aoqi@0 3735 %}
aoqi@0 3736
aoqi@0 3737 instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
aoqi@0 3738 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 3739 match(Set dst (MulVD src (LoadVector mem)));
aoqi@0 3740 format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %}
aoqi@0 3741 ins_encode %{
aoqi@0 3742 bool vector256 = true;
aoqi@0 3743 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 3744 %}
aoqi@0 3745 ins_pipe( pipe_slow );
aoqi@0 3746 %}
aoqi@0 3747
aoqi@0 3748 // --------------------------------- DIV --------------------------------------
aoqi@0 3749
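// Only floating point division is vectorized (divps/divpd and their AVX
// counterparts); x86 has no packed integer divide instruction, so there are
// no integer vector division rules.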
aoqi@0 3750 // Floats vector div
aoqi@0 3751 instruct vdiv2F(vecD dst, vecD src) %{
aoqi@0 3752 predicate(n->as_Vector()->length() == 2);
aoqi@0 3753 match(Set dst (DivVF dst src));
aoqi@0 3754 format %{ "divps $dst,$src\t! div packed2F" %}
aoqi@0 3755 ins_encode %{
aoqi@0 3756 __ divps($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 3757 %}
aoqi@0 3758 ins_pipe( pipe_slow );
aoqi@0 3759 %}
aoqi@0 3760
aoqi@0 3761 instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
aoqi@0 3762 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 3763 match(Set dst (DivVF src1 src2));
aoqi@0 3764 format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %}
aoqi@0 3765 ins_encode %{
aoqi@0 3766 bool vector256 = false;
aoqi@0 3767 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 3768 %}
aoqi@0 3769 ins_pipe( pipe_slow );
aoqi@0 3770 %}
aoqi@0 3771
aoqi@0 3772 instruct vdiv4F(vecX dst, vecX src) %{
aoqi@0 3773 predicate(n->as_Vector()->length() == 4);
aoqi@0 3774 match(Set dst (DivVF dst src));
aoqi@0 3775 format %{ "divps $dst,$src\t! div packed4F" %}
aoqi@0 3776 ins_encode %{
aoqi@0 3777 __ divps($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 3778 %}
aoqi@0 3779 ins_pipe( pipe_slow );
aoqi@0 3780 %}
aoqi@0 3781
aoqi@0 3782 instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
aoqi@0 3783 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 3784 match(Set dst (DivVF src1 src2));
aoqi@0 3785 format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %}
aoqi@0 3786 ins_encode %{
aoqi@0 3787 bool vector256 = false;
aoqi@0 3788 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 3789 %}
aoqi@0 3790 ins_pipe( pipe_slow );
aoqi@0 3791 %}
aoqi@0 3792
aoqi@0 3793 instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
aoqi@0 3794 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 3795 match(Set dst (DivVF src (LoadVector mem)));
aoqi@0 3796 format %{ "vdivps $dst,$src,$mem\t! div packed4F" %}
aoqi@0 3797 ins_encode %{
aoqi@0 3798 bool vector256 = false;
aoqi@0 3799 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 3800 %}
aoqi@0 3801 ins_pipe( pipe_slow );
aoqi@0 3802 %}
aoqi@0 3803
aoqi@0 3804 instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
aoqi@0 3805 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
aoqi@0 3806 match(Set dst (DivVF src1 src2));
aoqi@0 3807 format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %}
aoqi@0 3808 ins_encode %{
aoqi@0 3809 bool vector256 = true;
aoqi@0 3810 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 3811 %}
aoqi@0 3812 ins_pipe( pipe_slow );
aoqi@0 3813 %}
aoqi@0 3814
aoqi@0 3815 instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
aoqi@0 3816 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
aoqi@0 3817 match(Set dst (DivVF src (LoadVector mem)));
aoqi@0 3818 format %{ "vdivps $dst,$src,$mem\t! div packed8F" %}
aoqi@0 3819 ins_encode %{
aoqi@0 3820 bool vector256 = true;
aoqi@0 3821 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 3822 %}
aoqi@0 3823 ins_pipe( pipe_slow );
aoqi@0 3824 %}
aoqi@0 3825
aoqi@0 3826 // Doubles vector div
aoqi@0 3827 instruct vdiv2D(vecX dst, vecX src) %{
aoqi@0 3828 predicate(n->as_Vector()->length() == 2);
aoqi@0 3829 match(Set dst (DivVD dst src));
aoqi@0 3830 format %{ "divpd $dst,$src\t! div packed2D" %}
aoqi@0 3831 ins_encode %{
aoqi@0 3832 __ divpd($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 3833 %}
aoqi@0 3834 ins_pipe( pipe_slow );
aoqi@0 3835 %}
aoqi@0 3836
aoqi@0 3837 instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
aoqi@0 3838 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 3839 match(Set dst (DivVD src1 src2));
aoqi@0 3840 format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %}
aoqi@0 3841 ins_encode %{
aoqi@0 3842 bool vector256 = false;
aoqi@0 3843 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 3844 %}
aoqi@0 3845 ins_pipe( pipe_slow );
aoqi@0 3846 %}
aoqi@0 3847
aoqi@0 3848 instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
aoqi@0 3849 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 3850 match(Set dst (DivVD src (LoadVector mem)));
aoqi@0 3851 format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %}
aoqi@0 3852 ins_encode %{
aoqi@0 3853 bool vector256 = false;
aoqi@0 3854 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 3855 %}
aoqi@0 3856 ins_pipe( pipe_slow );
aoqi@0 3857 %}
aoqi@0 3858
aoqi@0 3859 instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
aoqi@0 3860 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 3861 match(Set dst (DivVD src1 src2));
aoqi@0 3862 format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %}
aoqi@0 3863 ins_encode %{
aoqi@0 3864 bool vector256 = true;
aoqi@0 3865 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 3866 %}
aoqi@0 3867 ins_pipe( pipe_slow );
aoqi@0 3868 %}
aoqi@0 3869
aoqi@0 3870 instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
aoqi@0 3871 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 3872 match(Set dst (DivVD src (LoadVector mem)));
aoqi@0 3873 format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %}
aoqi@0 3874 ins_encode %{
aoqi@0 3875 bool vector256 = true;
aoqi@0 3876 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 3877 %}
aoqi@0 3878 ins_pipe( pipe_slow );
aoqi@0 3879 %}
aoqi@0 3880
aoqi@0 3881 // ------------------------------ Shift ---------------------------------------
aoqi@0 3882
aoqi@0 3883 // Left and right shift count vectors are the same on x86
aoqi@0 3884 // (only lowest bits of xmm reg are used for count).
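// A variable count is first moved into the low bits of an XMM register by the
// vshiftcnt rule below; the shift rules also accept an immI8 constant count in
// their *_imm variants.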
aoqi@0 3885 instruct vshiftcnt(vecS dst, rRegI cnt) %{
aoqi@0 3886 match(Set dst (LShiftCntV cnt));
aoqi@0 3887 match(Set dst (RShiftCntV cnt));
aoqi@0 3888 format %{ "movd $dst,$cnt\t! load shift count" %}
aoqi@0 3889 ins_encode %{
aoqi@0 3890 __ movdl($dst$$XMMRegister, $cnt$$Register);
aoqi@0 3891 %}
aoqi@0 3892 ins_pipe( pipe_slow );
aoqi@0 3893 %}
aoqi@0 3894
aoqi@0 3895 // ------------------------------ LeftShift -----------------------------------
aoqi@0 3896
aoqi@0 3897 // Shorts/Chars vector left shift
aoqi@0 3898 instruct vsll2S(vecS dst, vecS shift) %{
aoqi@0 3899 predicate(n->as_Vector()->length() == 2);
aoqi@0 3900 match(Set dst (LShiftVS dst shift));
aoqi@0 3901 format %{ "psllw $dst,$shift\t! left shift packed2S" %}
aoqi@0 3902 ins_encode %{
aoqi@0 3903 __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
aoqi@0 3904 %}
aoqi@0 3905 ins_pipe( pipe_slow );
aoqi@0 3906 %}
aoqi@0 3907
aoqi@0 3908 instruct vsll2S_imm(vecS dst, immI8 shift) %{
aoqi@0 3909 predicate(n->as_Vector()->length() == 2);
aoqi@0 3910 match(Set dst (LShiftVS dst shift));
aoqi@0 3911 format %{ "psllw $dst,$shift\t! left shift packed2S" %}
aoqi@0 3912 ins_encode %{
aoqi@0 3913 __ psllw($dst$$XMMRegister, (int)$shift$$constant);
aoqi@0 3914 %}
aoqi@0 3915 ins_pipe( pipe_slow );
aoqi@0 3916 %}
aoqi@0 3917
aoqi@0 3918 instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{
aoqi@0 3919 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 3920 match(Set dst (LShiftVS src shift));
aoqi@0 3921 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
aoqi@0 3922 ins_encode %{
aoqi@0 3923 bool vector256 = false;
aoqi@0 3924 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
aoqi@0 3925 %}
aoqi@0 3926 ins_pipe( pipe_slow );
aoqi@0 3927 %}
aoqi@0 3928
aoqi@0 3929 instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
aoqi@0 3930 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 3931 match(Set dst (LShiftVS src shift));
aoqi@0 3932 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
aoqi@0 3933 ins_encode %{
aoqi@0 3934 bool vector256 = false;
aoqi@0 3935 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
aoqi@0 3936 %}
aoqi@0 3937 ins_pipe( pipe_slow );
aoqi@0 3938 %}
aoqi@0 3939
aoqi@0 3940 instruct vsll4S(vecD dst, vecS shift) %{
aoqi@0 3941 predicate(n->as_Vector()->length() == 4);
aoqi@0 3942 match(Set dst (LShiftVS dst shift));
aoqi@0 3943 format %{ "psllw $dst,$shift\t! left shift packed4S" %}
aoqi@0 3944 ins_encode %{
aoqi@0 3945 __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
aoqi@0 3946 %}
aoqi@0 3947 ins_pipe( pipe_slow );
aoqi@0 3948 %}
aoqi@0 3949
aoqi@0 3950 instruct vsll4S_imm(vecD dst, immI8 shift) %{
aoqi@0 3951 predicate(n->as_Vector()->length() == 4);
aoqi@0 3952 match(Set dst (LShiftVS dst shift));
aoqi@0 3953 format %{ "psllw $dst,$shift\t! left shift packed4S" %}
aoqi@0 3954 ins_encode %{
aoqi@0 3955 __ psllw($dst$$XMMRegister, (int)$shift$$constant);
aoqi@0 3956 %}
aoqi@0 3957 ins_pipe( pipe_slow );
aoqi@0 3958 %}
aoqi@0 3959
aoqi@0 3960 instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{
aoqi@0 3961 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 3962 match(Set dst (LShiftVS src shift));
aoqi@0 3963 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
aoqi@0 3964 ins_encode %{
aoqi@0 3965 bool vector256 = false;
aoqi@0 3966 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
aoqi@0 3967 %}
aoqi@0 3968 ins_pipe( pipe_slow );
aoqi@0 3969 %}
aoqi@0 3970
aoqi@0 3971 instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
aoqi@0 3972 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 3973 match(Set dst (LShiftVS src shift));
aoqi@0 3974 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
aoqi@0 3975 ins_encode %{
aoqi@0 3976 bool vector256 = false;
aoqi@0 3977 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
aoqi@0 3978 %}
aoqi@0 3979 ins_pipe( pipe_slow );
aoqi@0 3980 %}
aoqi@0 3981
aoqi@0 3982 instruct vsll8S(vecX dst, vecS shift) %{
aoqi@0 3983 predicate(n->as_Vector()->length() == 8);
aoqi@0 3984 match(Set dst (LShiftVS dst shift));
aoqi@0 3985 format %{ "psllw $dst,$shift\t! left shift packed8S" %}
aoqi@0 3986 ins_encode %{
aoqi@0 3987 __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
aoqi@0 3988 %}
aoqi@0 3989 ins_pipe( pipe_slow );
aoqi@0 3990 %}
aoqi@0 3991
aoqi@0 3992 instruct vsll8S_imm(vecX dst, immI8 shift) %{
aoqi@0 3993 predicate(n->as_Vector()->length() == 8);
aoqi@0 3994 match(Set dst (LShiftVS dst shift));
aoqi@0 3995 format %{ "psllw $dst,$shift\t! left shift packed8S" %}
aoqi@0 3996 ins_encode %{
aoqi@0 3997 __ psllw($dst$$XMMRegister, (int)$shift$$constant);
aoqi@0 3998 %}
aoqi@0 3999 ins_pipe( pipe_slow );
aoqi@0 4000 %}
aoqi@0 4001
aoqi@0 4002 instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{
aoqi@0 4003 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
aoqi@0 4004 match(Set dst (LShiftVS src shift));
aoqi@0 4005 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
aoqi@0 4006 ins_encode %{
aoqi@0 4007 bool vector256 = false;
aoqi@0 4008 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
aoqi@0 4009 %}
aoqi@0 4010 ins_pipe( pipe_slow );
aoqi@0 4011 %}
aoqi@0 4012
aoqi@0 4013 instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
aoqi@0 4014 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
aoqi@0 4015 match(Set dst (LShiftVS src shift));
aoqi@0 4016 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
aoqi@0 4017 ins_encode %{
aoqi@0 4018 bool vector256 = false;
aoqi@0 4019 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
aoqi@0 4020 %}
aoqi@0 4021 ins_pipe( pipe_slow );
aoqi@0 4022 %}
aoqi@0 4023
aoqi@0 4024 instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{
aoqi@0 4025 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
aoqi@0 4026 match(Set dst (LShiftVS src shift));
aoqi@0 4027 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
aoqi@0 4028 ins_encode %{
aoqi@0 4029 bool vector256 = true;
aoqi@0 4030 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
aoqi@0 4031 %}
aoqi@0 4032 ins_pipe( pipe_slow );
aoqi@0 4033 %}
aoqi@0 4034
aoqi@0 4035 instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
aoqi@0 4036 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
aoqi@0 4037 match(Set dst (LShiftVS src shift));
aoqi@0 4038 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
aoqi@0 4039 ins_encode %{
aoqi@0 4040 bool vector256 = true;
aoqi@0 4041 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
aoqi@0 4042 %}
aoqi@0 4043 ins_pipe( pipe_slow );
aoqi@0 4044 %}
aoqi@0 4045
aoqi@0 4046 // Integers vector left shift
aoqi@0 4047 instruct vsll2I(vecD dst, vecS shift) %{
aoqi@0 4048 predicate(n->as_Vector()->length() == 2);
aoqi@0 4049 match(Set dst (LShiftVI dst shift));
aoqi@0 4050 format %{ "pslld $dst,$shift\t! left shift packed2I" %}
aoqi@0 4051 ins_encode %{
aoqi@0 4052 __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
aoqi@0 4053 %}
aoqi@0 4054 ins_pipe( pipe_slow );
aoqi@0 4055 %}
aoqi@0 4056
aoqi@0 4057 instruct vsll2I_imm(vecD dst, immI8 shift) %{
aoqi@0 4058 predicate(n->as_Vector()->length() == 2);
aoqi@0 4059 match(Set dst (LShiftVI dst shift));
aoqi@0 4060 format %{ "pslld $dst,$shift\t! left shift packed2I" %}
aoqi@0 4061 ins_encode %{
aoqi@0 4062 __ pslld($dst$$XMMRegister, (int)$shift$$constant);
aoqi@0 4063 %}
aoqi@0 4064 ins_pipe( pipe_slow );
aoqi@0 4065 %}
aoqi@0 4066
aoqi@0 4067 instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
aoqi@0 4068 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 4069 match(Set dst (LShiftVI src shift));
aoqi@0 4070 format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
aoqi@0 4071 ins_encode %{
aoqi@0 4072 bool vector256 = false;
aoqi@0 4073 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
aoqi@0 4074 %}
aoqi@0 4075 ins_pipe( pipe_slow );
aoqi@0 4076 %}
aoqi@0 4077
aoqi@0 4078 instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
aoqi@0 4079 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 4080 match(Set dst (LShiftVI src shift));
aoqi@0 4081 format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
aoqi@0 4082 ins_encode %{
aoqi@0 4083 bool vector256 = false;
aoqi@0 4084 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
aoqi@0 4085 %}
aoqi@0 4086 ins_pipe( pipe_slow );
aoqi@0 4087 %}
aoqi@0 4088
aoqi@0 4089 instruct vsll4I(vecX dst, vecS shift) %{
aoqi@0 4090 predicate(n->as_Vector()->length() == 4);
aoqi@0 4091 match(Set dst (LShiftVI dst shift));
aoqi@0 4092 format %{ "pslld $dst,$shift\t! left shift packed4I" %}
aoqi@0 4093 ins_encode %{
aoqi@0 4094 __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
aoqi@0 4095 %}
aoqi@0 4096 ins_pipe( pipe_slow );
aoqi@0 4097 %}
aoqi@0 4098
aoqi@0 4099 instruct vsll4I_imm(vecX dst, immI8 shift) %{
aoqi@0 4100 predicate(n->as_Vector()->length() == 4);
aoqi@0 4101 match(Set dst (LShiftVI dst shift));
aoqi@0 4102 format %{ "pslld $dst,$shift\t! left shift packed4I" %}
aoqi@0 4103 ins_encode %{
aoqi@0 4104 __ pslld($dst$$XMMRegister, (int)$shift$$constant);
aoqi@0 4105 %}
aoqi@0 4106 ins_pipe( pipe_slow );
aoqi@0 4107 %}
aoqi@0 4108
aoqi@0 4109 instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
aoqi@0 4110 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 4111 match(Set dst (LShiftVI src shift));
aoqi@0 4112 format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
aoqi@0 4113 ins_encode %{
aoqi@0 4114 bool vector256 = false;
aoqi@0 4115 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
aoqi@0 4116 %}
aoqi@0 4117 ins_pipe( pipe_slow );
aoqi@0 4118 %}
aoqi@0 4119
aoqi@0 4120 instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
aoqi@0 4121 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 4122 match(Set dst (LShiftVI src shift));
aoqi@0 4123 format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
aoqi@0 4124 ins_encode %{
aoqi@0 4125 bool vector256 = false;
aoqi@0 4126 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
aoqi@0 4127 %}
aoqi@0 4128 ins_pipe( pipe_slow );
aoqi@0 4129 %}
aoqi@0 4130
aoqi@0 4131 instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
aoqi@0 4132 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
aoqi@0 4133 match(Set dst (LShiftVI src shift));
aoqi@0 4134 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
aoqi@0 4135 ins_encode %{
aoqi@0 4136 bool vector256 = true;
aoqi@0 4137 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
aoqi@0 4138 %}
aoqi@0 4139 ins_pipe( pipe_slow );
aoqi@0 4140 %}
aoqi@0 4141
aoqi@0 4142 instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
aoqi@0 4143 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
aoqi@0 4144 match(Set dst (LShiftVI src shift));
aoqi@0 4145 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
aoqi@0 4146 ins_encode %{
aoqi@0 4147 bool vector256 = true;
aoqi@0 4148 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
aoqi@0 4149 %}
aoqi@0 4150 ins_pipe( pipe_slow );
aoqi@0 4151 %}
aoqi@0 4152
aoqi@0 4153 // Longs vector left shift
aoqi@0 4154 instruct vsll2L(vecX dst, vecS shift) %{
aoqi@0 4155 predicate(n->as_Vector()->length() == 2);
aoqi@0 4156 match(Set dst (LShiftVL dst shift));
aoqi@0 4157 format %{ "psllq $dst,$shift\t! left shift packed2L" %}
aoqi@0 4158 ins_encode %{
aoqi@0 4159 __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
aoqi@0 4160 %}
aoqi@0 4161 ins_pipe( pipe_slow );
aoqi@0 4162 %}
aoqi@0 4163
aoqi@0 4164 instruct vsll2L_imm(vecX dst, immI8 shift) %{
aoqi@0 4165 predicate(n->as_Vector()->length() == 2);
aoqi@0 4166 match(Set dst (LShiftVL dst shift));
aoqi@0 4167 format %{ "psllq $dst,$shift\t! left shift packed2L" %}
aoqi@0 4168 ins_encode %{
aoqi@0 4169 __ psllq($dst$$XMMRegister, (int)$shift$$constant);
aoqi@0 4170 %}
aoqi@0 4171 ins_pipe( pipe_slow );
aoqi@0 4172 %}
aoqi@0 4173
aoqi@0 4174 instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
aoqi@0 4175 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 4176 match(Set dst (LShiftVL src shift));
aoqi@0 4177 format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
aoqi@0 4178 ins_encode %{
aoqi@0 4179 bool vector256 = false;
aoqi@0 4180 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
aoqi@0 4181 %}
aoqi@0 4182 ins_pipe( pipe_slow );
aoqi@0 4183 %}
aoqi@0 4184
aoqi@0 4185 instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
aoqi@0 4186 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 4187 match(Set dst (LShiftVL src shift));
aoqi@0 4188 format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
aoqi@0 4189 ins_encode %{
aoqi@0 4190 bool vector256 = false;
aoqi@0 4191 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
aoqi@0 4192 %}
aoqi@0 4193 ins_pipe( pipe_slow );
aoqi@0 4194 %}
aoqi@0 4195
aoqi@0 4196 instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
aoqi@0 4197 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
aoqi@0 4198 match(Set dst (LShiftVL src shift));
aoqi@0 4199 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
aoqi@0 4200 ins_encode %{
aoqi@0 4201 bool vector256 = true;
aoqi@0 4202 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
aoqi@0 4203 %}
aoqi@0 4204 ins_pipe( pipe_slow );
aoqi@0 4205 %}
aoqi@0 4206
aoqi@0 4207 instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
aoqi@0 4208 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
aoqi@0 4209 match(Set dst (LShiftVL src shift));
aoqi@0 4210 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
aoqi@0 4211 ins_encode %{
aoqi@0 4212 bool vector256 = true;
aoqi@0 4213 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
aoqi@0 4214 %}
aoqi@0 4215 ins_pipe( pipe_slow );
aoqi@0 4216 %}
aoqi@0 4217
aoqi@0 4218 // ----------------------- LogicalRightShift -----------------------------------
aoqi@0 4219
aoqi@0 4220 // Shorts vector logical right shift produces an incorrect Java result
aoqi@0 4221 // for negative data because Java code converts short values into int with
aoqi@0 4222 // sign extension before a shift. But char vectors are fine since chars are
aoqi@0 4223 // unsigned values.
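// As a worked illustration (not part of the original comment): for short s = -1,
// Java evaluates (short)(s >>> 2) by first widening s to the int 0xFFFFFFFF,
// shifting it to 0x3FFFFFFF, and truncating back to 0xFFFF, i.e. -1 again;
// a 16-bit psrlw of 0xFFFF by 2 would instead yield 0x3FFF (16383).
// For char c = '\uFFFF' the widened int is already zero-extended (0x0000FFFF),
// so the scalar and the packed results agree (both 0x3FFF).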
aoqi@0 4224
aoqi@0 4225 instruct vsrl2S(vecS dst, vecS shift) %{
aoqi@0 4226 predicate(n->as_Vector()->length() == 2);
aoqi@0 4227 match(Set dst (URShiftVS dst shift));
aoqi@0 4228 format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
aoqi@0 4229 ins_encode %{
aoqi@0 4230 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
aoqi@0 4231 %}
aoqi@0 4232 ins_pipe( pipe_slow );
aoqi@0 4233 %}
aoqi@0 4234
aoqi@0 4235 instruct vsrl2S_imm(vecS dst, immI8 shift) %{
aoqi@0 4236 predicate(n->as_Vector()->length() == 2);
aoqi@0 4237 match(Set dst (URShiftVS dst shift));
aoqi@0 4238 format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
aoqi@0 4239 ins_encode %{
aoqi@0 4240 __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
aoqi@0 4241 %}
aoqi@0 4242 ins_pipe( pipe_slow );
aoqi@0 4243 %}
aoqi@0 4244
aoqi@0 4245 instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
aoqi@0 4246 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 4247 match(Set dst (URShiftVS src shift));
aoqi@0 4248 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
aoqi@0 4249 ins_encode %{
aoqi@0 4250 bool vector256 = false;
aoqi@0 4251 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
aoqi@0 4252 %}
aoqi@0 4253 ins_pipe( pipe_slow );
aoqi@0 4254 %}
aoqi@0 4255
aoqi@0 4256 instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
aoqi@0 4257 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 4258 match(Set dst (URShiftVS src shift));
aoqi@0 4259 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
aoqi@0 4260 ins_encode %{
aoqi@0 4261 bool vector256 = false;
aoqi@0 4262 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
aoqi@0 4263 %}
aoqi@0 4264 ins_pipe( pipe_slow );
aoqi@0 4265 %}
aoqi@0 4266
aoqi@0 4267 instruct vsrl4S(vecD dst, vecS shift) %{
aoqi@0 4268 predicate(n->as_Vector()->length() == 4);
aoqi@0 4269 match(Set dst (URShiftVS dst shift));
aoqi@0 4270 format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
aoqi@0 4271 ins_encode %{
aoqi@0 4272 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
aoqi@0 4273 %}
aoqi@0 4274 ins_pipe( pipe_slow );
aoqi@0 4275 %}
aoqi@0 4276
aoqi@0 4277 instruct vsrl4S_imm(vecD dst, immI8 shift) %{
aoqi@0 4278 predicate(n->as_Vector()->length() == 4);
aoqi@0 4279 match(Set dst (URShiftVS dst shift));
aoqi@0 4280 format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
aoqi@0 4281 ins_encode %{
aoqi@0 4282 __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
aoqi@0 4283 %}
aoqi@0 4284 ins_pipe( pipe_slow );
aoqi@0 4285 %}
aoqi@0 4286
aoqi@0 4287 instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
aoqi@0 4288 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 4289 match(Set dst (URShiftVS src shift));
aoqi@0 4290 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
aoqi@0 4291 ins_encode %{
aoqi@0 4292 bool vector256 = false;
aoqi@0 4293 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
aoqi@0 4294 %}
aoqi@0 4295 ins_pipe( pipe_slow );
aoqi@0 4296 %}
aoqi@0 4297
aoqi@0 4298 instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
aoqi@0 4299 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 4300 match(Set dst (URShiftVS src shift));
aoqi@0 4301 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
aoqi@0 4302 ins_encode %{
aoqi@0 4303 bool vector256 = false;
aoqi@0 4304 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
aoqi@0 4305 %}
aoqi@0 4306 ins_pipe( pipe_slow );
aoqi@0 4307 %}
aoqi@0 4308
aoqi@0 4309 instruct vsrl8S(vecX dst, vecS shift) %{
aoqi@0 4310 predicate(n->as_Vector()->length() == 8);
aoqi@0 4311 match(Set dst (URShiftVS dst shift));
aoqi@0 4312 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
aoqi@0 4313 ins_encode %{
aoqi@0 4314 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
aoqi@0 4315 %}
aoqi@0 4316 ins_pipe( pipe_slow );
aoqi@0 4317 %}
aoqi@0 4318
aoqi@0 4319 instruct vsrl8S_imm(vecX dst, immI8 shift) %{
aoqi@0 4320 predicate(n->as_Vector()->length() == 8);
aoqi@0 4321 match(Set dst (URShiftVS dst shift));
aoqi@0 4322 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
aoqi@0 4323 ins_encode %{
aoqi@0 4324 __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
aoqi@0 4325 %}
aoqi@0 4326 ins_pipe( pipe_slow );
aoqi@0 4327 %}
aoqi@0 4328
aoqi@0 4329 instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
aoqi@0 4330 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
aoqi@0 4331 match(Set dst (URShiftVS src shift));
aoqi@0 4332 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
aoqi@0 4333 ins_encode %{
aoqi@0 4334 bool vector256 = false;
aoqi@0 4335 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
aoqi@0 4336 %}
aoqi@0 4337 ins_pipe( pipe_slow );
aoqi@0 4338 %}
aoqi@0 4339
aoqi@0 4340 instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
aoqi@0 4341 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
aoqi@0 4342 match(Set dst (URShiftVS src shift));
aoqi@0 4343 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
aoqi@0 4344 ins_encode %{
aoqi@0 4345 bool vector256 = false;
aoqi@0 4346 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
aoqi@0 4347 %}
aoqi@0 4348 ins_pipe( pipe_slow );
aoqi@0 4349 %}
aoqi@0 4350
aoqi@0 4351 instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
aoqi@0 4352 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
aoqi@0 4353 match(Set dst (URShiftVS src shift));
aoqi@0 4354 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
aoqi@0 4355 ins_encode %{
aoqi@0 4356 bool vector256 = true;
aoqi@0 4357 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
aoqi@0 4358 %}
aoqi@0 4359 ins_pipe( pipe_slow );
aoqi@0 4360 %}
aoqi@0 4361
aoqi@0 4362 instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
aoqi@0 4363 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
aoqi@0 4364 match(Set dst (URShiftVS src shift));
aoqi@0 4365 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
aoqi@0 4366 ins_encode %{
aoqi@0 4367 bool vector256 = true;
aoqi@0 4368 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
aoqi@0 4369 %}
aoqi@0 4370 ins_pipe( pipe_slow );
aoqi@0 4371 %}
aoqi@0 4372
aoqi@0 4373 // Integers vector logical right shift
aoqi@0 4374 instruct vsrl2I(vecD dst, vecS shift) %{
aoqi@0 4375 predicate(n->as_Vector()->length() == 2);
aoqi@0 4376 match(Set dst (URShiftVI dst shift));
aoqi@0 4377 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
aoqi@0 4378 ins_encode %{
aoqi@0 4379 __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
aoqi@0 4380 %}
aoqi@0 4381 ins_pipe( pipe_slow );
aoqi@0 4382 %}
aoqi@0 4383
aoqi@0 4384 instruct vsrl2I_imm(vecD dst, immI8 shift) %{
aoqi@0 4385 predicate(n->as_Vector()->length() == 2);
aoqi@0 4386 match(Set dst (URShiftVI dst shift));
aoqi@0 4387 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
aoqi@0 4388 ins_encode %{
aoqi@0 4389 __ psrld($dst$$XMMRegister, (int)$shift$$constant);
aoqi@0 4390 %}
aoqi@0 4391 ins_pipe( pipe_slow );
aoqi@0 4392 %}
aoqi@0 4393
aoqi@0 4394 instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{
aoqi@0 4395 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 4396 match(Set dst (URShiftVI src shift));
aoqi@0 4397 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
aoqi@0 4398 ins_encode %{
aoqi@0 4399 bool vector256 = false;
aoqi@0 4400 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
aoqi@0 4401 %}
aoqi@0 4402 ins_pipe( pipe_slow );
aoqi@0 4403 %}
aoqi@0 4404
aoqi@0 4405 instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
aoqi@0 4406 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 4407 match(Set dst (URShiftVI src shift));
aoqi@0 4408 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
aoqi@0 4409 ins_encode %{
aoqi@0 4410 bool vector256 = false;
aoqi@0 4411 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
aoqi@0 4412 %}
aoqi@0 4413 ins_pipe( pipe_slow );
aoqi@0 4414 %}
aoqi@0 4415
aoqi@0 4416 instruct vsrl4I(vecX dst, vecS shift) %{
aoqi@0 4417 predicate(n->as_Vector()->length() == 4);
aoqi@0 4418 match(Set dst (URShiftVI dst shift));
aoqi@0 4419 format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
aoqi@0 4420 ins_encode %{
aoqi@0 4421 __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
aoqi@0 4422 %}
aoqi@0 4423 ins_pipe( pipe_slow );
aoqi@0 4424 %}
aoqi@0 4425
aoqi@0 4426 instruct vsrl4I_imm(vecX dst, immI8 shift) %{
aoqi@0 4427 predicate(n->as_Vector()->length() == 4);
aoqi@0 4428 match(Set dst (URShiftVI dst shift));
aoqi@0 4429 format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
aoqi@0 4430 ins_encode %{
aoqi@0 4431 __ psrld($dst$$XMMRegister, (int)$shift$$constant);
aoqi@0 4432 %}
aoqi@0 4433 ins_pipe( pipe_slow );
aoqi@0 4434 %}
aoqi@0 4435
aoqi@0 4436 instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
aoqi@0 4437 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 4438 match(Set dst (URShiftVI src shift));
aoqi@0 4439 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
aoqi@0 4440 ins_encode %{
aoqi@0 4441 bool vector256 = false;
aoqi@0 4442 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
aoqi@0 4443 %}
aoqi@0 4444 ins_pipe( pipe_slow );
aoqi@0 4445 %}
aoqi@0 4446
aoqi@0 4447 instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
aoqi@0 4448 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 4449 match(Set dst (URShiftVI src shift));
aoqi@0 4450 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
aoqi@0 4451 ins_encode %{
aoqi@0 4452 bool vector256 = false;
aoqi@0 4453 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
aoqi@0 4454 %}
aoqi@0 4455 ins_pipe( pipe_slow );
aoqi@0 4456 %}
aoqi@0 4457
aoqi@0 4458 instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
aoqi@0 4459 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
aoqi@0 4460 match(Set dst (URShiftVI src shift));
aoqi@0 4461 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
aoqi@0 4462 ins_encode %{
aoqi@0 4463 bool vector256 = true;
aoqi@0 4464 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
aoqi@0 4465 %}
aoqi@0 4466 ins_pipe( pipe_slow );
aoqi@0 4467 %}
aoqi@0 4468
aoqi@0 4469 instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
aoqi@0 4470 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
aoqi@0 4471 match(Set dst (URShiftVI src shift));
aoqi@0 4472 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
aoqi@0 4473 ins_encode %{
aoqi@0 4474 bool vector256 = true;
aoqi@0 4475 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
aoqi@0 4476 %}
aoqi@0 4477 ins_pipe( pipe_slow );
aoqi@0 4478 %}
aoqi@0 4479
aoqi@0 4480 // Longs vector logical right shift
aoqi@0 4481 instruct vsrl2L(vecX dst, vecS shift) %{
aoqi@0 4482 predicate(n->as_Vector()->length() == 2);
aoqi@0 4483 match(Set dst (URShiftVL dst shift));
aoqi@0 4484 format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
aoqi@0 4485 ins_encode %{
aoqi@0 4486 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
aoqi@0 4487 %}
aoqi@0 4488 ins_pipe( pipe_slow );
aoqi@0 4489 %}
aoqi@0 4490
aoqi@0 4491 instruct vsrl2L_imm(vecX dst, immI8 shift) %{
aoqi@0 4492 predicate(n->as_Vector()->length() == 2);
aoqi@0 4493 match(Set dst (URShiftVL dst shift));
aoqi@0 4494 format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
aoqi@0 4495 ins_encode %{
aoqi@0 4496 __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
aoqi@0 4497 %}
aoqi@0 4498 ins_pipe( pipe_slow );
aoqi@0 4499 %}
aoqi@0 4500
aoqi@0 4501 instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
aoqi@0 4502 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 4503 match(Set dst (URShiftVL src shift));
aoqi@0 4504 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
aoqi@0 4505 ins_encode %{
aoqi@0 4506 bool vector256 = false;
aoqi@0 4507 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
aoqi@0 4508 %}
aoqi@0 4509 ins_pipe( pipe_slow );
aoqi@0 4510 %}
aoqi@0 4511
aoqi@0 4512 instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
aoqi@0 4513 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 4514 match(Set dst (URShiftVL src shift));
aoqi@0 4515 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
aoqi@0 4516 ins_encode %{
aoqi@0 4517 bool vector256 = false;
aoqi@0 4518 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
aoqi@0 4519 %}
aoqi@0 4520 ins_pipe( pipe_slow );
aoqi@0 4521 %}
aoqi@0 4522
aoqi@0 4523 instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
aoqi@0 4524 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
aoqi@0 4525 match(Set dst (URShiftVL src shift));
aoqi@0 4526 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
aoqi@0 4527 ins_encode %{
aoqi@0 4528 bool vector256 = true;
aoqi@0 4529 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
aoqi@0 4530 %}
aoqi@0 4531 ins_pipe( pipe_slow );
aoqi@0 4532 %}
aoqi@0 4533
aoqi@0 4534 instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
aoqi@0 4535 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
aoqi@0 4536 match(Set dst (URShiftVL src shift));
aoqi@0 4537 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
aoqi@0 4538 ins_encode %{
aoqi@0 4539 bool vector256 = true;
aoqi@0 4540 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
aoqi@0 4541 %}
aoqi@0 4542 ins_pipe( pipe_slow );
aoqi@0 4543 %}
aoqi@0 4544
aoqi@0 4545 // ------------------- ArithmeticRightShift -----------------------------------
aoqi@0 4546
aoqi@0 4547 // Shorts/Chars vector arithmetic right shift
aoqi@0 4548 instruct vsra2S(vecS dst, vecS shift) %{
aoqi@0 4549 predicate(n->as_Vector()->length() == 2);
aoqi@0 4550 match(Set dst (RShiftVS dst shift));
aoqi@0 4551 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
aoqi@0 4552 ins_encode %{
aoqi@0 4553 __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
aoqi@0 4554 %}
aoqi@0 4555 ins_pipe( pipe_slow );
aoqi@0 4556 %}
aoqi@0 4557
aoqi@0 4558 instruct vsra2S_imm(vecS dst, immI8 shift) %{
aoqi@0 4559 predicate(n->as_Vector()->length() == 2);
aoqi@0 4560 match(Set dst (RShiftVS dst shift));
aoqi@0 4561 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
aoqi@0 4562 ins_encode %{
aoqi@0 4563 __ psraw($dst$$XMMRegister, (int)$shift$$constant);
aoqi@0 4564 %}
aoqi@0 4565 ins_pipe( pipe_slow );
aoqi@0 4566 %}
aoqi@0 4567
aoqi@0 4568 instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{
aoqi@0 4569 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 4570 match(Set dst (RShiftVS src shift));
aoqi@0 4571 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
aoqi@0 4572 ins_encode %{
aoqi@0 4573 bool vector256 = false;
aoqi@0 4574 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
aoqi@0 4575 %}
aoqi@0 4576 ins_pipe( pipe_slow );
aoqi@0 4577 %}
aoqi@0 4578
aoqi@0 4579 instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
aoqi@0 4580 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 4581 match(Set dst (RShiftVS src shift));
aoqi@0 4582 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
aoqi@0 4583 ins_encode %{
aoqi@0 4584 bool vector256 = false;
aoqi@0 4585 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
aoqi@0 4586 %}
aoqi@0 4587 ins_pipe( pipe_slow );
aoqi@0 4588 %}
aoqi@0 4589
aoqi@0 4590 instruct vsra4S(vecD dst, vecS shift) %{
aoqi@0 4591 predicate(n->as_Vector()->length() == 4);
aoqi@0 4592 match(Set dst (RShiftVS dst shift));
aoqi@0 4593 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
aoqi@0 4594 ins_encode %{
aoqi@0 4595 __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
aoqi@0 4596 %}
aoqi@0 4597 ins_pipe( pipe_slow );
aoqi@0 4598 %}
aoqi@0 4599
aoqi@0 4600 instruct vsra4S_imm(vecD dst, immI8 shift) %{
aoqi@0 4601 predicate(n->as_Vector()->length() == 4);
aoqi@0 4602 match(Set dst (RShiftVS dst shift));
aoqi@0 4603 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
aoqi@0 4604 ins_encode %{
aoqi@0 4605 __ psraw($dst$$XMMRegister, (int)$shift$$constant);
aoqi@0 4606 %}
aoqi@0 4607 ins_pipe( pipe_slow );
aoqi@0 4608 %}
aoqi@0 4609
aoqi@0 4610 instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{
aoqi@0 4611 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 4612 match(Set dst (RShiftVS src shift));
aoqi@0 4613 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
aoqi@0 4614 ins_encode %{
aoqi@0 4615 bool vector256 = false;
aoqi@0 4616 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
aoqi@0 4617 %}
aoqi@0 4618 ins_pipe( pipe_slow );
aoqi@0 4619 %}
aoqi@0 4620
aoqi@0 4621 instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
aoqi@0 4622 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 4623 match(Set dst (RShiftVS src shift));
aoqi@0 4624 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
aoqi@0 4625 ins_encode %{
aoqi@0 4626 bool vector256 = false;
aoqi@0 4627 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
aoqi@0 4628 %}
aoqi@0 4629 ins_pipe( pipe_slow );
aoqi@0 4630 %}
aoqi@0 4631
aoqi@0 4632 instruct vsra8S(vecX dst, vecS shift) %{
aoqi@0 4633 predicate(n->as_Vector()->length() == 8);
aoqi@0 4634 match(Set dst (RShiftVS dst shift));
aoqi@0 4635 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
aoqi@0 4636 ins_encode %{
aoqi@0 4637 __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
aoqi@0 4638 %}
aoqi@0 4639 ins_pipe( pipe_slow );
aoqi@0 4640 %}
aoqi@0 4641
aoqi@0 4642 instruct vsra8S_imm(vecX dst, immI8 shift) %{
aoqi@0 4643 predicate(n->as_Vector()->length() == 8);
aoqi@0 4644 match(Set dst (RShiftVS dst shift));
aoqi@0 4645 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
aoqi@0 4646 ins_encode %{
aoqi@0 4647 __ psraw($dst$$XMMRegister, (int)$shift$$constant);
aoqi@0 4648 %}
aoqi@0 4649 ins_pipe( pipe_slow );
aoqi@0 4650 %}
aoqi@0 4651
aoqi@0 4652 instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{
aoqi@0 4653 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
aoqi@0 4654 match(Set dst (RShiftVS src shift));
aoqi@0 4655 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
aoqi@0 4656 ins_encode %{
aoqi@0 4657 bool vector256 = false;
aoqi@0 4658 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
aoqi@0 4659 %}
aoqi@0 4660 ins_pipe( pipe_slow );
aoqi@0 4661 %}
aoqi@0 4662
aoqi@0 4663 instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
aoqi@0 4664 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
aoqi@0 4665 match(Set dst (RShiftVS src shift));
aoqi@0 4666 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
aoqi@0 4667 ins_encode %{
aoqi@0 4668 bool vector256 = false;
aoqi@0 4669 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
aoqi@0 4670 %}
aoqi@0 4671 ins_pipe( pipe_slow );
aoqi@0 4672 %}
aoqi@0 4673
aoqi@0 4674 instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{
aoqi@0 4675 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
aoqi@0 4676 match(Set dst (RShiftVS src shift));
aoqi@0 4677 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
aoqi@0 4678 ins_encode %{
aoqi@0 4679 bool vector256 = true;
aoqi@0 4680 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
aoqi@0 4681 %}
aoqi@0 4682 ins_pipe( pipe_slow );
aoqi@0 4683 %}
aoqi@0 4684
aoqi@0 4685 instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
aoqi@0 4686 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
aoqi@0 4687 match(Set dst (RShiftVS src shift));
aoqi@0 4688 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
aoqi@0 4689 ins_encode %{
aoqi@0 4690 bool vector256 = true;
aoqi@0 4691 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
aoqi@0 4692 %}
aoqi@0 4693 ins_pipe( pipe_slow );
aoqi@0 4694 %}
aoqi@0 4695
aoqi@0 4696 // Integers vector arithmetic right shift
aoqi@0 4697 instruct vsra2I(vecD dst, vecS shift) %{
aoqi@0 4698 predicate(n->as_Vector()->length() == 2);
aoqi@0 4699 match(Set dst (RShiftVI dst shift));
aoqi@0 4700 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
aoqi@0 4701 ins_encode %{
aoqi@0 4702 __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
aoqi@0 4703 %}
aoqi@0 4704 ins_pipe( pipe_slow );
aoqi@0 4705 %}
aoqi@0 4706
aoqi@0 4707 instruct vsra2I_imm(vecD dst, immI8 shift) %{
aoqi@0 4708 predicate(n->as_Vector()->length() == 2);
aoqi@0 4709 match(Set dst (RShiftVI dst shift));
aoqi@0 4710 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
aoqi@0 4711 ins_encode %{
aoqi@0 4712 __ psrad($dst$$XMMRegister, (int)$shift$$constant);
aoqi@0 4713 %}
aoqi@0 4714 ins_pipe( pipe_slow );
aoqi@0 4715 %}
aoqi@0 4716
aoqi@0 4717 instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
aoqi@0 4718 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 4719 match(Set dst (RShiftVI src shift));
aoqi@0 4720 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
aoqi@0 4721 ins_encode %{
aoqi@0 4722 bool vector256 = false;
aoqi@0 4723 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
aoqi@0 4724 %}
aoqi@0 4725 ins_pipe( pipe_slow );
aoqi@0 4726 %}
aoqi@0 4727
aoqi@0 4728 instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
aoqi@0 4729 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 4730 match(Set dst (RShiftVI src shift));
aoqi@0 4731 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
aoqi@0 4732 ins_encode %{
aoqi@0 4733 bool vector256 = false;
aoqi@0 4734 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
aoqi@0 4735 %}
aoqi@0 4736 ins_pipe( pipe_slow );
aoqi@0 4737 %}
aoqi@0 4738
aoqi@0 4739 instruct vsra4I(vecX dst, vecS shift) %{
aoqi@0 4740 predicate(n->as_Vector()->length() == 4);
aoqi@0 4741 match(Set dst (RShiftVI dst shift));
aoqi@0 4742 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
aoqi@0 4743 ins_encode %{
aoqi@0 4744 __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
aoqi@0 4745 %}
aoqi@0 4746 ins_pipe( pipe_slow );
aoqi@0 4747 %}
aoqi@0 4748
aoqi@0 4749 instruct vsra4I_imm(vecX dst, immI8 shift) %{
aoqi@0 4750 predicate(n->as_Vector()->length() == 4);
aoqi@0 4751 match(Set dst (RShiftVI dst shift));
aoqi@0 4752 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
aoqi@0 4753 ins_encode %{
aoqi@0 4754 __ psrad($dst$$XMMRegister, (int)$shift$$constant);
aoqi@0 4755 %}
aoqi@0 4756 ins_pipe( pipe_slow );
aoqi@0 4757 %}
aoqi@0 4758
aoqi@0 4759 instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
aoqi@0 4760 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 4761 match(Set dst (RShiftVI src shift));
aoqi@0 4762 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
aoqi@0 4763 ins_encode %{
aoqi@0 4764 bool vector256 = false;
aoqi@0 4765 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
aoqi@0 4766 %}
aoqi@0 4767 ins_pipe( pipe_slow );
aoqi@0 4768 %}
aoqi@0 4769
aoqi@0 4770 instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
aoqi@0 4771 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 4772 match(Set dst (RShiftVI src shift));
aoqi@0 4773 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
aoqi@0 4774 ins_encode %{
aoqi@0 4775 bool vector256 = false;
aoqi@0 4776 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
aoqi@0 4777 %}
aoqi@0 4778 ins_pipe( pipe_slow );
aoqi@0 4779 %}
aoqi@0 4780
aoqi@0 4781 instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
aoqi@0 4782 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
aoqi@0 4783 match(Set dst (RShiftVI src shift));
aoqi@0 4784 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
aoqi@0 4785 ins_encode %{
aoqi@0 4786 bool vector256 = true;
aoqi@0 4787 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
aoqi@0 4788 %}
aoqi@0 4789 ins_pipe( pipe_slow );
aoqi@0 4790 %}
aoqi@0 4791
aoqi@0 4792 instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
aoqi@0 4793 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
aoqi@0 4794 match(Set dst (RShiftVI src shift));
aoqi@0 4795 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
aoqi@0 4796 ins_encode %{
aoqi@0 4797 bool vector256 = true;
aoqi@0 4798 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
aoqi@0 4799 %}
aoqi@0 4800 ins_pipe( pipe_slow );
aoqi@0 4801 %}
aoqi@0 4802
aoqi@0 4803 // There are no longs vector arithmetic right shift instructions.
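// (Illustrative note, an assumption not stated here: SSE/AVX2 provide no packed
// 64-bit arithmetic shift -- a vpsraq form only appears with AVX-512 -- which is
// why no vsra*L rules are defined. A scalar-equivalent sketch in Java, valid for
// 0 < n < 64:
//   long sra(long x, int n) { long u = x >>> n; return x < 0 ? u | (-1L << (64 - n)) : u; }
// )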
aoqi@0 4804
aoqi@0 4805
aoqi@0 4806 // --------------------------------- AND --------------------------------------
aoqi@0 4807
aoqi@0 4808 instruct vand4B(vecS dst, vecS src) %{
aoqi@0 4809 predicate(n->as_Vector()->length_in_bytes() == 4);
aoqi@0 4810 match(Set dst (AndV dst src));
aoqi@0 4811 format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
aoqi@0 4812 ins_encode %{
aoqi@0 4813 __ pand($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 4814 %}
aoqi@0 4815 ins_pipe( pipe_slow );
aoqi@0 4816 %}
aoqi@0 4817
aoqi@0 4818 instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
aoqi@0 4819 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
aoqi@0 4820 match(Set dst (AndV src1 src2));
aoqi@0 4821 format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
aoqi@0 4822 ins_encode %{
aoqi@0 4823 bool vector256 = false;
aoqi@0 4824 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 4825 %}
aoqi@0 4826 ins_pipe( pipe_slow );
aoqi@0 4827 %}
aoqi@0 4828
aoqi@0 4829 instruct vand8B(vecD dst, vecD src) %{
aoqi@0 4830 predicate(n->as_Vector()->length_in_bytes() == 8);
aoqi@0 4831 match(Set dst (AndV dst src));
aoqi@0 4832 format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
aoqi@0 4833 ins_encode %{
aoqi@0 4834 __ pand($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 4835 %}
aoqi@0 4836 ins_pipe( pipe_slow );
aoqi@0 4837 %}
aoqi@0 4838
aoqi@0 4839 instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
aoqi@0 4840 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
aoqi@0 4841 match(Set dst (AndV src1 src2));
aoqi@0 4842 format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %}
aoqi@0 4843 ins_encode %{
aoqi@0 4844 bool vector256 = false;
aoqi@0 4845 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 4846 %}
aoqi@0 4847 ins_pipe( pipe_slow );
aoqi@0 4848 %}
aoqi@0 4849
aoqi@0 4850 instruct vand16B(vecX dst, vecX src) %{
aoqi@0 4851 predicate(n->as_Vector()->length_in_bytes() == 16);
aoqi@0 4852 match(Set dst (AndV dst src));
aoqi@0 4853 format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
aoqi@0 4854 ins_encode %{
aoqi@0 4855 __ pand($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 4856 %}
aoqi@0 4857 ins_pipe( pipe_slow );
aoqi@0 4858 %}
aoqi@0 4859
aoqi@0 4860 instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
aoqi@0 4861 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
aoqi@0 4862 match(Set dst (AndV src1 src2));
aoqi@0 4863 format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
aoqi@0 4864 ins_encode %{
aoqi@0 4865 bool vector256 = false;
aoqi@0 4866 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 4867 %}
aoqi@0 4868 ins_pipe( pipe_slow );
aoqi@0 4869 %}
aoqi@0 4870
aoqi@0 4871 instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
aoqi@0 4872 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
aoqi@0 4873 match(Set dst (AndV src (LoadVector mem)));
aoqi@0 4874 format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
aoqi@0 4875 ins_encode %{
aoqi@0 4876 bool vector256 = false;
aoqi@0 4877 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 4878 %}
aoqi@0 4879 ins_pipe( pipe_slow );
aoqi@0 4880 %}
aoqi@0 4881
aoqi@0 4882 instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
aoqi@0 4883 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
aoqi@0 4884 match(Set dst (AndV src1 src2));
aoqi@0 4885 format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %}
aoqi@0 4886 ins_encode %{
aoqi@0 4887 bool vector256 = true;
aoqi@0 4888 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 4889 %}
aoqi@0 4890 ins_pipe( pipe_slow );
aoqi@0 4891 %}
aoqi@0 4892
aoqi@0 4893 instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
aoqi@0 4894 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
aoqi@0 4895 match(Set dst (AndV src (LoadVector mem)));
aoqi@0 4896 format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
aoqi@0 4897 ins_encode %{
aoqi@0 4898 bool vector256 = true;
aoqi@0 4899 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 4900 %}
aoqi@0 4901 ins_pipe( pipe_slow );
aoqi@0 4902 %}
aoqi@0 4903
aoqi@0 4904 // --------------------------------- OR ---------------------------------------
aoqi@0 4905
aoqi@0 4906 instruct vor4B(vecS dst, vecS src) %{
aoqi@0 4907 predicate(n->as_Vector()->length_in_bytes() == 4);
aoqi@0 4908 match(Set dst (OrV dst src));
aoqi@0 4909 format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
aoqi@0 4910 ins_encode %{
aoqi@0 4911 __ por($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 4912 %}
aoqi@0 4913 ins_pipe( pipe_slow );
aoqi@0 4914 %}
aoqi@0 4915
aoqi@0 4916 instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
aoqi@0 4917 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
aoqi@0 4918 match(Set dst (OrV src1 src2));
aoqi@0 4919 format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
aoqi@0 4920 ins_encode %{
aoqi@0 4921 bool vector256 = false;
aoqi@0 4922 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 4923 %}
aoqi@0 4924 ins_pipe( pipe_slow );
aoqi@0 4925 %}
aoqi@0 4926
aoqi@0 4927 instruct vor8B(vecD dst, vecD src) %{
aoqi@0 4928 predicate(n->as_Vector()->length_in_bytes() == 8);
aoqi@0 4929 match(Set dst (OrV dst src));
aoqi@0 4930 format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
aoqi@0 4931 ins_encode %{
aoqi@0 4932 __ por($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 4933 %}
aoqi@0 4934 ins_pipe( pipe_slow );
aoqi@0 4935 %}
aoqi@0 4936
aoqi@0 4937 instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
aoqi@0 4938 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
aoqi@0 4939 match(Set dst (OrV src1 src2));
aoqi@0 4940 format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
aoqi@0 4941 ins_encode %{
aoqi@0 4942 bool vector256 = false;
aoqi@0 4943 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 4944 %}
aoqi@0 4945 ins_pipe( pipe_slow );
aoqi@0 4946 %}
aoqi@0 4947
aoqi@0 4948 instruct vor16B(vecX dst, vecX src) %{
aoqi@0 4949 predicate(n->as_Vector()->length_in_bytes() == 16);
aoqi@0 4950 match(Set dst (OrV dst src));
aoqi@0 4951 format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
aoqi@0 4952 ins_encode %{
aoqi@0 4953 __ por($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 4954 %}
aoqi@0 4955 ins_pipe( pipe_slow );
aoqi@0 4956 %}
aoqi@0 4957
aoqi@0 4958 instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
aoqi@0 4959 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
aoqi@0 4960 match(Set dst (OrV src1 src2));
aoqi@0 4961 format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
aoqi@0 4962 ins_encode %{
aoqi@0 4963 bool vector256 = false;
aoqi@0 4964 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 4965 %}
aoqi@0 4966 ins_pipe( pipe_slow );
aoqi@0 4967 %}
aoqi@0 4968
aoqi@0 4969 instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
aoqi@0 4970 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
aoqi@0 4971 match(Set dst (OrV src (LoadVector mem)));
aoqi@0 4972 format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
aoqi@0 4973 ins_encode %{
aoqi@0 4974 bool vector256 = false;
aoqi@0 4975 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 4976 %}
aoqi@0 4977 ins_pipe( pipe_slow );
aoqi@0 4978 %}
aoqi@0 4979
aoqi@0 4980 instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
aoqi@0 4981 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
aoqi@0 4982 match(Set dst (OrV src1 src2));
aoqi@0 4983 format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
aoqi@0 4984 ins_encode %{
aoqi@0 4985 bool vector256 = true;
aoqi@0 4986 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 4987 %}
aoqi@0 4988 ins_pipe( pipe_slow );
aoqi@0 4989 %}
aoqi@0 4990
aoqi@0 4991 instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
aoqi@0 4992 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
aoqi@0 4993 match(Set dst (OrV src (LoadVector mem)));
aoqi@0 4994 format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
aoqi@0 4995 ins_encode %{
aoqi@0 4996 bool vector256 = true;
aoqi@0 4997 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 4998 %}
aoqi@0 4999 ins_pipe( pipe_slow );
aoqi@0 5000 %}
aoqi@0 5001
aoqi@0 5002 // --------------------------------- XOR --------------------------------------
aoqi@0 5003
aoqi@0 5004 instruct vxor4B(vecS dst, vecS src) %{
aoqi@0 5005 predicate(n->as_Vector()->length_in_bytes() == 4);
aoqi@0 5006 match(Set dst (XorV dst src));
aoqi@0 5007 format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
aoqi@0 5008 ins_encode %{
aoqi@0 5009 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 5010 %}
aoqi@0 5011 ins_pipe( pipe_slow );
aoqi@0 5012 %}
aoqi@0 5013
aoqi@0 5014 instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
aoqi@0 5015 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
aoqi@0 5016 match(Set dst (XorV src1 src2));
aoqi@0 5017 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
aoqi@0 5018 ins_encode %{
aoqi@0 5019 bool vector256 = false;
aoqi@0 5020 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 5021 %}
aoqi@0 5022 ins_pipe( pipe_slow );
aoqi@0 5023 %}
aoqi@0 5024
aoqi@0 5025 instruct vxor8B(vecD dst, vecD src) %{
aoqi@0 5026 predicate(n->as_Vector()->length_in_bytes() == 8);
aoqi@0 5027 match(Set dst (XorV dst src));
aoqi@0 5028 format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
aoqi@0 5029 ins_encode %{
aoqi@0 5030 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 5031 %}
aoqi@0 5032 ins_pipe( pipe_slow );
aoqi@0 5033 %}
aoqi@0 5034
aoqi@0 5035 instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
aoqi@0 5036 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
aoqi@0 5037 match(Set dst (XorV src1 src2));
aoqi@0 5038 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
aoqi@0 5039 ins_encode %{
aoqi@0 5040 bool vector256 = false;
aoqi@0 5041 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 5042 %}
aoqi@0 5043 ins_pipe( pipe_slow );
aoqi@0 5044 %}
aoqi@0 5045
aoqi@0 5046 instruct vxor16B(vecX dst, vecX src) %{
aoqi@0 5047 predicate(n->as_Vector()->length_in_bytes() == 16);
aoqi@0 5048 match(Set dst (XorV dst src));
aoqi@0 5049 format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
aoqi@0 5050 ins_encode %{
aoqi@0 5051 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 5052 %}
aoqi@0 5053 ins_pipe( pipe_slow );
aoqi@0 5054 %}
aoqi@0 5055
aoqi@0 5056 instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
aoqi@0 5057 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
aoqi@0 5058 match(Set dst (XorV src1 src2));
aoqi@0 5059 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
aoqi@0 5060 ins_encode %{
aoqi@0 5061 bool vector256 = false;
aoqi@0 5062 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 5063 %}
aoqi@0 5064 ins_pipe( pipe_slow );
aoqi@0 5065 %}
aoqi@0 5066
aoqi@0 5067 instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
aoqi@0 5068 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
aoqi@0 5069 match(Set dst (XorV src (LoadVector mem)));
aoqi@0 5070 format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
aoqi@0 5071 ins_encode %{
aoqi@0 5072 bool vector256 = false;
aoqi@0 5073 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 5074 %}
aoqi@0 5075 ins_pipe( pipe_slow );
aoqi@0 5076 %}
aoqi@0 5077
aoqi@0 5078 instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
aoqi@0 5079 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
aoqi@0 5080 match(Set dst (XorV src1 src2));
aoqi@0 5081 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
aoqi@0 5082 ins_encode %{
aoqi@0 5083 bool vector256 = true;
aoqi@0 5084 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 5085 %}
aoqi@0 5086 ins_pipe( pipe_slow );
aoqi@0 5087 %}
aoqi@0 5088
aoqi@0 5089 instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
aoqi@0 5090 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
aoqi@0 5091 match(Set dst (XorV src (LoadVector mem)));
aoqi@0 5092 format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
aoqi@0 5093 ins_encode %{
aoqi@0 5094 bool vector256 = true;
aoqi@0 5095 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 5096 %}
aoqi@0 5097 ins_pipe( pipe_slow );
aoqi@0 5098 %}
aoqi@0 5099
