src/cpu/x86/vm/x86.ad

Tue, 17 Oct 2017 12:58:25 +0800

author
aoqi
date
Tue, 17 Oct 2017 12:58:25 +0800
changeset 7994
04ff2f6cd0eb
parent 7854
e8260b6328fb
parent 6876
710a3c8b516e
child 8604
04d83ba48607
permissions
-rw-r--r--

merge

aoqi@0 1 //
aoqi@0 2 // Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
aoqi@0 3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
aoqi@0 4 //
aoqi@0 5 // This code is free software; you can redistribute it and/or modify it
aoqi@0 6 // under the terms of the GNU General Public License version 2 only, as
aoqi@0 7 // published by the Free Software Foundation.
aoqi@0 8 //
aoqi@0 9 // This code is distributed in the hope that it will be useful, but WITHOUT
aoqi@0 10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
aoqi@0 11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
aoqi@0 12 // version 2 for more details (a copy is included in the LICENSE file that
aoqi@0 13 // accompanied this code).
aoqi@0 14 //
aoqi@0 15 // You should have received a copy of the GNU General Public License version
aoqi@0 16 // 2 along with this work; if not, write to the Free Software Foundation,
aoqi@0 17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
aoqi@0 18 //
aoqi@0 19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
aoqi@0 20 // or visit www.oracle.com if you need additional information or have any
aoqi@0 21 // questions.
aoqi@0 22 //
aoqi@0 23 //
aoqi@0 24
aoqi@0 25 // X86 Common Architecture Description File
aoqi@0 26
aoqi@0 27 //----------REGISTER DEFINITION BLOCK------------------------------------------
aoqi@0 28 // This information is used by the matcher and the register allocator to
aoqi@0 29 // describe individual registers and classes of registers within the target
aoqi@0 30 // architecture.
aoqi@0 31
aoqi@0 32 register %{
aoqi@0 33 //----------Architecture Description Register Definitions----------------------
aoqi@0 34 // General Registers
aoqi@0 35 // "reg_def" name ( register save type, C convention save type,
aoqi@0 36 // ideal register type, encoding );
aoqi@0 37 // Register Save Types:
aoqi@0 38 //
aoqi@0 39 // NS = No-Save: The register allocator assumes that these registers
aoqi@0 40 // can be used without saving upon entry to the method, &
aoqi@0 41 // that they do not need to be saved at call sites.
aoqi@0 42 //
aoqi@0 43 // SOC = Save-On-Call: The register allocator assumes that these registers
aoqi@0 44 // can be used without saving upon entry to the method,
aoqi@0 45 // but that they must be saved at call sites.
aoqi@0 46 //
aoqi@0 47 // SOE = Save-On-Entry: The register allocator assumes that these registers
aoqi@0 48 // must be saved before using them upon entry to the
aoqi@0 49 // method, but they do not need to be saved at call
aoqi@0 50 // sites.
aoqi@0 51 //
aoqi@0 52 // AS = Always-Save: The register allocator assumes that these registers
aoqi@0 53 // must be saved before using them upon entry to the
aoqi@0 54 // method, & that they must be saved at call sites.
aoqi@0 55 //
aoqi@0 56 // Ideal Register Type is used to determine how to save & restore a
aoqi@0 57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
aoqi@0 58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
aoqi@0 59 //
aoqi@0 60 // The encoding number is the actual bit-pattern placed into the opcodes.
aoqi@0 61
aoqi@0 62 // XMM registers. 256-bit registers or 8 words each, labeled (a)-h.
aoqi@0 63 // Word a in each register holds a Float, words ab hold a Double.
aoqi@0 64 // The whole registers are used in SSE4.2 version intrinsics,
aoqi@0 65 // array copy stubs and superword operations (see UseSSE42Intrinsics,
aoqi@0 66 // UseXMMForArrayCopy and UseSuperword flags).
aoqi@0 67 // XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
aoqi@0 68 // Linux ABI: No register preserved across function calls
aoqi@0 69 // XMM0-XMM7 might hold parameters
aoqi@0 70 // Windows ABI: XMM6-XMM15 preserved across function calls
aoqi@0 71 // XMM0-XMM3 might hold parameters
aoqi@0 72
aoqi@0 73 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
aoqi@0 74 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
aoqi@0 75 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
aoqi@0 76 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
aoqi@0 77 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
aoqi@0 78 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
aoqi@0 79 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
aoqi@0 80 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
aoqi@0 81
aoqi@0 82 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
aoqi@0 83 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
aoqi@0 84 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
aoqi@0 85 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
aoqi@0 86 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
aoqi@0 87 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
aoqi@0 88 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
aoqi@0 89 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
aoqi@0 90
aoqi@0 91 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
aoqi@0 92 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
aoqi@0 93 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
aoqi@0 94 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
aoqi@0 95 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
aoqi@0 96 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
aoqi@0 97 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
aoqi@0 98 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
aoqi@0 99
aoqi@0 100 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
aoqi@0 101 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
aoqi@0 102 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
aoqi@0 103 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
aoqi@0 104 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
aoqi@0 105 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
aoqi@0 106 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
aoqi@0 107 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
aoqi@0 108
aoqi@0 109 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
aoqi@0 110 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
aoqi@0 111 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
aoqi@0 112 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
aoqi@0 113 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
aoqi@0 114 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
aoqi@0 115 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
aoqi@0 116 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
aoqi@0 117
aoqi@0 118 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
aoqi@0 119 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
aoqi@0 120 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
aoqi@0 121 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
aoqi@0 122 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
aoqi@0 123 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
aoqi@0 124 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
aoqi@0 125 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
aoqi@0 126
// Windows keeps XMM6-XMM15 callee-saved (SOE below); other ABIs treat
// every XMM register as caller-saved (SOC, see the #else branch).
aoqi@0 127 #ifdef _WIN64
aoqi@0 128
aoqi@0 129 reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
aoqi@0 130 reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
aoqi@0 131 reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
aoqi@0 132 reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
aoqi@0 133 reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
aoqi@0 134 reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
aoqi@0 135 reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
aoqi@0 136 reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));
aoqi@0 137
aoqi@0 138 reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
aoqi@0 139 reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
aoqi@0 140 reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
aoqi@0 141 reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
aoqi@0 142 reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
aoqi@0 143 reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
aoqi@0 144 reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
aoqi@0 145 reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));
aoqi@0 146
aoqi@0 147 reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
aoqi@0 148 reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
aoqi@0 149 reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
aoqi@0 150 reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
aoqi@0 151 reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
aoqi@0 152 reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
aoqi@0 153 reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
aoqi@0 154 reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));
aoqi@0 155
aoqi@0 156 reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
aoqi@0 157 reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
aoqi@0 158 reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
aoqi@0 159 reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
aoqi@0 160 reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
aoqi@0 161 reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
aoqi@0 162 reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
aoqi@0 163 reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));
aoqi@0 164
aoqi@0 165 reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
aoqi@0 166 reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
aoqi@0 167 reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
aoqi@0 168 reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
aoqi@0 169 reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
aoqi@0 170 reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
aoqi@0 171 reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
aoqi@0 172 reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));
aoqi@0 173
aoqi@0 174 reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
aoqi@0 175 reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
aoqi@0 176 reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
aoqi@0 177 reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
aoqi@0 178 reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
aoqi@0 179 reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
aoqi@0 180 reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
aoqi@0 181 reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));
aoqi@0 182
aoqi@0 183 reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
aoqi@0 184 reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
aoqi@0 185 reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
aoqi@0 186 reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
aoqi@0 187 reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
aoqi@0 188 reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
aoqi@0 189 reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
aoqi@0 190 reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));
aoqi@0 191
aoqi@0 192 reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
aoqi@0 193 reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
aoqi@0 194 reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
aoqi@0 195 reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
aoqi@0 196 reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
aoqi@0 197 reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
aoqi@0 198 reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
aoqi@0 199 reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));
aoqi@0 200
aoqi@0 201 reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
aoqi@0 202 reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
aoqi@0 203 reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
aoqi@0 204 reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
aoqi@0 205 reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
aoqi@0 206 reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
aoqi@0 207 reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
aoqi@0 208 reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));
aoqi@0 209
aoqi@0 210 reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
aoqi@0 211 reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
aoqi@0 212 reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
aoqi@0 213 reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
aoqi@0 214 reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
aoqi@0 215 reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
aoqi@0 216 reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
aoqi@0 217 reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));
aoqi@0 218
aoqi@0 219 #else // _WIN64
aoqi@0 220
aoqi@0 221 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
aoqi@0 222 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
aoqi@0 223 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
aoqi@0 224 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
aoqi@0 225 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
aoqi@0 226 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
aoqi@0 227 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
aoqi@0 228 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
aoqi@0 229
aoqi@0 230 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
aoqi@0 231 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
aoqi@0 232 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
aoqi@0 233 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
aoqi@0 234 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
aoqi@0 235 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
aoqi@0 236 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
aoqi@0 237 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
aoqi@0 238
aoqi@0 239 #ifdef _LP64
aoqi@0 240
aoqi@0 241 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
aoqi@0 242 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
aoqi@0 243 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
aoqi@0 244 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
aoqi@0 245 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
aoqi@0 246 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
aoqi@0 247 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
aoqi@0 248 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
aoqi@0 249
aoqi@0 250 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
aoqi@0 251 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
aoqi@0 252 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
aoqi@0 253 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
aoqi@0 254 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
aoqi@0 255 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
aoqi@0 256 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
aoqi@0 257 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
aoqi@0 258
aoqi@0 259 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
aoqi@0 260 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
aoqi@0 261 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
aoqi@0 262 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
aoqi@0 263 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
aoqi@0 264 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
aoqi@0 265 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
aoqi@0 266 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
aoqi@0 267
aoqi@0 268 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
aoqi@0 269 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
aoqi@0 270 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
aoqi@0 271 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
aoqi@0 272 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
aoqi@0 273 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
aoqi@0 274 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
aoqi@0 275 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
aoqi@0 276
aoqi@0 277 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
aoqi@0 278 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
aoqi@0 279 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
aoqi@0 280 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
aoqi@0 281 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
aoqi@0 282 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
aoqi@0 283 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
aoqi@0 284 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
aoqi@0 285
aoqi@0 286 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
aoqi@0 287 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
aoqi@0 288 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
aoqi@0 289 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
aoqi@0 290 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
aoqi@0 291 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
aoqi@0 292 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
aoqi@0 293 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
aoqi@0 294
aoqi@0 295 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
aoqi@0 296 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
aoqi@0 297 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
aoqi@0 298 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
aoqi@0 299 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
aoqi@0 300 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
aoqi@0 301 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
aoqi@0 302 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
aoqi@0 303
aoqi@0 304 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
aoqi@0 305 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
aoqi@0 306 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
aoqi@0 307 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
aoqi@0 308 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
aoqi@0 309 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
aoqi@0 310 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
aoqi@0 311 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
aoqi@0 312
aoqi@0 313 #endif // _LP64
aoqi@0 314
aoqi@0 315 #endif // _WIN64
aoqi@0 316
// Condition-code (flags) register. Its encoding (16 on 64-bit, 8 on
// 32-bit) follows the last XMM encoding defined for each platform.
aoqi@0 317 #ifdef _LP64
aoqi@0 318 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
aoqi@0 319 #else
aoqi@0 320 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
aoqi@0 321 #endif // _LP64
aoqi@0 322
// Allocation order for all XMM register words; XMM8-XMM15 exist only
// on 64-bit (_LP64) builds.
aoqi@0 323 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
aoqi@0 324 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
aoqi@0 325 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
aoqi@0 326 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
aoqi@0 327 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
aoqi@0 328 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
aoqi@0 329 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
aoqi@0 330 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
aoqi@0 331 #ifdef _LP64
aoqi@0 332 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
aoqi@0 333 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
aoqi@0 334 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
aoqi@0 335 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
aoqi@0 336 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
aoqi@0 337 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
aoqi@0 338 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
aoqi@0 339 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
aoqi@0 340 #endif
aoqi@0 341 );
aoqi@0 342
aoqi@0 343 // flags allocation class should be last.
aoqi@0 344 alloc_class chunk2(RFLAGS);
aoqi@0 345
aoqi@0 346 // Singleton class for condition codes
aoqi@0 347 reg_class int_flags(RFLAGS);
aoqi@0 348
aoqi@0 349 // Class for all float registers
aoqi@0 350 reg_class float_reg(XMM0,
aoqi@0 351 XMM1,
aoqi@0 352 XMM2,
aoqi@0 353 XMM3,
aoqi@0 354 XMM4,
aoqi@0 355 XMM5,
aoqi@0 356 XMM6,
aoqi@0 357 XMM7
aoqi@0 358 #ifdef _LP64
aoqi@0 359 ,XMM8,
aoqi@0 360 XMM9,
aoqi@0 361 XMM10,
aoqi@0 362 XMM11,
aoqi@0 363 XMM12,
aoqi@0 364 XMM13,
aoqi@0 365 XMM14,
aoqi@0 366 XMM15
aoqi@0 367 #endif
aoqi@0 368 );
aoqi@0 369
aoqi@0 370 // Class for all double registers
aoqi@0 371 reg_class double_reg(XMM0, XMM0b,
aoqi@0 372 XMM1, XMM1b,
aoqi@0 373 XMM2, XMM2b,
aoqi@0 374 XMM3, XMM3b,
aoqi@0 375 XMM4, XMM4b,
aoqi@0 376 XMM5, XMM5b,
aoqi@0 377 XMM6, XMM6b,
aoqi@0 378 XMM7, XMM7b
aoqi@0 379 #ifdef _LP64
aoqi@0 380 ,XMM8, XMM8b,
aoqi@0 381 XMM9, XMM9b,
aoqi@0 382 XMM10, XMM10b,
aoqi@0 383 XMM11, XMM11b,
aoqi@0 384 XMM12, XMM12b,
aoqi@0 385 XMM13, XMM13b,
aoqi@0 386 XMM14, XMM14b,
aoqi@0 387 XMM15, XMM15b
aoqi@0 388 #endif
aoqi@0 389 );
aoqi@0 390
aoqi@0 391 // Class for all 32bit vector registers
aoqi@0 392 reg_class vectors_reg(XMM0,
aoqi@0 393 XMM1,
aoqi@0 394 XMM2,
aoqi@0 395 XMM3,
aoqi@0 396 XMM4,
aoqi@0 397 XMM5,
aoqi@0 398 XMM6,
aoqi@0 399 XMM7
aoqi@0 400 #ifdef _LP64
aoqi@0 401 ,XMM8,
aoqi@0 402 XMM9,
aoqi@0 403 XMM10,
aoqi@0 404 XMM11,
aoqi@0 405 XMM12,
aoqi@0 406 XMM13,
aoqi@0 407 XMM14,
aoqi@0 408 XMM15
aoqi@0 409 #endif
aoqi@0 410 );
aoqi@0 411
aoqi@0 412 // Class for all 64bit vector registers
aoqi@0 413 reg_class vectord_reg(XMM0, XMM0b,
aoqi@0 414 XMM1, XMM1b,
aoqi@0 415 XMM2, XMM2b,
aoqi@0 416 XMM3, XMM3b,
aoqi@0 417 XMM4, XMM4b,
aoqi@0 418 XMM5, XMM5b,
aoqi@0 419 XMM6, XMM6b,
aoqi@0 420 XMM7, XMM7b
aoqi@0 421 #ifdef _LP64
aoqi@0 422 ,XMM8, XMM8b,
aoqi@0 423 XMM9, XMM9b,
aoqi@0 424 XMM10, XMM10b,
aoqi@0 425 XMM11, XMM11b,
aoqi@0 426 XMM12, XMM12b,
aoqi@0 427 XMM13, XMM13b,
aoqi@0 428 XMM14, XMM14b,
aoqi@0 429 XMM15, XMM15b
aoqi@0 430 #endif
aoqi@0 431 );
aoqi@0 432
aoqi@0 433 // Class for all 128bit vector registers
aoqi@0 434 reg_class vectorx_reg(XMM0, XMM0b, XMM0c, XMM0d,
aoqi@0 435 XMM1, XMM1b, XMM1c, XMM1d,
aoqi@0 436 XMM2, XMM2b, XMM2c, XMM2d,
aoqi@0 437 XMM3, XMM3b, XMM3c, XMM3d,
aoqi@0 438 XMM4, XMM4b, XMM4c, XMM4d,
aoqi@0 439 XMM5, XMM5b, XMM5c, XMM5d,
aoqi@0 440 XMM6, XMM6b, XMM6c, XMM6d,
aoqi@0 441 XMM7, XMM7b, XMM7c, XMM7d
aoqi@0 442 #ifdef _LP64
aoqi@0 443 ,XMM8, XMM8b, XMM8c, XMM8d,
aoqi@0 444 XMM9, XMM9b, XMM9c, XMM9d,
aoqi@0 445 XMM10, XMM10b, XMM10c, XMM10d,
aoqi@0 446 XMM11, XMM11b, XMM11c, XMM11d,
aoqi@0 447 XMM12, XMM12b, XMM12c, XMM12d,
aoqi@0 448 XMM13, XMM13b, XMM13c, XMM13d,
aoqi@0 449 XMM14, XMM14b, XMM14c, XMM14d,
aoqi@0 450 XMM15, XMM15b, XMM15c, XMM15d
aoqi@0 451 #endif
aoqi@0 452 );
aoqi@0 453
aoqi@0 454 // Class for all 256bit vector registers
aoqi@0 455 reg_class vectory_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
aoqi@0 456 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
aoqi@0 457 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
aoqi@0 458 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
aoqi@0 459 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
aoqi@0 460 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
aoqi@0 461 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
aoqi@0 462 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
aoqi@0 463 #ifdef _LP64
aoqi@0 464 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
aoqi@0 465 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
aoqi@0 466 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
aoqi@0 467 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
aoqi@0 468 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
aoqi@0 469 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
aoqi@0 470 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
aoqi@0 471 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
aoqi@0 472 #endif
aoqi@0 473 );
aoqi@0 474
aoqi@0 475 %}
aoqi@0 476
aoqi@0 477
aoqi@0 478 //----------SOURCE BLOCK-------------------------------------------------------
aoqi@0 479 // This is a block of C++ code which provides values, functions, and
aoqi@0 480 // definitions necessary in the rest of the architecture description
aoqi@0 481
aoqi@0 482 source_hpp %{
aoqi@0 483 // Header information of the source block.
aoqi@0 484 // Method declarations/definitions which are used outside
aoqi@0 485 // the ad-scope can conveniently be defined here.
aoqi@0 486 //
aoqi@0 487 // To keep related declarations/definitions/uses close together,
aoqi@0 488 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
aoqi@0 489
aoqi@0 490 class CallStubImpl {
aoqi@0 491
aoqi@0 492 //--------------------------------------------------------------
aoqi@0 493 //---< Used for optimization in Compile::shorten_branches >---
aoqi@0 494 //--------------------------------------------------------------
aoqi@0 495
aoqi@0 496 public:
aoqi@0 497 // Size of call trampoline stub.
aoqi@0 498 static uint size_call_trampoline() {
aoqi@0 499 return 0; // no call trampolines on this platform
aoqi@0 500 }
aoqi@0 501
aoqi@0 502 // number of relocations needed by a call trampoline stub
aoqi@0 503 static uint reloc_call_trampoline() {
aoqi@0 504 return 0; // no call trampolines on this platform
aoqi@0 505 }
aoqi@0 506 };
aoqi@0 507
// Sizes and emitters for the per-method exception and deopt handler stubs.
aoqi@0 508 class HandlerImpl {
aoqi@0 509
aoqi@0 510 public:
aoqi@0 511
aoqi@0 512 static int emit_exception_handler(CodeBuffer &cbuf);
aoqi@0 513 static int emit_deopt_handler(CodeBuffer& cbuf);
aoqi@0 514
aoqi@0 515 static uint size_exception_handler() {
aoqi@0 516 // NativeCall instruction size is the same as NativeJump.
aoqi@0 517 // exception handler starts out as jump and can be patched to
aoqi@0 518 // a call by deoptimization. (4932387)
aoqi@0 519 // Note that this value is also credited (in output.cpp) to
aoqi@0 520 // the size of the code section.
aoqi@0 521 return NativeJump::instruction_size;
aoqi@0 522 }
aoqi@0 523
aoqi@0 524 #ifdef _LP64
aoqi@0 525 static uint size_deopt_handler() {
aoqi@0 526 // three 5 byte instructions
aoqi@0 527 return 15;
aoqi@0 528 }
aoqi@0 529 #else
aoqi@0 530 static uint size_deopt_handler() {
aoqi@0 531 // NativeCall instruction size is the same as NativeJump.
aoqi@0 532 // exception handler starts out as jump and can be patched to
aoqi@0 533 // a call by deoptimization. (4932387)
aoqi@0 534 // Note that this value is also credited (in output.cpp) to
aoqi@0 535 // the size of the code section.
aoqi@0 536 return 5 + NativeJump::instruction_size; // pushl(); jmp;
aoqi@0 537 }
aoqi@0 538 #endif
aoqi@0 539 };
aoqi@0 540
aoqi@0 541 %} // end source_hpp
aoqi@0 542
aoqi@0 543 source %{
aoqi@0 544
aoqi@0 545 // Emit exception handler code.
aoqi@0 546 // Stuff framesize into a register and call a VM stub routine.
aoqi@0 547 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
aoqi@0 548
aoqi@0 549 // Note that the code buffer's insts_mark is always relative to insts.
aoqi@0 550 // That's why we must use the macroassembler to generate a handler.
aoqi@0 551 MacroAssembler _masm(&cbuf);
aoqi@0 552 address base = __ start_a_stub(size_exception_handler());
aoqi@0 553 if (base == NULL) return 0; // CodeBuffer::expand failed
aoqi@0 554 int offset = __ offset();
// The handler is a single jump to the shared exception blob; the assert
// below checks it stays within size_exception_handler().
aoqi@0 555 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
aoqi@0 556 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
aoqi@0 557 __ end_a_stub();
// Return the handler's starting offset within the buffer (see start_a_stub).
aoqi@0 558 return offset;
aoqi@0 559 }
aoqi@0 560
aoqi@0 561 // Emit deopt handler code.
// Pushes the handler's own pc and jumps to the deopt blob's unpack entry;
// returns the handler's starting offset within the code buffer.
aoqi@0 562 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
aoqi@0 563
aoqi@0 564 // Note that the code buffer's insts_mark is always relative to insts.
aoqi@0 565 // That's why we must use the macroassembler to generate a handler.
aoqi@0 566 MacroAssembler _masm(&cbuf);
aoqi@0 567 address base = __ start_a_stub(size_deopt_handler());
aoqi@0 568 if (base == NULL) return 0; // CodeBuffer::expand failed
aoqi@0 569 int offset = __ offset();
aoqi@0 570
aoqi@0 571 #ifdef _LP64
aoqi@0 572 address the_pc = (address) __ pc();
aoqi@0 573 Label next;
aoqi@0 574 // push a "the_pc" on the stack without destroying any registers
aoqi@0 575 // as they all may be live.
aoqi@0 576
aoqi@0 577 // push address of "next"
aoqi@0 578 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
aoqi@0 579 __ bind(next);
aoqi@0 580 // adjust it so it matches "the_pc"
aoqi@0 581 __ subptr(Address(rsp, 0), __ offset() - offset);
aoqi@0 582 #else
// On 32-bit, the current pc can be pushed directly.
aoqi@0 583 InternalAddress here(__ pc());
aoqi@0 584 __ pushptr(here.addr());
aoqi@0 585 #endif
aoqi@0 586
aoqi@0 587 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
aoqi@0 588 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
aoqi@0 589 __ end_a_stub();
aoqi@0 590 return offset;
aoqi@0 591 }
aoqi@0 592
aoqi@0 593
aoqi@0 594 //=============================================================================
aoqi@0 595
aoqi@0 596 // Float masks come from different places depending on platform.
// 64-bit: the sign mask/flip constants are provided by StubRoutines.
aoqi@0 597 #ifdef _LP64
aoqi@0 598 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
aoqi@0 599 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
aoqi@0 600 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
aoqi@0 601 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
// 32-bit: the constants come from the *_pool tables (defined elsewhere;
// not visible in this chunk).
aoqi@0 602 #else
aoqi@0 603 static address float_signmask() { return (address)float_signmask_pool; }
aoqi@0 604 static address float_signflip() { return (address)float_signflip_pool; }
aoqi@0 605 static address double_signmask() { return (address)double_signmask_pool; }
aoqi@0 606 static address double_signflip() { return (address)double_signflip_pool; }
aoqi@0 607 #endif
aoqi@0 608
aoqi@0 609
// Identify which match rules the C2 matcher may use on this CPU.
// Returns false when the ideal opcode has no match rule at all, or when
// a rule exists but the required hardware feature or VM flag is absent.
const bool Matcher::match_rule_supported(int opcode) {
  if (!has_match_rule(opcode))
    return false;

  switch (opcode) {
    case Op_PopCountI:
    case Op_PopCountL:
      // POPCNT must be enabled by flag (which implies hardware support).
      if (!UsePopCountInstruction)
        return false;
      break;
    case Op_MulVI:
      if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
        return false;
      break;
    case Op_CompareAndSwapL:
#ifdef _LP64
    case Op_CompareAndSwapP:
#endif
      // 8-byte atomic compare-and-swap needs cmpxchg8b support.
      if (!VM_Version::supports_cx8())
        return false;
      break;
  }

  return true;  // Per default match rules are supported.
}
aoqi@0 635
// Max vector size in bytes. 0 if not supported.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  assert(is_java_primitive(bt), "only primitive type vectors");
  if (UseSSE < 2) return 0;
  // SSE2 supports 128bit vectors for all types.
  // AVX2 supports 256bit vectors for all types.
  int size = (UseAVX > 1) ? 32 : 16;
  // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
  if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
    size = 32;
  // Use flag to limit vector size.
  size = MIN2(size,(int)MaxVectorSize);
  // Minimum 2 values in vector (or 4 for bytes).
  // NOTE: the cases below intentionally fall through; each wider element
  // type adds its own minimum-width check on top of the narrower ones.
  switch (bt) {
  case T_DOUBLE:
  case T_LONG:
    if (size < 16) return 0;
    // fall through
  case T_FLOAT:
  case T_INT:
    if (size < 8) return 0;
    // fall through
  case T_BOOLEAN:
  case T_BYTE:
  case T_CHAR:
  case T_SHORT:
    if (size < 4) return 0;
    break;
  default:
    ShouldNotReachHere();
  }
  return size;
}
aoqi@0 667
aoqi@0 668 // Limits on vector size (number of elements) loaded into vector.
aoqi@0 669 const int Matcher::max_vector_size(const BasicType bt) {
aoqi@0 670 return vector_width_in_bytes(bt)/type2aelembytes(bt);
aoqi@0 671 }
aoqi@0 672 const int Matcher::min_vector_size(const BasicType bt) {
aoqi@0 673 int max_size = max_vector_size(bt);
aoqi@0 674 // Min size which can be loaded into vector is 4 bytes.
aoqi@0 675 int size = (type2aelembytes(bt) == 1) ? 4 : 2;
aoqi@0 676 return MIN2(size,max_size);
aoqi@0 677 }
aoqi@0 678
aoqi@0 679 // Vector ideal reg corresponding to specidied size in bytes
aoqi@0 680 const int Matcher::vector_ideal_reg(int size) {
aoqi@0 681 assert(MaxVectorSize >= size, "");
aoqi@0 682 switch(size) {
aoqi@0 683 case 4: return Op_VecS;
aoqi@0 684 case 8: return Op_VecD;
aoqi@0 685 case 16: return Op_VecX;
aoqi@0 686 case 32: return Op_VecY;
aoqi@0 687 }
aoqi@0 688 ShouldNotReachHere();
aoqi@0 689 return 0;
aoqi@0 690 }
aoqi@0 691
// Only lowest bits of xmm reg are used for vector shift count.
// A 32-bit (VecS) register is therefore always sufficient to hold the
// shift count, regardless of the width of the vector being shifted.
const int Matcher::vector_shift_count_ideal_reg(int size) {
  return Op_VecS;
}
aoqi@0 696
// x86 supports misaligned vectors store/load.
// Alignment is only required when the AlignVector flag is set.
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}
aoqi@0 701
// x86 AES instructions are compatible with SunJCE expanded
// keys, hence we do not need to pass the original key to stubs
// (the intrinsic stubs work directly on the Java-side expanded key).
const bool Matcher::pass_original_key_for_aes() {
  return false;
}
aoqi@0 707
// Helper methods for MachSpillCopyNode::implementation().

// Emit, size, or format a vector register-to-register move.
// 'src_lo'/'dst_lo' ('_hi') are register-mask indices; 'ireg' selects the
// vector width (Op_VecS/D/X/Y). When 'cbuf' is non-NULL the instruction is
// emitted; otherwise, with !do_size, its textual form is printed to 'st'.
// Returns the encoding size in bytes (computed exactly only in 32-bit VM).
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st) {
  // In 64-bit VM size calculation is very complex. Emitting instructions
  // into scratch buffer is used to get size in 64-bit VM.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  assert(ireg == Op_VecS || // 32bit vector
         (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
         (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
         "no non-adjacent vector moves" );
  if (cbuf) {
    // Emission path: actually assemble the move into the code buffer.
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    switch (ireg) {
    case Op_VecS: // copy whole register
    case Op_VecD:
    case Op_VecX:
      __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      break;
    case Op_VecY:
      // 256-bit copy requires the AVX (VEX-encoded) form.
      __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      break;
    default:
      ShouldNotReachHere();
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == 4, "incorrect size calculattion");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    // Formatting path: print the instruction instead of emitting it.
    switch (ireg) {
    case Op_VecS:
    case Op_VecD:
    case Op_VecX:
      st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    case Op_VecY:
      st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  return 4;
}
aoqi@0 758
// Emit, size, or format a vector spill: a load from or a store to the
// stack slot at 'stack_offset' (relative to rsp). 'ireg' selects the
// vector width (Op_VecS/D/X/Y). When 'cbuf' is non-NULL the instruction
// is emitted; otherwise, with !do_size, its textual form is printed to
// 'st'. Returns the encoding size in bytes (exact only in 32-bit VM).
static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st) {
  // In 64-bit VM size calculation is very complex. Emitting instructions
  // into scratch buffer is used to get size in 64-bit VM.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  if (cbuf) {
    // Emission path: assemble the load/store into the code buffer.
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecD:
        __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecX:
        __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecY:
        // 256-bit access requires the AVX (VEX-encoded) form.
        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecD:
        __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecX:
        __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecY:
        __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      default:
        ShouldNotReachHere();
      }
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    // Extra bytes for the stack displacement: none if 0, 1 if it fits in
    // a signed byte, otherwise 4.
    int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == (5+offset_size), "incorrect size calculattion");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    // Formatting path: print the instruction instead of emitting it.
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecD:
        st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecX:
        st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecY:
        st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecD:
        st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecX:
        st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecY:
        st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      default:
        ShouldNotReachHere();
      }
    }
#endif
  }
  int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return 5+offset_size;
}
aoqi@0 852
aoqi@0 853 static inline jfloat replicate4_imm(int con, int width) {
aoqi@0 854 // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
aoqi@0 855 assert(width == 1 || width == 2, "only byte or short types here");
aoqi@0 856 int bit_width = width * 8;
aoqi@0 857 jint val = con;
aoqi@0 858 val &= (1 << bit_width) - 1; // mask off sign bits
aoqi@0 859 while(bit_width < 32) {
aoqi@0 860 val |= (val << bit_width);
aoqi@0 861 bit_width <<= 1;
aoqi@0 862 }
aoqi@0 863 jfloat fval = *((jfloat*) &val); // coerce to float type
aoqi@0 864 return fval;
aoqi@0 865 }
aoqi@0 866
aoqi@0 867 static inline jdouble replicate8_imm(int con, int width) {
aoqi@0 868 // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
aoqi@0 869 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
aoqi@0 870 int bit_width = width * 8;
aoqi@0 871 jlong val = con;
aoqi@0 872 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits
aoqi@0 873 while(bit_width < 64) {
aoqi@0 874 val |= (val << bit_width);
aoqi@0 875 bit_width <<= 1;
aoqi@0 876 }
aoqi@0 877 jdouble dval = *((jdouble*) &val); // coerce to double type
aoqi@0 878 return dval;
aoqi@0 879 }
aoqi@0 880
#ifndef PRODUCT
// Debug-only textual form of the nop padding node.
void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("nop \t# %d bytes pad for loops and calls", _count);
}
#endif
aoqi@0 886
// Emit '_count' bytes of nop padding (used to align loops and calls).
void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
  MacroAssembler _masm(&cbuf);
  __ nop(_count);
}
aoqi@0 891
// The emitted padding is exactly the requested number of bytes.
uint MachNopNode::size(PhaseRegAlloc*) const {
  return _count;
}
aoqi@0 895
#ifndef PRODUCT
// Debug-only textual form of the breakpoint node.
void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("# breakpoint");
}
#endif
aoqi@0 901
// Emit a breakpoint as a single int3 instruction.
void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
  MacroAssembler _masm(&cbuf);
  __ int3();
}
aoqi@0 906
// Size is computed generically by emitting into a scratch buffer.
uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
  return MachNode::size(ra_);
}
aoqi@0 910
aoqi@0 911 %}
aoqi@0 912
encode %{

  // Verify that the stack depth after a call is what the register
  // allocator expects, by checking for the magic cookie stored in the
  // frame. Only active with -XX:+VerifyStackAtCalls; halts on mismatch.
  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find magic cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      MacroAssembler _masm(&cbuf);
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}

%}
aoqi@0 930
aoqi@0 931
aoqi@0 932 //----------OPERANDS-----------------------------------------------------------
aoqi@0 933 // Operand definitions must precede instruction definitions for correct parsing
aoqi@0 934 // in the ADLC because operands constitute user defined types which are used in
aoqi@0 935 // instruction definitions.
aoqi@0 936
// Vectors

// 4-byte (32-bit) vector operand.
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

// 8-byte (64-bit) vector operand.
operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

// 16-byte (128-bit, XMM) vector operand.
operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

// 32-byte (256-bit, YMM) vector operand.
operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}
aoqi@0 969
aoqi@0 970
aoqi@0 971 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)
aoqi@0 972
aoqi@0 973 // ============================================================================
aoqi@0 974
// Emit int3 for a Halt node: traps if control ever reaches a path the
// compiler determined should be unreachable.
instruct ShouldNotReachHere() %{
  match(Halt);
  format %{ "int3\t# ShouldNotReachHere" %}
  ins_encode %{
    __ int3();
  %}
  ins_pipe(pipe_slow);
%}
aoqi@0 983
aoqi@0 984 // ============================================================================
aoqi@0 985
// ---- Scalar float add (AddF) ----
// SSE forms are two-operand (dst is also a source); AVX forms are
// three-operand and selected when UseAVX > 0.

// dst += src (SSE scalar, register operand).
instruct addF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst src));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// dst += [mem] (SSE scalar, memory operand).
instruct addF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst (LoadF src)));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// dst += constant loaded from the constant table (SSE scalar).
instruct addF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst con));
  format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// dst = src1 + src2 (three-operand AVX form).
instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 src2));

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// dst = src1 + [mem] (three-operand AVX form).
instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// dst = src + constant from the constant table (AVX form).
instruct addF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src con));

  format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
aoqi@0 1056
// ---- Scalar double add (AddD) ----
// SSE forms are two-operand (dst is also a source); AVX forms are
// three-operand and selected when UseAVX > 0.

// dst += src (SSE scalar, register operand).
instruct addD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst src));

  format %{ "addsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// dst += [mem] (SSE scalar, memory operand).
instruct addD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst (LoadD src)));

  format %{ "addsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// dst += constant loaded from the constant table (SSE scalar).
instruct addD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst con));
  format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// dst = src1 + src2 (three-operand AVX form).
instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 src2));

  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// dst = src1 + [mem] (three-operand AVX form).
instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 (LoadD src2)));

  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// dst = src + constant from the constant table (AVX form).
instruct addD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src con));

  format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
aoqi@0 1127
// ---- Scalar float subtract (SubF) ----
// SSE forms are two-operand (dst is also a source); AVX forms are
// three-operand and selected when UseAVX > 0.

// dst -= src (SSE scalar, register operand).
instruct subF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst src));

  format %{ "subss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// dst -= [mem] (SSE scalar, memory operand).
instruct subF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst (LoadF src)));

  format %{ "subss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// dst -= constant loaded from the constant table (SSE scalar).
instruct subF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst con));
  format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// dst = src1 - src2 (three-operand AVX form).
instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 src2));

  format %{ "vsubss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// dst = src1 - [mem] (three-operand AVX form).
instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 (LoadF src2)));

  format %{ "vsubss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// dst = src - constant from the constant table (AVX form).
instruct subF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src con));

  format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
aoqi@0 1198
// ---- Scalar double subtract (SubD) ----
// SSE forms are two-operand (dst is also a source); AVX forms are
// three-operand and selected when UseAVX > 0.

// dst -= src (SSE scalar, register operand).
instruct subD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst src));

  format %{ "subsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// dst -= [mem] (SSE scalar, memory operand).
instruct subD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst (LoadD src)));

  format %{ "subsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// dst -= constant loaded from the constant table (SSE scalar).
instruct subD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst con));
  format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// dst = src1 - src2 (three-operand AVX form).
instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 src2));

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// dst = src1 - [mem] (three-operand AVX form).
instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 (LoadD src2)));

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// dst = src - constant from the constant table (AVX form).
instruct subD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src con));

  format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
aoqi@0 1269
// ---- Scalar float multiply (MulF) ----
// SSE forms are two-operand (dst is also a source); AVX forms are
// three-operand and selected when UseAVX > 0.

// dst *= src (SSE scalar, register operand).
instruct mulF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst src));

  format %{ "mulss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// dst *= [mem] (SSE scalar, memory operand).
instruct mulF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst (LoadF src)));

  format %{ "mulss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// dst *= constant loaded from the constant table (SSE scalar).
instruct mulF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst con));
  format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// dst = src1 * src2 (three-operand AVX form).
instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 src2));

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// dst = src1 * [mem] (three-operand AVX form).
instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// dst = src * constant from the constant table (AVX form).
instruct mulF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src con));

  format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
aoqi@0 1340
// ---- Scalar double multiply (MulD) ----
// SSE forms are two-operand (dst is also a source); AVX forms are
// three-operand and selected when UseAVX > 0.

// dst *= src (SSE scalar, register operand).
instruct mulD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst src));

  format %{ "mulsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// dst *= [mem] (SSE scalar, memory operand).
instruct mulD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst (LoadD src)));

  format %{ "mulsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// dst *= constant loaded from the constant table (SSE scalar).
instruct mulD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst con));
  format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// dst = src1 * src2 (three-operand AVX form).
instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 src2));

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// dst = src1 * [mem] (three-operand AVX form).
instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 (LoadD src2)));

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// dst = src * constant from the constant table (AVX form).
instruct mulD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src con));

  format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
aoqi@0 1411
// ---- Scalar float divide (DivF) ----
// SSE forms are two-operand (dst is also a source); AVX forms are
// three-operand and selected when UseAVX > 0.

// dst /= src (SSE scalar, register operand).
instruct divF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst src));

  format %{ "divss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// dst /= [mem] (SSE scalar, memory operand).
instruct divF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst (LoadF src)));

  format %{ "divss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// dst /= constant loaded from the constant table (SSE scalar).
instruct divF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst con));
  format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// dst = src1 / src2 (three-operand AVX form).
instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 src2));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// dst = src1 / [mem] (three-operand AVX form).
instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 (LoadF src2)));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// dst = src / constant from the constant table (AVX form).
instruct divF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src con));

  format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
aoqi@0 1482
// ---- Scalar double divide (DivD) ----
// SSE forms are two-operand (dst is also a source); AVX forms are
// three-operand and selected when UseAVX > 0.

// dst /= src (SSE scalar, register operand).
instruct divD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst src));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// dst /= [mem] (SSE scalar, memory operand).
instruct divD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst (LoadD src)));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// dst /= constant loaded from the constant table (SSE scalar).
instruct divD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst con));
  format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// dst = src1 / src2 (three-operand AVX form).
instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 src2));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// dst = src1 / [mem] (three-operand AVX form).
instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 (LoadD src2)));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
aoqi@0 1541
aoqi@0 1542 instruct divD_reg_imm(regD dst, regD src, immD con) %{
aoqi@0 1543 predicate(UseAVX > 0);
aoqi@0 1544 match(Set dst (DivD src con));
aoqi@0 1545
aoqi@0 1546 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
aoqi@0 1547 ins_cost(150);
aoqi@0 1548 ins_encode %{
aoqi@0 1549 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
aoqi@0 1550 %}
aoqi@0 1551 ins_pipe(pipe_slow);
aoqi@0 1552 %}
aoqi@0 1553
aoqi@0 1554 instruct absF_reg(regF dst) %{
aoqi@0 1555 predicate((UseSSE>=1) && (UseAVX == 0));
aoqi@0 1556 match(Set dst (AbsF dst));
aoqi@0 1557 ins_cost(150);
aoqi@0 1558 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
aoqi@0 1559 ins_encode %{
aoqi@0 1560 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
aoqi@0 1561 %}
aoqi@0 1562 ins_pipe(pipe_slow);
aoqi@0 1563 %}
aoqi@0 1564
aoqi@0 1565 instruct absF_reg_reg(regF dst, regF src) %{
aoqi@0 1566 predicate(UseAVX > 0);
aoqi@0 1567 match(Set dst (AbsF src));
aoqi@0 1568 ins_cost(150);
aoqi@0 1569 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
aoqi@0 1570 ins_encode %{
aoqi@0 1571 bool vector256 = false;
aoqi@0 1572 __ vandps($dst$$XMMRegister, $src$$XMMRegister,
aoqi@0 1573 ExternalAddress(float_signmask()), vector256);
aoqi@0 1574 %}
aoqi@0 1575 ins_pipe(pipe_slow);
aoqi@0 1576 %}
aoqi@0 1577
aoqi@0 1578 instruct absD_reg(regD dst) %{
aoqi@0 1579 predicate((UseSSE>=2) && (UseAVX == 0));
aoqi@0 1580 match(Set dst (AbsD dst));
aoqi@0 1581 ins_cost(150);
aoqi@0 1582 format %{ "andpd $dst, [0x7fffffffffffffff]\t"
aoqi@0 1583 "# abs double by sign masking" %}
aoqi@0 1584 ins_encode %{
aoqi@0 1585 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
aoqi@0 1586 %}
aoqi@0 1587 ins_pipe(pipe_slow);
aoqi@0 1588 %}
aoqi@0 1589
aoqi@0 1590 instruct absD_reg_reg(regD dst, regD src) %{
aoqi@0 1591 predicate(UseAVX > 0);
aoqi@0 1592 match(Set dst (AbsD src));
aoqi@0 1593 ins_cost(150);
aoqi@0 1594 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
aoqi@0 1595 "# abs double by sign masking" %}
aoqi@0 1596 ins_encode %{
aoqi@0 1597 bool vector256 = false;
aoqi@0 1598 __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
aoqi@0 1599 ExternalAddress(double_signmask()), vector256);
aoqi@0 1600 %}
aoqi@0 1601 ins_pipe(pipe_slow);
aoqi@0 1602 %}
aoqi@0 1603
aoqi@0 1604 instruct negF_reg(regF dst) %{
aoqi@0 1605 predicate((UseSSE>=1) && (UseAVX == 0));
aoqi@0 1606 match(Set dst (NegF dst));
aoqi@0 1607 ins_cost(150);
aoqi@0 1608 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
aoqi@0 1609 ins_encode %{
aoqi@0 1610 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
aoqi@0 1611 %}
aoqi@0 1612 ins_pipe(pipe_slow);
aoqi@0 1613 %}
aoqi@0 1614
aoqi@0 1615 instruct negF_reg_reg(regF dst, regF src) %{
aoqi@0 1616 predicate(UseAVX > 0);
aoqi@0 1617 match(Set dst (NegF src));
aoqi@0 1618 ins_cost(150);
aoqi@0 1619 format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
aoqi@0 1620 ins_encode %{
aoqi@0 1621 bool vector256 = false;
aoqi@0 1622 __ vxorps($dst$$XMMRegister, $src$$XMMRegister,
aoqi@0 1623 ExternalAddress(float_signflip()), vector256);
aoqi@0 1624 %}
aoqi@0 1625 ins_pipe(pipe_slow);
aoqi@0 1626 %}
aoqi@0 1627
aoqi@0 1628 instruct negD_reg(regD dst) %{
aoqi@0 1629 predicate((UseSSE>=2) && (UseAVX == 0));
aoqi@0 1630 match(Set dst (NegD dst));
aoqi@0 1631 ins_cost(150);
aoqi@0 1632 format %{ "xorpd $dst, [0x8000000000000000]\t"
aoqi@0 1633 "# neg double by sign flipping" %}
aoqi@0 1634 ins_encode %{
aoqi@0 1635 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
aoqi@0 1636 %}
aoqi@0 1637 ins_pipe(pipe_slow);
aoqi@0 1638 %}
aoqi@0 1639
aoqi@0 1640 instruct negD_reg_reg(regD dst, regD src) %{
aoqi@0 1641 predicate(UseAVX > 0);
aoqi@0 1642 match(Set dst (NegD src));
aoqi@0 1643 ins_cost(150);
aoqi@0 1644 format %{ "vxorpd $dst, $src, [0x8000000000000000]\t"
aoqi@0 1645 "# neg double by sign flipping" %}
aoqi@0 1646 ins_encode %{
aoqi@0 1647 bool vector256 = false;
aoqi@0 1648 __ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
aoqi@0 1649 ExternalAddress(double_signflip()), vector256);
aoqi@0 1650 %}
aoqi@0 1651 ins_pipe(pipe_slow);
aoqi@0 1652 %}
aoqi@0 1653
aoqi@0 1654 instruct sqrtF_reg(regF dst, regF src) %{
aoqi@0 1655 predicate(UseSSE>=1);
aoqi@0 1656 match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
aoqi@0 1657
aoqi@0 1658 format %{ "sqrtss $dst, $src" %}
aoqi@0 1659 ins_cost(150);
aoqi@0 1660 ins_encode %{
aoqi@0 1661 __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 1662 %}
aoqi@0 1663 ins_pipe(pipe_slow);
aoqi@0 1664 %}
aoqi@0 1665
aoqi@0 1666 instruct sqrtF_mem(regF dst, memory src) %{
aoqi@0 1667 predicate(UseSSE>=1);
aoqi@0 1668 match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
aoqi@0 1669
aoqi@0 1670 format %{ "sqrtss $dst, $src" %}
aoqi@0 1671 ins_cost(150);
aoqi@0 1672 ins_encode %{
aoqi@0 1673 __ sqrtss($dst$$XMMRegister, $src$$Address);
aoqi@0 1674 %}
aoqi@0 1675 ins_pipe(pipe_slow);
aoqi@0 1676 %}
aoqi@0 1677
aoqi@0 1678 instruct sqrtF_imm(regF dst, immF con) %{
aoqi@0 1679 predicate(UseSSE>=1);
aoqi@0 1680 match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
aoqi@0 1681 format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
aoqi@0 1682 ins_cost(150);
aoqi@0 1683 ins_encode %{
aoqi@0 1684 __ sqrtss($dst$$XMMRegister, $constantaddress($con));
aoqi@0 1685 %}
aoqi@0 1686 ins_pipe(pipe_slow);
aoqi@0 1687 %}
aoqi@0 1688
aoqi@0 1689 instruct sqrtD_reg(regD dst, regD src) %{
aoqi@0 1690 predicate(UseSSE>=2);
aoqi@0 1691 match(Set dst (SqrtD src));
aoqi@0 1692
aoqi@0 1693 format %{ "sqrtsd $dst, $src" %}
aoqi@0 1694 ins_cost(150);
aoqi@0 1695 ins_encode %{
aoqi@0 1696 __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 1697 %}
aoqi@0 1698 ins_pipe(pipe_slow);
aoqi@0 1699 %}
aoqi@0 1700
aoqi@0 1701 instruct sqrtD_mem(regD dst, memory src) %{
aoqi@0 1702 predicate(UseSSE>=2);
aoqi@0 1703 match(Set dst (SqrtD (LoadD src)));
aoqi@0 1704
aoqi@0 1705 format %{ "sqrtsd $dst, $src" %}
aoqi@0 1706 ins_cost(150);
aoqi@0 1707 ins_encode %{
aoqi@0 1708 __ sqrtsd($dst$$XMMRegister, $src$$Address);
aoqi@0 1709 %}
aoqi@0 1710 ins_pipe(pipe_slow);
aoqi@0 1711 %}
aoqi@0 1712
aoqi@0 1713 instruct sqrtD_imm(regD dst, immD con) %{
aoqi@0 1714 predicate(UseSSE>=2);
aoqi@0 1715 match(Set dst (SqrtD con));
aoqi@0 1716 format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
aoqi@0 1717 ins_cost(150);
aoqi@0 1718 ins_encode %{
aoqi@0 1719 __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
aoqi@0 1720 %}
aoqi@0 1721 ins_pipe(pipe_slow);
aoqi@0 1722 %}
aoqi@0 1723
aoqi@0 1724
aoqi@0 1725 // ====================VECTOR INSTRUCTIONS=====================================
aoqi@0 1726
aoqi@0 1727 // Load vectors (4 bytes long)
aoqi@0 1728 instruct loadV4(vecS dst, memory mem) %{
aoqi@0 1729 predicate(n->as_LoadVector()->memory_size() == 4);
aoqi@0 1730 match(Set dst (LoadVector mem));
aoqi@0 1731 ins_cost(125);
aoqi@0 1732 format %{ "movd $dst,$mem\t! load vector (4 bytes)" %}
aoqi@0 1733 ins_encode %{
aoqi@0 1734 __ movdl($dst$$XMMRegister, $mem$$Address);
aoqi@0 1735 %}
aoqi@0 1736 ins_pipe( pipe_slow );
aoqi@0 1737 %}
aoqi@0 1738
aoqi@0 1739 // Load vectors (8 bytes long)
aoqi@0 1740 instruct loadV8(vecD dst, memory mem) %{
aoqi@0 1741 predicate(n->as_LoadVector()->memory_size() == 8);
aoqi@0 1742 match(Set dst (LoadVector mem));
aoqi@0 1743 ins_cost(125);
aoqi@0 1744 format %{ "movq $dst,$mem\t! load vector (8 bytes)" %}
aoqi@0 1745 ins_encode %{
aoqi@0 1746 __ movq($dst$$XMMRegister, $mem$$Address);
aoqi@0 1747 %}
aoqi@0 1748 ins_pipe( pipe_slow );
aoqi@0 1749 %}
aoqi@0 1750
aoqi@0 1751 // Load vectors (16 bytes long)
aoqi@0 1752 instruct loadV16(vecX dst, memory mem) %{
aoqi@0 1753 predicate(n->as_LoadVector()->memory_size() == 16);
aoqi@0 1754 match(Set dst (LoadVector mem));
aoqi@0 1755 ins_cost(125);
aoqi@0 1756 format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %}
aoqi@0 1757 ins_encode %{
aoqi@0 1758 __ movdqu($dst$$XMMRegister, $mem$$Address);
aoqi@0 1759 %}
aoqi@0 1760 ins_pipe( pipe_slow );
aoqi@0 1761 %}
aoqi@0 1762
aoqi@0 1763 // Load vectors (32 bytes long)
aoqi@0 1764 instruct loadV32(vecY dst, memory mem) %{
aoqi@0 1765 predicate(n->as_LoadVector()->memory_size() == 32);
aoqi@0 1766 match(Set dst (LoadVector mem));
aoqi@0 1767 ins_cost(125);
aoqi@0 1768 format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
aoqi@0 1769 ins_encode %{
aoqi@0 1770 __ vmovdqu($dst$$XMMRegister, $mem$$Address);
aoqi@0 1771 %}
aoqi@0 1772 ins_pipe( pipe_slow );
aoqi@0 1773 %}
aoqi@0 1774
aoqi@0 1775 // Store vectors
aoqi@0 1776 instruct storeV4(memory mem, vecS src) %{
aoqi@0 1777 predicate(n->as_StoreVector()->memory_size() == 4);
aoqi@0 1778 match(Set mem (StoreVector mem src));
aoqi@0 1779 ins_cost(145);
aoqi@0 1780 format %{ "movd $mem,$src\t! store vector (4 bytes)" %}
aoqi@0 1781 ins_encode %{
aoqi@0 1782 __ movdl($mem$$Address, $src$$XMMRegister);
aoqi@0 1783 %}
aoqi@0 1784 ins_pipe( pipe_slow );
aoqi@0 1785 %}
aoqi@0 1786
aoqi@0 1787 instruct storeV8(memory mem, vecD src) %{
aoqi@0 1788 predicate(n->as_StoreVector()->memory_size() == 8);
aoqi@0 1789 match(Set mem (StoreVector mem src));
aoqi@0 1790 ins_cost(145);
aoqi@0 1791 format %{ "movq $mem,$src\t! store vector (8 bytes)" %}
aoqi@0 1792 ins_encode %{
aoqi@0 1793 __ movq($mem$$Address, $src$$XMMRegister);
aoqi@0 1794 %}
aoqi@0 1795 ins_pipe( pipe_slow );
aoqi@0 1796 %}
aoqi@0 1797
aoqi@0 1798 instruct storeV16(memory mem, vecX src) %{
aoqi@0 1799 predicate(n->as_StoreVector()->memory_size() == 16);
aoqi@0 1800 match(Set mem (StoreVector mem src));
aoqi@0 1801 ins_cost(145);
aoqi@0 1802 format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %}
aoqi@0 1803 ins_encode %{
aoqi@0 1804 __ movdqu($mem$$Address, $src$$XMMRegister);
aoqi@0 1805 %}
aoqi@0 1806 ins_pipe( pipe_slow );
aoqi@0 1807 %}
aoqi@0 1808
aoqi@0 1809 instruct storeV32(memory mem, vecY src) %{
aoqi@0 1810 predicate(n->as_StoreVector()->memory_size() == 32);
aoqi@0 1811 match(Set mem (StoreVector mem src));
aoqi@0 1812 ins_cost(145);
aoqi@0 1813 format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
aoqi@0 1814 ins_encode %{
aoqi@0 1815 __ vmovdqu($mem$$Address, $src$$XMMRegister);
aoqi@0 1816 %}
aoqi@0 1817 ins_pipe( pipe_slow );
aoqi@0 1818 %}
aoqi@0 1819
aoqi@0 1820 // Replicate byte scalar to be vector
aoqi@0 1821 instruct Repl4B(vecS dst, rRegI src) %{
aoqi@0 1822 predicate(n->as_Vector()->length() == 4);
aoqi@0 1823 match(Set dst (ReplicateB src));
aoqi@0 1824 format %{ "movd $dst,$src\n\t"
aoqi@0 1825 "punpcklbw $dst,$dst\n\t"
aoqi@0 1826 "pshuflw $dst,$dst,0x00\t! replicate4B" %}
aoqi@0 1827 ins_encode %{
aoqi@0 1828 __ movdl($dst$$XMMRegister, $src$$Register);
aoqi@0 1829 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 1830 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
aoqi@0 1831 %}
aoqi@0 1832 ins_pipe( pipe_slow );
aoqi@0 1833 %}
aoqi@0 1834
aoqi@0 1835 instruct Repl8B(vecD dst, rRegI src) %{
aoqi@0 1836 predicate(n->as_Vector()->length() == 8);
aoqi@0 1837 match(Set dst (ReplicateB src));
aoqi@0 1838 format %{ "movd $dst,$src\n\t"
aoqi@0 1839 "punpcklbw $dst,$dst\n\t"
aoqi@0 1840 "pshuflw $dst,$dst,0x00\t! replicate8B" %}
aoqi@0 1841 ins_encode %{
aoqi@0 1842 __ movdl($dst$$XMMRegister, $src$$Register);
aoqi@0 1843 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 1844 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
aoqi@0 1845 %}
aoqi@0 1846 ins_pipe( pipe_slow );
aoqi@0 1847 %}
aoqi@0 1848
aoqi@0 1849 instruct Repl16B(vecX dst, rRegI src) %{
aoqi@0 1850 predicate(n->as_Vector()->length() == 16);
aoqi@0 1851 match(Set dst (ReplicateB src));
aoqi@0 1852 format %{ "movd $dst,$src\n\t"
aoqi@0 1853 "punpcklbw $dst,$dst\n\t"
aoqi@0 1854 "pshuflw $dst,$dst,0x00\n\t"
aoqi@0 1855 "punpcklqdq $dst,$dst\t! replicate16B" %}
aoqi@0 1856 ins_encode %{
aoqi@0 1857 __ movdl($dst$$XMMRegister, $src$$Register);
aoqi@0 1858 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 1859 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
aoqi@0 1860 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 1861 %}
aoqi@0 1862 ins_pipe( pipe_slow );
aoqi@0 1863 %}
aoqi@0 1864
aoqi@0 1865 instruct Repl32B(vecY dst, rRegI src) %{
aoqi@0 1866 predicate(n->as_Vector()->length() == 32);
aoqi@0 1867 match(Set dst (ReplicateB src));
aoqi@0 1868 format %{ "movd $dst,$src\n\t"
aoqi@0 1869 "punpcklbw $dst,$dst\n\t"
aoqi@0 1870 "pshuflw $dst,$dst,0x00\n\t"
aoqi@0 1871 "punpcklqdq $dst,$dst\n\t"
aoqi@0 1872 "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
aoqi@0 1873 ins_encode %{
aoqi@0 1874 __ movdl($dst$$XMMRegister, $src$$Register);
aoqi@0 1875 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 1876 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
aoqi@0 1877 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 1878 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 1879 %}
aoqi@0 1880 ins_pipe( pipe_slow );
aoqi@0 1881 %}
aoqi@0 1882
aoqi@0 1883 // Replicate byte scalar immediate to be vector by loading from const table.
aoqi@0 1884 instruct Repl4B_imm(vecS dst, immI con) %{
aoqi@0 1885 predicate(n->as_Vector()->length() == 4);
aoqi@0 1886 match(Set dst (ReplicateB con));
aoqi@0 1887 format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %}
aoqi@0 1888 ins_encode %{
aoqi@0 1889 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
aoqi@0 1890 %}
aoqi@0 1891 ins_pipe( pipe_slow );
aoqi@0 1892 %}
aoqi@0 1893
aoqi@0 1894 instruct Repl8B_imm(vecD dst, immI con) %{
aoqi@0 1895 predicate(n->as_Vector()->length() == 8);
aoqi@0 1896 match(Set dst (ReplicateB con));
aoqi@0 1897 format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %}
aoqi@0 1898 ins_encode %{
aoqi@0 1899 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
aoqi@0 1900 %}
aoqi@0 1901 ins_pipe( pipe_slow );
aoqi@0 1902 %}
aoqi@0 1903
aoqi@0 1904 instruct Repl16B_imm(vecX dst, immI con) %{
aoqi@0 1905 predicate(n->as_Vector()->length() == 16);
aoqi@0 1906 match(Set dst (ReplicateB con));
aoqi@0 1907 format %{ "movq $dst,[$constantaddress]\n\t"
aoqi@0 1908 "punpcklqdq $dst,$dst\t! replicate16B($con)" %}
aoqi@0 1909 ins_encode %{
aoqi@0 1910 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
aoqi@0 1911 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 1912 %}
aoqi@0 1913 ins_pipe( pipe_slow );
aoqi@0 1914 %}
aoqi@0 1915
aoqi@0 1916 instruct Repl32B_imm(vecY dst, immI con) %{
aoqi@0 1917 predicate(n->as_Vector()->length() == 32);
aoqi@0 1918 match(Set dst (ReplicateB con));
aoqi@0 1919 format %{ "movq $dst,[$constantaddress]\n\t"
aoqi@0 1920 "punpcklqdq $dst,$dst\n\t"
aoqi@0 1921 "vinserti128h $dst,$dst,$dst\t! lreplicate32B($con)" %}
aoqi@0 1922 ins_encode %{
aoqi@0 1923 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
aoqi@0 1924 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 1925 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 1926 %}
aoqi@0 1927 ins_pipe( pipe_slow );
aoqi@0 1928 %}
aoqi@0 1929
aoqi@0 1930 // Replicate byte scalar zero to be vector
aoqi@0 1931 instruct Repl4B_zero(vecS dst, immI0 zero) %{
aoqi@0 1932 predicate(n->as_Vector()->length() == 4);
aoqi@0 1933 match(Set dst (ReplicateB zero));
aoqi@0 1934 format %{ "pxor $dst,$dst\t! replicate4B zero" %}
aoqi@0 1935 ins_encode %{
aoqi@0 1936 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 1937 %}
aoqi@0 1938 ins_pipe( fpu_reg_reg );
aoqi@0 1939 %}
aoqi@0 1940
aoqi@0 1941 instruct Repl8B_zero(vecD dst, immI0 zero) %{
aoqi@0 1942 predicate(n->as_Vector()->length() == 8);
aoqi@0 1943 match(Set dst (ReplicateB zero));
aoqi@0 1944 format %{ "pxor $dst,$dst\t! replicate8B zero" %}
aoqi@0 1945 ins_encode %{
aoqi@0 1946 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 1947 %}
aoqi@0 1948 ins_pipe( fpu_reg_reg );
aoqi@0 1949 %}
aoqi@0 1950
aoqi@0 1951 instruct Repl16B_zero(vecX dst, immI0 zero) %{
aoqi@0 1952 predicate(n->as_Vector()->length() == 16);
aoqi@0 1953 match(Set dst (ReplicateB zero));
aoqi@0 1954 format %{ "pxor $dst,$dst\t! replicate16B zero" %}
aoqi@0 1955 ins_encode %{
aoqi@0 1956 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 1957 %}
aoqi@0 1958 ins_pipe( fpu_reg_reg );
aoqi@0 1959 %}
aoqi@0 1960
aoqi@0 1961 instruct Repl32B_zero(vecY dst, immI0 zero) %{
aoqi@0 1962 predicate(n->as_Vector()->length() == 32);
aoqi@0 1963 match(Set dst (ReplicateB zero));
aoqi@0 1964 format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %}
aoqi@0 1965 ins_encode %{
aoqi@0 1966 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
aoqi@0 1967 bool vector256 = true;
aoqi@0 1968 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
aoqi@0 1969 %}
aoqi@0 1970 ins_pipe( fpu_reg_reg );
aoqi@0 1971 %}
aoqi@0 1972
aoqi@0 1973 // Replicate char/short (2 byte) scalar to be vector
aoqi@0 1974 instruct Repl2S(vecS dst, rRegI src) %{
aoqi@0 1975 predicate(n->as_Vector()->length() == 2);
aoqi@0 1976 match(Set dst (ReplicateS src));
aoqi@0 1977 format %{ "movd $dst,$src\n\t"
aoqi@0 1978 "pshuflw $dst,$dst,0x00\t! replicate2S" %}
aoqi@0 1979 ins_encode %{
aoqi@0 1980 __ movdl($dst$$XMMRegister, $src$$Register);
aoqi@0 1981 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
aoqi@0 1982 %}
aoqi@0 1983 ins_pipe( fpu_reg_reg );
aoqi@0 1984 %}
aoqi@0 1985
aoqi@0 1986 instruct Repl4S(vecD dst, rRegI src) %{
aoqi@0 1987 predicate(n->as_Vector()->length() == 4);
aoqi@0 1988 match(Set dst (ReplicateS src));
aoqi@0 1989 format %{ "movd $dst,$src\n\t"
aoqi@0 1990 "pshuflw $dst,$dst,0x00\t! replicate4S" %}
aoqi@0 1991 ins_encode %{
aoqi@0 1992 __ movdl($dst$$XMMRegister, $src$$Register);
aoqi@0 1993 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
aoqi@0 1994 %}
aoqi@0 1995 ins_pipe( fpu_reg_reg );
aoqi@0 1996 %}
aoqi@0 1997
aoqi@0 1998 instruct Repl8S(vecX dst, rRegI src) %{
aoqi@0 1999 predicate(n->as_Vector()->length() == 8);
aoqi@0 2000 match(Set dst (ReplicateS src));
aoqi@0 2001 format %{ "movd $dst,$src\n\t"
aoqi@0 2002 "pshuflw $dst,$dst,0x00\n\t"
aoqi@0 2003 "punpcklqdq $dst,$dst\t! replicate8S" %}
aoqi@0 2004 ins_encode %{
aoqi@0 2005 __ movdl($dst$$XMMRegister, $src$$Register);
aoqi@0 2006 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
aoqi@0 2007 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2008 %}
aoqi@0 2009 ins_pipe( pipe_slow );
aoqi@0 2010 %}
aoqi@0 2011
aoqi@0 2012 instruct Repl16S(vecY dst, rRegI src) %{
aoqi@0 2013 predicate(n->as_Vector()->length() == 16);
aoqi@0 2014 match(Set dst (ReplicateS src));
aoqi@0 2015 format %{ "movd $dst,$src\n\t"
aoqi@0 2016 "pshuflw $dst,$dst,0x00\n\t"
aoqi@0 2017 "punpcklqdq $dst,$dst\n\t"
aoqi@0 2018 "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
aoqi@0 2019 ins_encode %{
aoqi@0 2020 __ movdl($dst$$XMMRegister, $src$$Register);
aoqi@0 2021 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
aoqi@0 2022 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2023 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2024 %}
aoqi@0 2025 ins_pipe( pipe_slow );
aoqi@0 2026 %}
aoqi@0 2027
aoqi@0 2028 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
aoqi@0 2029 instruct Repl2S_imm(vecS dst, immI con) %{
aoqi@0 2030 predicate(n->as_Vector()->length() == 2);
aoqi@0 2031 match(Set dst (ReplicateS con));
aoqi@0 2032 format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %}
aoqi@0 2033 ins_encode %{
aoqi@0 2034 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
aoqi@0 2035 %}
aoqi@0 2036 ins_pipe( fpu_reg_reg );
aoqi@0 2037 %}
aoqi@0 2038
aoqi@0 2039 instruct Repl4S_imm(vecD dst, immI con) %{
aoqi@0 2040 predicate(n->as_Vector()->length() == 4);
aoqi@0 2041 match(Set dst (ReplicateS con));
aoqi@0 2042 format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %}
aoqi@0 2043 ins_encode %{
aoqi@0 2044 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
aoqi@0 2045 %}
aoqi@0 2046 ins_pipe( fpu_reg_reg );
aoqi@0 2047 %}
aoqi@0 2048
aoqi@0 2049 instruct Repl8S_imm(vecX dst, immI con) %{
aoqi@0 2050 predicate(n->as_Vector()->length() == 8);
aoqi@0 2051 match(Set dst (ReplicateS con));
aoqi@0 2052 format %{ "movq $dst,[$constantaddress]\n\t"
aoqi@0 2053 "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
aoqi@0 2054 ins_encode %{
aoqi@0 2055 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
aoqi@0 2056 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2057 %}
aoqi@0 2058 ins_pipe( pipe_slow );
aoqi@0 2059 %}
aoqi@0 2060
aoqi@0 2061 instruct Repl16S_imm(vecY dst, immI con) %{
aoqi@0 2062 predicate(n->as_Vector()->length() == 16);
aoqi@0 2063 match(Set dst (ReplicateS con));
aoqi@0 2064 format %{ "movq $dst,[$constantaddress]\n\t"
aoqi@0 2065 "punpcklqdq $dst,$dst\n\t"
aoqi@0 2066 "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %}
aoqi@0 2067 ins_encode %{
aoqi@0 2068 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
aoqi@0 2069 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2070 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2071 %}
aoqi@0 2072 ins_pipe( pipe_slow );
aoqi@0 2073 %}
aoqi@0 2074
aoqi@0 2075 // Replicate char/short (2 byte) scalar zero to be vector
aoqi@0 2076 instruct Repl2S_zero(vecS dst, immI0 zero) %{
aoqi@0 2077 predicate(n->as_Vector()->length() == 2);
aoqi@0 2078 match(Set dst (ReplicateS zero));
aoqi@0 2079 format %{ "pxor $dst,$dst\t! replicate2S zero" %}
aoqi@0 2080 ins_encode %{
aoqi@0 2081 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2082 %}
aoqi@0 2083 ins_pipe( fpu_reg_reg );
aoqi@0 2084 %}
aoqi@0 2085
aoqi@0 2086 instruct Repl4S_zero(vecD dst, immI0 zero) %{
aoqi@0 2087 predicate(n->as_Vector()->length() == 4);
aoqi@0 2088 match(Set dst (ReplicateS zero));
aoqi@0 2089 format %{ "pxor $dst,$dst\t! replicate4S zero" %}
aoqi@0 2090 ins_encode %{
aoqi@0 2091 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2092 %}
aoqi@0 2093 ins_pipe( fpu_reg_reg );
aoqi@0 2094 %}
aoqi@0 2095
aoqi@0 2096 instruct Repl8S_zero(vecX dst, immI0 zero) %{
aoqi@0 2097 predicate(n->as_Vector()->length() == 8);
aoqi@0 2098 match(Set dst (ReplicateS zero));
aoqi@0 2099 format %{ "pxor $dst,$dst\t! replicate8S zero" %}
aoqi@0 2100 ins_encode %{
aoqi@0 2101 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2102 %}
aoqi@0 2103 ins_pipe( fpu_reg_reg );
aoqi@0 2104 %}
aoqi@0 2105
aoqi@0 2106 instruct Repl16S_zero(vecY dst, immI0 zero) %{
aoqi@0 2107 predicate(n->as_Vector()->length() == 16);
aoqi@0 2108 match(Set dst (ReplicateS zero));
aoqi@0 2109 format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %}
aoqi@0 2110 ins_encode %{
aoqi@0 2111 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
aoqi@0 2112 bool vector256 = true;
aoqi@0 2113 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
aoqi@0 2114 %}
aoqi@0 2115 ins_pipe( fpu_reg_reg );
aoqi@0 2116 %}
aoqi@0 2117
aoqi@0 2118 // Replicate integer (4 byte) scalar to be vector
aoqi@0 2119 instruct Repl2I(vecD dst, rRegI src) %{
aoqi@0 2120 predicate(n->as_Vector()->length() == 2);
aoqi@0 2121 match(Set dst (ReplicateI src));
aoqi@0 2122 format %{ "movd $dst,$src\n\t"
aoqi@0 2123 "pshufd $dst,$dst,0x00\t! replicate2I" %}
aoqi@0 2124 ins_encode %{
aoqi@0 2125 __ movdl($dst$$XMMRegister, $src$$Register);
aoqi@0 2126 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
aoqi@0 2127 %}
aoqi@0 2128 ins_pipe( fpu_reg_reg );
aoqi@0 2129 %}
aoqi@0 2130
aoqi@0 2131 instruct Repl4I(vecX dst, rRegI src) %{
aoqi@0 2132 predicate(n->as_Vector()->length() == 4);
aoqi@0 2133 match(Set dst (ReplicateI src));
aoqi@0 2134 format %{ "movd $dst,$src\n\t"
aoqi@0 2135 "pshufd $dst,$dst,0x00\t! replicate4I" %}
aoqi@0 2136 ins_encode %{
aoqi@0 2137 __ movdl($dst$$XMMRegister, $src$$Register);
aoqi@0 2138 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
aoqi@0 2139 %}
aoqi@0 2140 ins_pipe( pipe_slow );
aoqi@0 2141 %}
aoqi@0 2142
aoqi@0 2143 instruct Repl8I(vecY dst, rRegI src) %{
aoqi@0 2144 predicate(n->as_Vector()->length() == 8);
aoqi@0 2145 match(Set dst (ReplicateI src));
aoqi@0 2146 format %{ "movd $dst,$src\n\t"
aoqi@0 2147 "pshufd $dst,$dst,0x00\n\t"
aoqi@0 2148 "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
aoqi@0 2149 ins_encode %{
aoqi@0 2150 __ movdl($dst$$XMMRegister, $src$$Register);
aoqi@0 2151 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
aoqi@0 2152 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2153 %}
aoqi@0 2154 ins_pipe( pipe_slow );
aoqi@0 2155 %}
aoqi@0 2156
aoqi@0 2157 // Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
aoqi@0 2158 instruct Repl2I_imm(vecD dst, immI con) %{
aoqi@0 2159 predicate(n->as_Vector()->length() == 2);
aoqi@0 2160 match(Set dst (ReplicateI con));
aoqi@0 2161 format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %}
aoqi@0 2162 ins_encode %{
aoqi@0 2163 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
aoqi@0 2164 %}
aoqi@0 2165 ins_pipe( fpu_reg_reg );
aoqi@0 2166 %}
aoqi@0 2167
aoqi@0 2168 instruct Repl4I_imm(vecX dst, immI con) %{
aoqi@0 2169 predicate(n->as_Vector()->length() == 4);
aoqi@0 2170 match(Set dst (ReplicateI con));
aoqi@0 2171 format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
aoqi@0 2172 "punpcklqdq $dst,$dst" %}
aoqi@0 2173 ins_encode %{
aoqi@0 2174 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
aoqi@0 2175 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2176 %}
aoqi@0 2177 ins_pipe( pipe_slow );
aoqi@0 2178 %}
aoqi@0 2179
aoqi@0 2180 instruct Repl8I_imm(vecY dst, immI con) %{
aoqi@0 2181 predicate(n->as_Vector()->length() == 8);
aoqi@0 2182 match(Set dst (ReplicateI con));
aoqi@0 2183 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
aoqi@0 2184 "punpcklqdq $dst,$dst\n\t"
aoqi@0 2185 "vinserti128h $dst,$dst,$dst" %}
aoqi@0 2186 ins_encode %{
aoqi@0 2187 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
aoqi@0 2188 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2189 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2190 %}
aoqi@0 2191 ins_pipe( pipe_slow );
aoqi@0 2192 %}
aoqi@0 2193
aoqi@0 2194 // Integer could be loaded into xmm register directly from memory.
aoqi@0 2195 instruct Repl2I_mem(vecD dst, memory mem) %{
aoqi@0 2196 predicate(n->as_Vector()->length() == 2);
aoqi@0 2197 match(Set dst (ReplicateI (LoadI mem)));
aoqi@0 2198 format %{ "movd $dst,$mem\n\t"
aoqi@0 2199 "pshufd $dst,$dst,0x00\t! replicate2I" %}
aoqi@0 2200 ins_encode %{
aoqi@0 2201 __ movdl($dst$$XMMRegister, $mem$$Address);
aoqi@0 2202 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
aoqi@0 2203 %}
aoqi@0 2204 ins_pipe( fpu_reg_reg );
aoqi@0 2205 %}
aoqi@0 2206
aoqi@0 2207 instruct Repl4I_mem(vecX dst, memory mem) %{
aoqi@0 2208 predicate(n->as_Vector()->length() == 4);
aoqi@0 2209 match(Set dst (ReplicateI (LoadI mem)));
aoqi@0 2210 format %{ "movd $dst,$mem\n\t"
aoqi@0 2211 "pshufd $dst,$dst,0x00\t! replicate4I" %}
aoqi@0 2212 ins_encode %{
aoqi@0 2213 __ movdl($dst$$XMMRegister, $mem$$Address);
aoqi@0 2214 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
aoqi@0 2215 %}
aoqi@0 2216 ins_pipe( pipe_slow );
aoqi@0 2217 %}
aoqi@0 2218
aoqi@0 2219 instruct Repl8I_mem(vecY dst, memory mem) %{
aoqi@0 2220 predicate(n->as_Vector()->length() == 8);
aoqi@0 2221 match(Set dst (ReplicateI (LoadI mem)));
aoqi@0 2222 format %{ "movd $dst,$mem\n\t"
aoqi@0 2223 "pshufd $dst,$dst,0x00\n\t"
aoqi@0 2224 "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
aoqi@0 2225 ins_encode %{
aoqi@0 2226 __ movdl($dst$$XMMRegister, $mem$$Address);
aoqi@0 2227 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
aoqi@0 2228 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2229 %}
aoqi@0 2230 ins_pipe( pipe_slow );
aoqi@0 2231 %}
aoqi@0 2232
aoqi@0 2233 // Replicate integer (4 byte) scalar zero to be vector
aoqi@0 2234 instruct Repl2I_zero(vecD dst, immI0 zero) %{
aoqi@0 2235 predicate(n->as_Vector()->length() == 2);
aoqi@0 2236 match(Set dst (ReplicateI zero));
aoqi@0 2237 format %{ "pxor $dst,$dst\t! replicate2I" %}
aoqi@0 2238 ins_encode %{
aoqi@0 2239 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2240 %}
aoqi@0 2241 ins_pipe( fpu_reg_reg );
aoqi@0 2242 %}
aoqi@0 2243
aoqi@0 2244 instruct Repl4I_zero(vecX dst, immI0 zero) %{
aoqi@0 2245 predicate(n->as_Vector()->length() == 4);
aoqi@0 2246 match(Set dst (ReplicateI zero));
aoqi@0 2247 format %{ "pxor $dst,$dst\t! replicate4I zero)" %}
aoqi@0 2248 ins_encode %{
aoqi@0 2249 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2250 %}
aoqi@0 2251 ins_pipe( fpu_reg_reg );
aoqi@0 2252 %}
aoqi@0 2253
aoqi@0 2254 instruct Repl8I_zero(vecY dst, immI0 zero) %{
aoqi@0 2255 predicate(n->as_Vector()->length() == 8);
aoqi@0 2256 match(Set dst (ReplicateI zero));
aoqi@0 2257 format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %}
aoqi@0 2258 ins_encode %{
aoqi@0 2259 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
aoqi@0 2260 bool vector256 = true;
aoqi@0 2261 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
aoqi@0 2262 %}
aoqi@0 2263 ins_pipe( fpu_reg_reg );
aoqi@0 2264 %}
aoqi@0 2265
aoqi@0 2266 // Replicate long (8 byte) scalar to be vector
aoqi@0 2267 #ifdef _LP64
aoqi@0 2268 instruct Repl2L(vecX dst, rRegL src) %{
aoqi@0 2269 predicate(n->as_Vector()->length() == 2);
aoqi@0 2270 match(Set dst (ReplicateL src));
aoqi@0 2271 format %{ "movdq $dst,$src\n\t"
aoqi@0 2272 "punpcklqdq $dst,$dst\t! replicate2L" %}
aoqi@0 2273 ins_encode %{
aoqi@0 2274 __ movdq($dst$$XMMRegister, $src$$Register);
aoqi@0 2275 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2276 %}
aoqi@0 2277 ins_pipe( pipe_slow );
aoqi@0 2278 %}
aoqi@0 2279
aoqi@0 2280 instruct Repl4L(vecY dst, rRegL src) %{
aoqi@0 2281 predicate(n->as_Vector()->length() == 4);
aoqi@0 2282 match(Set dst (ReplicateL src));
aoqi@0 2283 format %{ "movdq $dst,$src\n\t"
aoqi@0 2284 "punpcklqdq $dst,$dst\n\t"
aoqi@0 2285 "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
aoqi@0 2286 ins_encode %{
aoqi@0 2287 __ movdq($dst$$XMMRegister, $src$$Register);
aoqi@0 2288 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2289 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2290 %}
aoqi@0 2291 ins_pipe( pipe_slow );
aoqi@0 2292 %}
aoqi@0 2293 #else // _LP64
aoqi@0 2294 instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
aoqi@0 2295 predicate(n->as_Vector()->length() == 2);
aoqi@0 2296 match(Set dst (ReplicateL src));
aoqi@0 2297 effect(TEMP dst, USE src, TEMP tmp);
aoqi@0 2298 format %{ "movdl $dst,$src.lo\n\t"
aoqi@0 2299 "movdl $tmp,$src.hi\n\t"
aoqi@0 2300 "punpckldq $dst,$tmp\n\t"
aoqi@0 2301 "punpcklqdq $dst,$dst\t! replicate2L"%}
aoqi@0 2302 ins_encode %{
aoqi@0 2303 __ movdl($dst$$XMMRegister, $src$$Register);
aoqi@0 2304 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
aoqi@0 2305 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
aoqi@0 2306 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2307 %}
aoqi@0 2308 ins_pipe( pipe_slow );
aoqi@0 2309 %}
aoqi@0 2310
aoqi@0 2311 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
aoqi@0 2312 predicate(n->as_Vector()->length() == 4);
aoqi@0 2313 match(Set dst (ReplicateL src));
aoqi@0 2314 effect(TEMP dst, USE src, TEMP tmp);
aoqi@0 2315 format %{ "movdl $dst,$src.lo\n\t"
aoqi@0 2316 "movdl $tmp,$src.hi\n\t"
aoqi@0 2317 "punpckldq $dst,$tmp\n\t"
aoqi@0 2318 "punpcklqdq $dst,$dst\n\t"
aoqi@0 2319 "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
aoqi@0 2320 ins_encode %{
aoqi@0 2321 __ movdl($dst$$XMMRegister, $src$$Register);
aoqi@0 2322 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
aoqi@0 2323 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
aoqi@0 2324 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2325 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2326 %}
aoqi@0 2327 ins_pipe( pipe_slow );
aoqi@0 2328 %}
aoqi@0 2329 #endif // _LP64
aoqi@0 2330
aoqi@0 2331 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
// The 64-bit constant is materialized via the constant table ($constantaddress),
// then duplicated into both quadword lanes with punpcklqdq.
aoqi@0 2332 instruct Repl2L_imm(vecX dst, immL con) %{
aoqi@0 2333 predicate(n->as_Vector()->length() == 2);
aoqi@0 2334 match(Set dst (ReplicateL con));
aoqi@0 2335 format %{ "movq $dst,[$constantaddress]\n\t"
aoqi@0 2336 "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
aoqi@0 2337 ins_encode %{
aoqi@0 2338 __ movq($dst$$XMMRegister, $constantaddress($con));
aoqi@0 2339 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2340 %}
aoqi@0 2341 ins_pipe( pipe_slow );
aoqi@0 2342 %}
aoqi@0 2343
// 256-bit variant of Repl2L_imm: load the constant, duplicate it within the
// low 128 bits, then mirror the low half into the high half (vinserti128h).
aoqi@0 2344 instruct Repl4L_imm(vecY dst, immL con) %{
aoqi@0 2345 predicate(n->as_Vector()->length() == 4);
aoqi@0 2346 match(Set dst (ReplicateL con));
aoqi@0 2347 format %{ "movq $dst,[$constantaddress]\n\t"
aoqi@0 2348 "punpcklqdq $dst,$dst\n\t"
aoqi@0 2349 "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %}
aoqi@0 2350 ins_encode %{
aoqi@0 2351 __ movq($dst$$XMMRegister, $constantaddress($con));
aoqi@0 2352 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2353 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2354 %}
aoqi@0 2355 ins_pipe( pipe_slow );
aoqi@0 2356 %}
aoqi@0 2357
aoqi@0 2358 // Long could be loaded into xmm register directly from memory.
// Memory-operand forms: match ReplicateL fed by a LoadL so the scalar load is
// folded into the replicate sequence (movq from memory, then duplicate).
aoqi@0 2359 instruct Repl2L_mem(vecX dst, memory mem) %{
aoqi@0 2360 predicate(n->as_Vector()->length() == 2);
aoqi@0 2361 match(Set dst (ReplicateL (LoadL mem)));
aoqi@0 2362 format %{ "movq $dst,$mem\n\t"
aoqi@0 2363 "punpcklqdq $dst,$dst\t! replicate2L" %}
aoqi@0 2364 ins_encode %{
aoqi@0 2365 __ movq($dst$$XMMRegister, $mem$$Address);
aoqi@0 2366 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2367 %}
aoqi@0 2368 ins_pipe( pipe_slow );
aoqi@0 2369 %}
aoqi@0 2370
// 256-bit variant: as above, plus low-to-high 128-bit mirror.
aoqi@0 2371 instruct Repl4L_mem(vecY dst, memory mem) %{
aoqi@0 2372 predicate(n->as_Vector()->length() == 4);
aoqi@0 2373 match(Set dst (ReplicateL (LoadL mem)));
aoqi@0 2374 format %{ "movq $dst,$mem\n\t"
aoqi@0 2375 "punpcklqdq $dst,$dst\n\t"
aoqi@0 2376 "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
aoqi@0 2377 ins_encode %{
aoqi@0 2378 __ movq($dst$$XMMRegister, $mem$$Address);
aoqi@0 2379 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2380 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2381 %}
aoqi@0 2382 ins_pipe( pipe_slow );
aoqi@0 2383 %}
aoqi@0 2384
aoqi@0 2385 // Replicate long (8 byte) scalar zero to be vector
// Zero forms use self-XOR: pxor for 128-bit, vpxor (3-operand) for 256-bit.
aoqi@0 2386 instruct Repl2L_zero(vecX dst, immL0 zero) %{
aoqi@0 2387 predicate(n->as_Vector()->length() == 2);
aoqi@0 2388 match(Set dst (ReplicateL zero));
aoqi@0 2389 format %{ "pxor $dst,$dst\t! replicate2L zero" %}
aoqi@0 2390 ins_encode %{
aoqi@0 2391 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2392 %}
aoqi@0 2393 ins_pipe( fpu_reg_reg );
aoqi@0 2394 %}
aoqi@0 2395
aoqi@0 2396 instruct Repl4L_zero(vecY dst, immL0 zero) %{
aoqi@0 2397 predicate(n->as_Vector()->length() == 4);
aoqi@0 2398 match(Set dst (ReplicateL zero));
aoqi@0 2399 format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %}
aoqi@0 2400 ins_encode %{
aoqi@0 2401 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
// NOTE(review): the call below is vpxor; presumably the assembler helper
// substitutes vxorpd on AVX1 as the comment says — confirm in MacroAssembler.
aoqi@0 2402 bool vector256 = true;
aoqi@0 2403 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
aoqi@0 2404 %}
aoqi@0 2405 ins_pipe( fpu_reg_reg );
aoqi@0 2406 %}
aoqi@0 2407
aoqi@0 2408 // Replicate float (4 byte) scalar to be vector
// Broadcast element 0 of src across both float lanes via pshufd imm 0x00.
// Fix: the format string printed "pshufd $dst,$dst,0x00", but the encoding
// reads $src (pshufd(dst, src, 0x00)); the disassembly comment now matches
// the emitted operands. Format strings affect debug output only.
aoqi@0 2409 instruct Repl2F(vecD dst, regF src) %{
aoqi@0 2410 predicate(n->as_Vector()->length() == 2);
aoqi@0 2411 match(Set dst (ReplicateF src));
aoqi@0 2412 format %{ "pshufd $dst,$src,0x00\t! replicate2F" %}
aoqi@0 2413 ins_encode %{
aoqi@0 2414 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
aoqi@0 2415 %}
aoqi@0 2416 ins_pipe( fpu_reg_reg );
aoqi@0 2417 %}
aoqi@0 2418
// Broadcast element 0 of src across all four float lanes (pshufd imm 0x00).
// Fix: format string said "pshufd $dst,$dst,0x00" while the encoding reads
// $src; corrected to match the emitted instruction (same fix as Repl2F, and
// consistent with Repl8F which already prints $src). Debug output only.
aoqi@0 2419 instruct Repl4F(vecX dst, regF src) %{
aoqi@0 2420 predicate(n->as_Vector()->length() == 4);
aoqi@0 2421 match(Set dst (ReplicateF src));
aoqi@0 2422 format %{ "pshufd $dst,$src,0x00\t! replicate4F" %}
aoqi@0 2423 ins_encode %{
aoqi@0 2424 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
aoqi@0 2425 %}
aoqi@0 2426 ins_pipe( pipe_slow );
aoqi@0 2427 %}
aoqi@0 2428
// 256-bit float broadcast: pshufd splats src element 0 across the low 128
// bits, then the low half is mirrored into the high half (vinsertf128h).
aoqi@0 2429 instruct Repl8F(vecY dst, regF src) %{
aoqi@0 2430 predicate(n->as_Vector()->length() == 8);
aoqi@0 2431 match(Set dst (ReplicateF src));
aoqi@0 2432 format %{ "pshufd $dst,$src,0x00\n\t"
aoqi@0 2433 "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
aoqi@0 2434 ins_encode %{
aoqi@0 2435 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
aoqi@0 2436 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2437 %}
aoqi@0 2438 ins_pipe( pipe_slow );
aoqi@0 2439 %}
aoqi@0 2440
aoqi@0 2441 // Replicate float (4 byte) scalar zero to be vector
// Float zero forms use xorps (packed-single self-XOR); the 256-bit form uses
// the 3-operand vxorps with vector256 = true.
aoqi@0 2442 instruct Repl2F_zero(vecD dst, immF0 zero) %{
aoqi@0 2443 predicate(n->as_Vector()->length() == 2);
aoqi@0 2444 match(Set dst (ReplicateF zero));
aoqi@0 2445 format %{ "xorps $dst,$dst\t! replicate2F zero" %}
aoqi@0 2446 ins_encode %{
aoqi@0 2447 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2448 %}
aoqi@0 2449 ins_pipe( fpu_reg_reg );
aoqi@0 2450 %}
aoqi@0 2451
aoqi@0 2452 instruct Repl4F_zero(vecX dst, immF0 zero) %{
aoqi@0 2453 predicate(n->as_Vector()->length() == 4);
aoqi@0 2454 match(Set dst (ReplicateF zero));
aoqi@0 2455 format %{ "xorps $dst,$dst\t! replicate4F zero" %}
aoqi@0 2456 ins_encode %{
aoqi@0 2457 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2458 %}
aoqi@0 2459 ins_pipe( fpu_reg_reg );
aoqi@0 2460 %}
aoqi@0 2461
aoqi@0 2462 instruct Repl8F_zero(vecY dst, immF0 zero) %{
aoqi@0 2463 predicate(n->as_Vector()->length() == 8);
aoqi@0 2464 match(Set dst (ReplicateF zero));
aoqi@0 2465 format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %}
aoqi@0 2466 ins_encode %{
aoqi@0 2467 bool vector256 = true;
aoqi@0 2468 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
aoqi@0 2469 %}
aoqi@0 2470 ins_pipe( fpu_reg_reg );
aoqi@0 2471 %}
aoqi@0 2472
aoqi@0 2473 // Replicate double (8 bytes) scalar to be vector
// pshufd imm 0x44 selects dwords {0,1,0,1} of src, i.e. duplicates the low
// quadword (the double) into both 64-bit lanes.
aoqi@0 2474 instruct Repl2D(vecX dst, regD src) %{
aoqi@0 2475 predicate(n->as_Vector()->length() == 2);
aoqi@0 2476 match(Set dst (ReplicateD src));
aoqi@0 2477 format %{ "pshufd $dst,$src,0x44\t! replicate2D" %}
aoqi@0 2478 ins_encode %{
aoqi@0 2479 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
aoqi@0 2480 %}
aoqi@0 2481 ins_pipe( pipe_slow );
aoqi@0 2482 %}
aoqi@0 2483
// 256-bit variant: duplicate within the low 128 bits, then mirror the low
// half into the high half (vinsertf128h).
aoqi@0 2484 instruct Repl4D(vecY dst, regD src) %{
aoqi@0 2485 predicate(n->as_Vector()->length() == 4);
aoqi@0 2486 match(Set dst (ReplicateD src));
aoqi@0 2487 format %{ "pshufd $dst,$src,0x44\n\t"
aoqi@0 2488 "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
aoqi@0 2489 ins_encode %{
aoqi@0 2490 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
aoqi@0 2491 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2492 %}
aoqi@0 2493 ins_pipe( pipe_slow );
aoqi@0 2494 %}
aoqi@0 2495
aoqi@0 2496 // Replicate double (8 byte) scalar zero to be vector
// Double zero forms use xorpd (packed-double self-XOR); 256-bit uses the
// 3-operand vxorpd with vector256 = true.
aoqi@0 2497 instruct Repl2D_zero(vecX dst, immD0 zero) %{
aoqi@0 2498 predicate(n->as_Vector()->length() == 2);
aoqi@0 2499 match(Set dst (ReplicateD zero));
aoqi@0 2500 format %{ "xorpd $dst,$dst\t! replicate2D zero" %}
aoqi@0 2501 ins_encode %{
aoqi@0 2502 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
aoqi@0 2503 %}
aoqi@0 2504 ins_pipe( fpu_reg_reg );
aoqi@0 2505 %}
aoqi@0 2506
aoqi@0 2507 instruct Repl4D_zero(vecY dst, immD0 zero) %{
aoqi@0 2508 predicate(n->as_Vector()->length() == 4);
aoqi@0 2509 match(Set dst (ReplicateD zero));
aoqi@0 2510 format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %}
aoqi@0 2511 ins_encode %{
aoqi@0 2512 bool vector256 = true;
aoqi@0 2513 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
aoqi@0 2514 %}
aoqi@0 2515 ins_pipe( fpu_reg_reg );
aoqi@0 2516 %}
aoqi@0 2517
aoqi@0 2518 // ====================VECTOR ARITHMETIC=======================================
aoqi@0 2519
aoqi@0 2520 // --------------------------------- ADD --------------------------------------
aoqi@0 2521
aoqi@0 2522 // Bytes vector add
// Family pattern used throughout this file:
// - non-AVX two-operand forms (dst op= src) use the SSE instruction (paddb);
// - *_reg forms (UseAVX > 0) use the non-destructive 3-operand AVX encoding;
// - *_mem forms fold a LoadVector into the AVX memory-operand encoding;
// - 256-bit (vecY) integer forms require AVX2 (UseAVX > 1), vector256 = true.
aoqi@0 2523 instruct vadd4B(vecS dst, vecS src) %{
aoqi@0 2524 predicate(n->as_Vector()->length() == 4);
aoqi@0 2525 match(Set dst (AddVB dst src));
aoqi@0 2526 format %{ "paddb $dst,$src\t! add packed4B" %}
aoqi@0 2527 ins_encode %{
aoqi@0 2528 __ paddb($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 2529 %}
aoqi@0 2530 ins_pipe( pipe_slow );
aoqi@0 2531 %}
aoqi@0 2532
aoqi@0 2533 instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
aoqi@0 2534 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 2535 match(Set dst (AddVB src1 src2));
aoqi@0 2536 format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
aoqi@0 2537 ins_encode %{
aoqi@0 2538 bool vector256 = false;
aoqi@0 2539 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 2540 %}
aoqi@0 2541 ins_pipe( pipe_slow );
aoqi@0 2542 %}
aoqi@0 2543
aoqi@0 2544 instruct vadd8B(vecD dst, vecD src) %{
aoqi@0 2545 predicate(n->as_Vector()->length() == 8);
aoqi@0 2546 match(Set dst (AddVB dst src));
aoqi@0 2547 format %{ "paddb $dst,$src\t! add packed8B" %}
aoqi@0 2548 ins_encode %{
aoqi@0 2549 __ paddb($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 2550 %}
aoqi@0 2551 ins_pipe( pipe_slow );
aoqi@0 2552 %}
aoqi@0 2553
aoqi@0 2554 instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
aoqi@0 2555 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
aoqi@0 2556 match(Set dst (AddVB src1 src2));
aoqi@0 2557 format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
aoqi@0 2558 ins_encode %{
aoqi@0 2559 bool vector256 = false;
aoqi@0 2560 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 2561 %}
aoqi@0 2562 ins_pipe( pipe_slow );
aoqi@0 2563 %}
aoqi@0 2564
aoqi@0 2565 instruct vadd16B(vecX dst, vecX src) %{
aoqi@0 2566 predicate(n->as_Vector()->length() == 16);
aoqi@0 2567 match(Set dst (AddVB dst src));
aoqi@0 2568 format %{ "paddb $dst,$src\t! add packed16B" %}
aoqi@0 2569 ins_encode %{
aoqi@0 2570 __ paddb($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 2571 %}
aoqi@0 2572 ins_pipe( pipe_slow );
aoqi@0 2573 %}
aoqi@0 2574
aoqi@0 2575 instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
aoqi@0 2576 predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
aoqi@0 2577 match(Set dst (AddVB src1 src2));
aoqi@0 2578 format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
aoqi@0 2579 ins_encode %{
aoqi@0 2580 bool vector256 = false;
aoqi@0 2581 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 2582 %}
aoqi@0 2583 ins_pipe( pipe_slow );
aoqi@0 2584 %}
aoqi@0 2585
aoqi@0 2586 instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
aoqi@0 2587 predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
aoqi@0 2588 match(Set dst (AddVB src (LoadVector mem)));
aoqi@0 2589 format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
aoqi@0 2590 ins_encode %{
aoqi@0 2591 bool vector256 = false;
aoqi@0 2592 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 2593 %}
aoqi@0 2594 ins_pipe( pipe_slow );
aoqi@0 2595 %}
aoqi@0 2596
aoqi@0 2597 instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
aoqi@0 2598 predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
aoqi@0 2599 match(Set dst (AddVB src1 src2));
aoqi@0 2600 format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
aoqi@0 2601 ins_encode %{
aoqi@0 2602 bool vector256 = true;
aoqi@0 2603 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 2604 %}
aoqi@0 2605 ins_pipe( pipe_slow );
aoqi@0 2606 %}
aoqi@0 2607
aoqi@0 2608 instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
aoqi@0 2609 predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
aoqi@0 2610 match(Set dst (AddVB src (LoadVector mem)));
aoqi@0 2611 format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
aoqi@0 2612 ins_encode %{
aoqi@0 2613 bool vector256 = true;
aoqi@0 2614 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 2615 %}
aoqi@0 2616 ins_pipe( pipe_slow );
aoqi@0 2617 %}
aoqi@0 2618
aoqi@0 2619 // Shorts/Chars vector add
// Same family pattern as the byte adds: SSE paddw for the two-operand forms,
// AVX vpaddw for 3-operand register/memory forms, AVX2 for 256-bit (vecY).
aoqi@0 2620 instruct vadd2S(vecS dst, vecS src) %{
aoqi@0 2621 predicate(n->as_Vector()->length() == 2);
aoqi@0 2622 match(Set dst (AddVS dst src));
aoqi@0 2623 format %{ "paddw $dst,$src\t! add packed2S" %}
aoqi@0 2624 ins_encode %{
aoqi@0 2625 __ paddw($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 2626 %}
aoqi@0 2627 ins_pipe( pipe_slow );
aoqi@0 2628 %}
aoqi@0 2629
aoqi@0 2630 instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
aoqi@0 2631 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 2632 match(Set dst (AddVS src1 src2));
aoqi@0 2633 format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
aoqi@0 2634 ins_encode %{
aoqi@0 2635 bool vector256 = false;
aoqi@0 2636 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 2637 %}
aoqi@0 2638 ins_pipe( pipe_slow );
aoqi@0 2639 %}
aoqi@0 2640
aoqi@0 2641 instruct vadd4S(vecD dst, vecD src) %{
aoqi@0 2642 predicate(n->as_Vector()->length() == 4);
aoqi@0 2643 match(Set dst (AddVS dst src));
aoqi@0 2644 format %{ "paddw $dst,$src\t! add packed4S" %}
aoqi@0 2645 ins_encode %{
aoqi@0 2646 __ paddw($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 2647 %}
aoqi@0 2648 ins_pipe( pipe_slow );
aoqi@0 2649 %}
aoqi@0 2650
aoqi@0 2651 instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
aoqi@0 2652 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 2653 match(Set dst (AddVS src1 src2));
aoqi@0 2654 format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
aoqi@0 2655 ins_encode %{
aoqi@0 2656 bool vector256 = false;
aoqi@0 2657 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 2658 %}
aoqi@0 2659 ins_pipe( pipe_slow );
aoqi@0 2660 %}
aoqi@0 2661
aoqi@0 2662 instruct vadd8S(vecX dst, vecX src) %{
aoqi@0 2663 predicate(n->as_Vector()->length() == 8);
aoqi@0 2664 match(Set dst (AddVS dst src));
aoqi@0 2665 format %{ "paddw $dst,$src\t! add packed8S" %}
aoqi@0 2666 ins_encode %{
aoqi@0 2667 __ paddw($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 2668 %}
aoqi@0 2669 ins_pipe( pipe_slow );
aoqi@0 2670 %}
aoqi@0 2671
aoqi@0 2672 instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
aoqi@0 2673 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
aoqi@0 2674 match(Set dst (AddVS src1 src2));
aoqi@0 2675 format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
aoqi@0 2676 ins_encode %{
aoqi@0 2677 bool vector256 = false;
aoqi@0 2678 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 2679 %}
aoqi@0 2680 ins_pipe( pipe_slow );
aoqi@0 2681 %}
aoqi@0 2682
aoqi@0 2683 instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
aoqi@0 2684 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
aoqi@0 2685 match(Set dst (AddVS src (LoadVector mem)));
aoqi@0 2686 format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
aoqi@0 2687 ins_encode %{
aoqi@0 2688 bool vector256 = false;
aoqi@0 2689 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 2690 %}
aoqi@0 2691 ins_pipe( pipe_slow );
aoqi@0 2692 %}
aoqi@0 2693
aoqi@0 2694 instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
aoqi@0 2695 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
aoqi@0 2696 match(Set dst (AddVS src1 src2));
aoqi@0 2697 format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
aoqi@0 2698 ins_encode %{
aoqi@0 2699 bool vector256 = true;
aoqi@0 2700 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 2701 %}
aoqi@0 2702 ins_pipe( pipe_slow );
aoqi@0 2703 %}
aoqi@0 2704
aoqi@0 2705 instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
aoqi@0 2706 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
aoqi@0 2707 match(Set dst (AddVS src (LoadVector mem)));
aoqi@0 2708 format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
aoqi@0 2709 ins_encode %{
aoqi@0 2710 bool vector256 = true;
aoqi@0 2711 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 2712 %}
aoqi@0 2713 ins_pipe( pipe_slow );
aoqi@0 2714 %}
aoqi@0 2715
aoqi@0 2716 // Integers vector add
// Same family pattern: SSE paddd two-operand, AVX vpaddd 3-operand/memory,
// AVX2 (UseAVX > 1) for the 256-bit 8I forms.
aoqi@0 2717 instruct vadd2I(vecD dst, vecD src) %{
aoqi@0 2718 predicate(n->as_Vector()->length() == 2);
aoqi@0 2719 match(Set dst (AddVI dst src));
aoqi@0 2720 format %{ "paddd $dst,$src\t! add packed2I" %}
aoqi@0 2721 ins_encode %{
aoqi@0 2722 __ paddd($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 2723 %}
aoqi@0 2724 ins_pipe( pipe_slow );
aoqi@0 2725 %}
aoqi@0 2726
aoqi@0 2727 instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
aoqi@0 2728 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 2729 match(Set dst (AddVI src1 src2));
aoqi@0 2730 format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %}
aoqi@0 2731 ins_encode %{
aoqi@0 2732 bool vector256 = false;
aoqi@0 2733 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 2734 %}
aoqi@0 2735 ins_pipe( pipe_slow );
aoqi@0 2736 %}
aoqi@0 2737
aoqi@0 2738 instruct vadd4I(vecX dst, vecX src) %{
aoqi@0 2739 predicate(n->as_Vector()->length() == 4);
aoqi@0 2740 match(Set dst (AddVI dst src));
aoqi@0 2741 format %{ "paddd $dst,$src\t! add packed4I" %}
aoqi@0 2742 ins_encode %{
aoqi@0 2743 __ paddd($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 2744 %}
aoqi@0 2745 ins_pipe( pipe_slow );
aoqi@0 2746 %}
aoqi@0 2747
aoqi@0 2748 instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
aoqi@0 2749 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 2750 match(Set dst (AddVI src1 src2));
aoqi@0 2751 format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
aoqi@0 2752 ins_encode %{
aoqi@0 2753 bool vector256 = false;
aoqi@0 2754 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 2755 %}
aoqi@0 2756 ins_pipe( pipe_slow );
aoqi@0 2757 %}
aoqi@0 2758
aoqi@0 2759 instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
aoqi@0 2760 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 2761 match(Set dst (AddVI src (LoadVector mem)));
aoqi@0 2762 format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %}
aoqi@0 2763 ins_encode %{
aoqi@0 2764 bool vector256 = false;
aoqi@0 2765 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 2766 %}
aoqi@0 2767 ins_pipe( pipe_slow );
aoqi@0 2768 %}
aoqi@0 2769
aoqi@0 2770 instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
aoqi@0 2771 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
aoqi@0 2772 match(Set dst (AddVI src1 src2));
aoqi@0 2773 format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %}
aoqi@0 2774 ins_encode %{
aoqi@0 2775 bool vector256 = true;
aoqi@0 2776 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 2777 %}
aoqi@0 2778 ins_pipe( pipe_slow );
aoqi@0 2779 %}
aoqi@0 2780
aoqi@0 2781 instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
aoqi@0 2782 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
aoqi@0 2783 match(Set dst (AddVI src (LoadVector mem)));
aoqi@0 2784 format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %}
aoqi@0 2785 ins_encode %{
aoqi@0 2786 bool vector256 = true;
aoqi@0 2787 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 2788 %}
aoqi@0 2789 ins_pipe( pipe_slow );
aoqi@0 2790 %}
aoqi@0 2791
aoqi@0 2792 // Longs vector add
// Same family pattern with paddq/vpaddq; AVX2 for the 256-bit 4L forms.
aoqi@0 2793 instruct vadd2L(vecX dst, vecX src) %{
aoqi@0 2794 predicate(n->as_Vector()->length() == 2);
aoqi@0 2795 match(Set dst (AddVL dst src));
aoqi@0 2796 format %{ "paddq $dst,$src\t! add packed2L" %}
aoqi@0 2797 ins_encode %{
aoqi@0 2798 __ paddq($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 2799 %}
aoqi@0 2800 ins_pipe( pipe_slow );
aoqi@0 2801 %}
aoqi@0 2802
aoqi@0 2803 instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
aoqi@0 2804 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 2805 match(Set dst (AddVL src1 src2));
aoqi@0 2806 format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
aoqi@0 2807 ins_encode %{
aoqi@0 2808 bool vector256 = false;
aoqi@0 2809 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 2810 %}
aoqi@0 2811 ins_pipe( pipe_slow );
aoqi@0 2812 %}
aoqi@0 2813
aoqi@0 2814 instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
aoqi@0 2815 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 2816 match(Set dst (AddVL src (LoadVector mem)));
aoqi@0 2817 format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %}
aoqi@0 2818 ins_encode %{
aoqi@0 2819 bool vector256 = false;
aoqi@0 2820 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 2821 %}
aoqi@0 2822 ins_pipe( pipe_slow );
aoqi@0 2823 %}
aoqi@0 2824
aoqi@0 2825 instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
aoqi@0 2826 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
aoqi@0 2827 match(Set dst (AddVL src1 src2));
aoqi@0 2828 format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
aoqi@0 2829 ins_encode %{
aoqi@0 2830 bool vector256 = true;
aoqi@0 2831 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 2832 %}
aoqi@0 2833 ins_pipe( pipe_slow );
aoqi@0 2834 %}
aoqi@0 2835
aoqi@0 2836 instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
aoqi@0 2837 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
aoqi@0 2838 match(Set dst (AddVL src (LoadVector mem)));
aoqi@0 2839 format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %}
aoqi@0 2840 ins_encode %{
aoqi@0 2841 bool vector256 = true;
aoqi@0 2842 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 2843 %}
aoqi@0 2844 ins_pipe( pipe_slow );
aoqi@0 2845 %}
aoqi@0 2846
aoqi@0 2847 // Floats vector add
// FP adds use addps/vaddps. Note the 256-bit FP forms require only AVX1
// (UseAVX > 0), unlike the integer families which need AVX2 for 256-bit.
aoqi@0 2848 instruct vadd2F(vecD dst, vecD src) %{
aoqi@0 2849 predicate(n->as_Vector()->length() == 2);
aoqi@0 2850 match(Set dst (AddVF dst src));
aoqi@0 2851 format %{ "addps $dst,$src\t! add packed2F" %}
aoqi@0 2852 ins_encode %{
aoqi@0 2853 __ addps($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 2854 %}
aoqi@0 2855 ins_pipe( pipe_slow );
aoqi@0 2856 %}
aoqi@0 2857
aoqi@0 2858 instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
aoqi@0 2859 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 2860 match(Set dst (AddVF src1 src2));
aoqi@0 2861 format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
aoqi@0 2862 ins_encode %{
aoqi@0 2863 bool vector256 = false;
aoqi@0 2864 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 2865 %}
aoqi@0 2866 ins_pipe( pipe_slow );
aoqi@0 2867 %}
aoqi@0 2868
aoqi@0 2869 instruct vadd4F(vecX dst, vecX src) %{
aoqi@0 2870 predicate(n->as_Vector()->length() == 4);
aoqi@0 2871 match(Set dst (AddVF dst src));
aoqi@0 2872 format %{ "addps $dst,$src\t! add packed4F" %}
aoqi@0 2873 ins_encode %{
aoqi@0 2874 __ addps($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 2875 %}
aoqi@0 2876 ins_pipe( pipe_slow );
aoqi@0 2877 %}
aoqi@0 2878
aoqi@0 2879 instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
aoqi@0 2880 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 2881 match(Set dst (AddVF src1 src2));
aoqi@0 2882 format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
aoqi@0 2883 ins_encode %{
aoqi@0 2884 bool vector256 = false;
aoqi@0 2885 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 2886 %}
aoqi@0 2887 ins_pipe( pipe_slow );
aoqi@0 2888 %}
aoqi@0 2889
aoqi@0 2890 instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
aoqi@0 2891 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 2892 match(Set dst (AddVF src (LoadVector mem)));
aoqi@0 2893 format %{ "vaddps $dst,$src,$mem\t! add packed4F" %}
aoqi@0 2894 ins_encode %{
aoqi@0 2895 bool vector256 = false;
aoqi@0 2896 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 2897 %}
aoqi@0 2898 ins_pipe( pipe_slow );
aoqi@0 2899 %}
aoqi@0 2900
aoqi@0 2901 instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
aoqi@0 2902 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
aoqi@0 2903 match(Set dst (AddVF src1 src2));
aoqi@0 2904 format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
aoqi@0 2905 ins_encode %{
aoqi@0 2906 bool vector256 = true;
aoqi@0 2907 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 2908 %}
aoqi@0 2909 ins_pipe( pipe_slow );
aoqi@0 2910 %}
aoqi@0 2911
aoqi@0 2912 instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
aoqi@0 2913 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
aoqi@0 2914 match(Set dst (AddVF src (LoadVector mem)));
aoqi@0 2915 format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
aoqi@0 2916 ins_encode %{
aoqi@0 2917 bool vector256 = true;
aoqi@0 2918 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 2919 %}
aoqi@0 2920 ins_pipe( pipe_slow );
aoqi@0 2921 %}
aoqi@0 2922
aoqi@0 2923 // Doubles vector add
// Same FP pattern with addpd/vaddpd; 256-bit forms need only AVX1.
aoqi@0 2924 instruct vadd2D(vecX dst, vecX src) %{
aoqi@0 2925 predicate(n->as_Vector()->length() == 2);
aoqi@0 2926 match(Set dst (AddVD dst src));
aoqi@0 2927 format %{ "addpd $dst,$src\t! add packed2D" %}
aoqi@0 2928 ins_encode %{
aoqi@0 2929 __ addpd($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 2930 %}
aoqi@0 2931 ins_pipe( pipe_slow );
aoqi@0 2932 %}
aoqi@0 2933
aoqi@0 2934 instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
aoqi@0 2935 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 2936 match(Set dst (AddVD src1 src2));
aoqi@0 2937 format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
aoqi@0 2938 ins_encode %{
aoqi@0 2939 bool vector256 = false;
aoqi@0 2940 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 2941 %}
aoqi@0 2942 ins_pipe( pipe_slow );
aoqi@0 2943 %}
aoqi@0 2944
aoqi@0 2945 instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
aoqi@0 2946 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 2947 match(Set dst (AddVD src (LoadVector mem)));
aoqi@0 2948 format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %}
aoqi@0 2949 ins_encode %{
aoqi@0 2950 bool vector256 = false;
aoqi@0 2951 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 2952 %}
aoqi@0 2953 ins_pipe( pipe_slow );
aoqi@0 2954 %}
aoqi@0 2955
aoqi@0 2956 instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
aoqi@0 2957 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 2958 match(Set dst (AddVD src1 src2));
aoqi@0 2959 format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
aoqi@0 2960 ins_encode %{
aoqi@0 2961 bool vector256 = true;
aoqi@0 2962 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 2963 %}
aoqi@0 2964 ins_pipe( pipe_slow );
aoqi@0 2965 %}
aoqi@0 2966
aoqi@0 2967 instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
aoqi@0 2968 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 2969 match(Set dst (AddVD src (LoadVector mem)));
aoqi@0 2970 format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
aoqi@0 2971 ins_encode %{
aoqi@0 2972 bool vector256 = true;
aoqi@0 2973 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 2974 %}
aoqi@0 2975 ins_pipe( pipe_slow );
aoqi@0 2976 %}
aoqi@0 2977
aoqi@0 2978 // --------------------------------- SUB --------------------------------------
aoqi@0 2979
aoqi@0 2980 // Bytes vector sub
aoqi@0 2981 instruct vsub4B(vecS dst, vecS src) %{
aoqi@0 2982 predicate(n->as_Vector()->length() == 4);
aoqi@0 2983 match(Set dst (SubVB dst src));
aoqi@0 2984 format %{ "psubb $dst,$src\t! sub packed4B" %}
aoqi@0 2985 ins_encode %{
aoqi@0 2986 __ psubb($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 2987 %}
aoqi@0 2988 ins_pipe( pipe_slow );
aoqi@0 2989 %}
aoqi@0 2990
aoqi@0 2991 instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
aoqi@0 2992 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 2993 match(Set dst (SubVB src1 src2));
aoqi@0 2994 format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
aoqi@0 2995 ins_encode %{
aoqi@0 2996 bool vector256 = false;
aoqi@0 2997 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 2998 %}
aoqi@0 2999 ins_pipe( pipe_slow );
aoqi@0 3000 %}
aoqi@0 3001
aoqi@0 3002 instruct vsub8B(vecD dst, vecD src) %{
aoqi@0 3003 predicate(n->as_Vector()->length() == 8);
aoqi@0 3004 match(Set dst (SubVB dst src));
aoqi@0 3005 format %{ "psubb $dst,$src\t! sub packed8B" %}
aoqi@0 3006 ins_encode %{
aoqi@0 3007 __ psubb($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 3008 %}
aoqi@0 3009 ins_pipe( pipe_slow );
aoqi@0 3010 %}
aoqi@0 3011
aoqi@0 3012 instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
aoqi@0 3013 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
aoqi@0 3014 match(Set dst (SubVB src1 src2));
aoqi@0 3015 format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
aoqi@0 3016 ins_encode %{
aoqi@0 3017 bool vector256 = false;
aoqi@0 3018 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 3019 %}
aoqi@0 3020 ins_pipe( pipe_slow );
aoqi@0 3021 %}
aoqi@0 3022
aoqi@0 3023 instruct vsub16B(vecX dst, vecX src) %{
aoqi@0 3024 predicate(n->as_Vector()->length() == 16);
aoqi@0 3025 match(Set dst (SubVB dst src));
aoqi@0 3026 format %{ "psubb $dst,$src\t! sub packed16B" %}
aoqi@0 3027 ins_encode %{
aoqi@0 3028 __ psubb($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 3029 %}
aoqi@0 3030 ins_pipe( pipe_slow );
aoqi@0 3031 %}
aoqi@0 3032
// Sub packed16B: dst = src1 - src2 via AVX 3-operand vpsubb (128-bit encoding).
instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3043
// Sub packed16B with a memory RHS: dst = src - [mem], folding the vector load (AVX only).
instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3054
// Sub packed32B: 256-bit vpsubb (vector256=true); integer 256-bit ops need UseAVX > 1 (AVX2).
instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3065
// Sub packed32B with memory RHS: 256-bit vpsubb folding the load (UseAVX > 1).
instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3076
aoqi@0 3077 // Shorts/Chars vector sub
// Sub packed2S (shorts/chars): destructive 2-operand psubw (dst = dst - src).
instruct vsub2S(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed2S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3087
// Sub packed2S: dst = src1 - src2 via AVX 3-operand vpsubw (128-bit encoding).
instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3098
// Sub packed4S: destructive 2-operand psubw (dst = dst - src).
instruct vsub4S(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed4S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3108
// Sub packed4S: dst = src1 - src2 via AVX 3-operand vpsubw (128-bit encoding).
instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3119
// Sub packed8S: destructive 2-operand psubw on a full 128-bit register.
instruct vsub8S(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed8S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3129
// Sub packed8S: dst = src1 - src2 via AVX 3-operand vpsubw (128-bit encoding).
instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3140
// Sub packed8S with memory RHS: dst = src - [mem], folding the vector load (AVX only).
instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3151
// Sub packed16S: 256-bit vpsubw (vector256=true); needs UseAVX > 1 for 256-bit integer ops.
instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3162
// Sub packed16S with memory RHS: 256-bit vpsubw folding the load (UseAVX > 1).
instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3173
aoqi@0 3174 // Integers vector sub
// Sub packed2I (ints): destructive 2-operand psubd (dst = dst - src).
instruct vsub2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed2I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3184
// Sub packed2I: dst = src1 - src2 via AVX 3-operand vpsubd (128-bit encoding).
instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3195
// Sub packed4I: destructive 2-operand psubd on a full 128-bit register.
instruct vsub4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed4I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3205
// Sub packed4I: dst = src1 - src2 via AVX 3-operand vpsubd (128-bit encoding).
instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3216
// Sub packed4I with memory RHS: dst = src - [mem], folding the vector load (AVX only).
instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3227
// Sub packed8I: 256-bit vpsubd (vector256=true); needs UseAVX > 1 for 256-bit integer ops.
instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3238
// Sub packed8I with memory RHS: 256-bit vpsubd folding the load (UseAVX > 1).
instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3249
aoqi@0 3250 // Longs vector sub
// Sub packed2L (longs): destructive 2-operand psubq (dst = dst - src).
instruct vsub2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packed2L" %}
  ins_encode %{
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3260
// Sub packed2L: dst = src1 - src2 via AVX 3-operand vpsubq (128-bit encoding).
instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3271
// Sub packed2L with memory RHS: dst = src - [mem], folding the vector load (AVX only).
instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3282
// Sub packed4L: 256-bit vpsubq (vector256=true); needs UseAVX > 1 for 256-bit integer ops.
instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3293
// Sub packed4L with memory RHS: 256-bit vpsubq folding the load (UseAVX > 1).
instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3304
aoqi@0 3305 // Floats vector sub
// Sub packed2F (floats): destructive 2-operand subps (dst = dst - src).
instruct vsub2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed2F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3315
// Sub packed2F: dst = src1 - src2 via AVX 3-operand vsubps (128-bit encoding).
instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3326
// Sub packed4F: destructive 2-operand subps on a full 128-bit register.
instruct vsub4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed4F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3336
// Sub packed4F: dst = src1 - src2 via AVX 3-operand vsubps (128-bit encoding).
instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3347
// Sub packed4F with memory RHS: dst = src - [mem], folding the vector load (AVX only).
instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3358
// Sub packed8F: 256-bit vsubps; FP 256-bit ops only need UseAVX > 0 (unlike integer ops).
instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3369
// Sub packed8F with memory RHS: 256-bit vsubps folding the load (UseAVX > 0).
instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3380
aoqi@0 3381 // Doubles vector sub
// Sub packed2D (doubles): destructive 2-operand subpd (dst = dst - src).
instruct vsub2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD dst src));
  format %{ "subpd $dst,$src\t! sub packed2D" %}
  ins_encode %{
    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3391
// Sub packed2D: dst = src1 - src2 via AVX 3-operand vsubpd (128-bit encoding).
instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3402
// Sub packed2D with memory RHS: dst = src - [mem], folding the vector load (AVX only).
instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3413
// Sub packed4D: 256-bit vsubpd; FP 256-bit ops only need UseAVX > 0.
instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3424
// Sub packed4D with memory RHS: 256-bit vsubpd folding the load (UseAVX > 0).
instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3435
aoqi@0 3436 // --------------------------------- MUL --------------------------------------
aoqi@0 3437
aoqi@0 3438 // Shorts/Chars vector mul
// Mul packed2S (shorts/chars): destructive 2-operand pmullw, keeps low 16 bits of each product.
instruct vmul2S(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed2S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3448
// Mul packed2S: dst = src1 * src2 via AVX 3-operand vpmullw (128-bit encoding).
instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3459
// Mul packed4S: destructive 2-operand pmullw (dst = dst * src, low halves).
instruct vmul4S(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed4S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3469
// Mul packed4S: dst = src1 * src2 via AVX 3-operand vpmullw (128-bit encoding).
instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3480
// Mul packed8S: destructive 2-operand pmullw on a full 128-bit register.
instruct vmul8S(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed8S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3490
// Mul packed8S: dst = src1 * src2 via AVX 3-operand vpmullw (128-bit encoding).
instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3501
// Mul packed8S with memory RHS: dst = src * [mem], folding the vector load (AVX only).
instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3512
// Mul packed16S: 256-bit vpmullw (vector256=true); needs UseAVX > 1 for 256-bit integer ops.
instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3523
// Mul packed16S with memory RHS: 256-bit vpmullw folding the load (UseAVX > 1).
instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3534
aoqi@0 3535 // Integers vector mul (sse4_1)
// Mul packed2I: destructive 2-operand pmulld; gated on UseSSE > 3 (pmulld is SSE4.1 per section comment).
instruct vmul2I(vecD dst, vecD src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed2I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3545
// Mul packed2I: dst = src1 * src2 via AVX 3-operand vpmulld (128-bit encoding).
instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3556
// Mul packed4I: destructive 2-operand pmulld; gated on UseSSE > 3.
instruct vmul4I(vecX dst, vecX src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed4I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3566
// Mul packed4I: dst = src1 * src2 via AVX 3-operand vpmulld (128-bit encoding).
instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3577
// Mul packed4I with memory RHS: dst = src * [mem], folding the vector load (AVX only).
instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3588
// Mul packed8I: 256-bit vpmulld (vector256=true); needs UseAVX > 1 for 256-bit integer ops.
instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3599
// Mul packed8I with memory RHS: 256-bit vpmulld folding the load (UseAVX > 1).
instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3610
aoqi@0 3611 // Floats vector mul
// Mul packed2F (floats): destructive 2-operand mulps (dst = dst * src).
instruct vmul2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed2F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3621
// Mul packed2F: dst = src1 * src2 via AVX 3-operand vmulps (128-bit encoding).
instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3632
// Mul packed4F: destructive 2-operand mulps on a full 128-bit register.
instruct vmul4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed4F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3642
// Mul packed4F: dst = src1 * src2 via AVX 3-operand vmulps (128-bit encoding).
instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3653
// Mul packed4F with memory RHS: dst = src * [mem], folding the vector load (AVX only).
instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3664
// Mul packed8F: 256-bit vmulps; FP 256-bit ops only need UseAVX > 0.
instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3675
// Mul packed8F with memory RHS: 256-bit vmulps folding the load (UseAVX > 0).
instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3686
aoqi@0 3687 // Doubles vector mul
// Mul packed2D (doubles): destructive 2-operand mulpd (dst = dst * src).
instruct vmul2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packed2D" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3697
// Mul packed2D: dst = src1 * src2 via AVX 3-operand vmulpd (128-bit encoding).
instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3708
// Mul packed2D with memory RHS: dst = src * [mem], folding the vector load (AVX only).
instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3719
// Mul packed4D: 256-bit vmulpd; FP 256-bit ops only need UseAVX > 0.
instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3730
// Mul packed4D with memory RHS: 256-bit vmulpd folding the load (UseAVX > 0).
instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3741
aoqi@0 3742 // --------------------------------- DIV --------------------------------------
aoqi@0 3743
aoqi@0 3744 // Floats vector div
// Div packed2F (floats): destructive 2-operand divps (dst = dst / src).
instruct vdiv2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed2F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3754
// Div packed2F: dst = src1 / src2 via AVX 3-operand vdivps (128-bit encoding).
instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %}
  ins_encode %{
    bool vector256 = false;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3765
// Div packed4F: destructive 2-operand divps on a full 128-bit register.
instruct vdiv4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed4F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3775
// Div packed4F: dst = src1 / src2 via AVX 3-operand vdivps (128-bit encoding).
instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3786
// Div packed4F with memory RHS: dst = src / [mem], folding the vector load (AVX only).
instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3797
// Div packed8F: 256-bit vdivps; FP 256-bit ops only need UseAVX > 0.
instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3808
aoqi@0 3809 instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
aoqi@0 3810 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
aoqi@0 3811 match(Set dst (DivVF src (LoadVector mem)));
aoqi@0 3812 format %{ "vdivps $dst,$src,$mem\t! div packed8F" %}
aoqi@0 3813 ins_encode %{
aoqi@0 3814 bool vector256 = true;
aoqi@0 3815 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 3816 %}
aoqi@0 3817 ins_pipe( pipe_slow );
aoqi@0 3818 %}
aoqi@0 3819
// Doubles vector div.
// Same variant scheme as the float section above: SSE destructive form
// (divpd, dst = dst / src), AVX 3-operand register form, and AVX form with
// a memory operand; vector256 selects the 256-bit encoding for vecY.
instruct vdiv2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD dst src));
  format %{ "divpd $dst,$src\t! div packed2D" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3874
// ------------------------------ Shift ---------------------------------------

// Left and right shift count vectors are the same on x86
// (only lowest bits of xmm reg are used for count).
// Both LShiftCntV and RShiftCntV therefore map to the same single movd
// that broadcasts the scalar count register into the low lane of an xmm.
instruct vshiftcnt(vecS dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movd $dst,$cnt\t! load shift count" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $cnt$$Register);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3888
// ------------------------------ LeftShift -----------------------------------

// Shorts/Chars vector left shift.
// Variant naming used for all shift sections below:
//   vsllNT          - SSE destructive form, count in an xmm register
//   vsllNT_imm      - SSE destructive form, immediate (immI8) count
//   vsllNT_reg      - AVX 3-operand form, xmm count (UseAVX > 0)
//   vsllNT_reg_imm  - AVX 3-operand form, immediate count
// The 16S forms operate on 256-bit integer vectors and so require
// UseAVX > 1 (AVX2) with vector256 = true.
// The (int)$shift$$constant cast narrows the matched immediate to the
// int the assembler shift-by-immediate overloads expect.
instruct vsll2S(vecS dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit (16-short) forms: integer AVX2 only, hence UseAVX > 1.
instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 4039
// Integers vector left shift (pslld / vpslld).
// Same variant scheme as the shorts section above; the 8I (256-bit)
// forms require AVX2 (UseAVX > 1).
instruct vsll2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit (8-int) forms: AVX2 only.
instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 4146
// Longs vector left shift (psllq / vpsllq).
// Same variant scheme as above; the 4L (256-bit) forms require AVX2.
instruct vsll2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 4211
// ----------------------- LogicalRightShift -----------------------------------

// Shorts vector logical right shift produces incorrect Java result
// for negative data because java code convert short value into int with
// sign extension before a shift. But char vectors are fine since chars are
// unsigned values.
// (These patterns are therefore only valid for char-element vectors;
// the vectorizer is responsible for not emitting URShiftVS on shorts.)

instruct vsrl2S(vecS dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit (16-short) forms: AVX2 only.
instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 4366
// Integers vector logical right shift (psrld / vpsrld).
// Same variant scheme as the left-shift sections; 8I forms require AVX2.
instruct vsrl2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit (8-int) forms: AVX2 only.
instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 4473
// Longs vector logical right shift (psrlq / vpsrlq).
// Same variant scheme as above; the 4L (256-bit) forms require AVX2.
// Note: there is no ArithmeticRightShift section for longs below —
// x86 has no psraq instruction before AVX-512.
instruct vsrl2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 4538
aoqi@0 4539 // ------------------- ArithmeticRightShift -----------------------------------
aoqi@0 4540
aoqi@0 4541 // Shorts/Chars vector arithmetic right shift
// Three flavors per lane count:
//   - SSE in-place:  match(Set dst (RShiftVS dst shift)) -- psraw overwrites dst;
//   - AVX register:  vpsraw dst,src,shift (non-destructive, UseAVX > 0);
//   - '_imm' forms take the shift count as an 8-bit immediate instead of an XMM reg.
// 16S forms operate on 256-bit vecY and require UseAVX > 1 (vector256 = true).
aoqi@0 4542 instruct vsra2S(vecS dst, vecS shift) %{
aoqi@0 4543 predicate(n->as_Vector()->length() == 2);
aoqi@0 4544 match(Set dst (RShiftVS dst shift));
aoqi@0 4545 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
aoqi@0 4546 ins_encode %{
aoqi@0 4547 __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
aoqi@0 4548 %}
aoqi@0 4549 ins_pipe( pipe_slow );
aoqi@0 4550 %}
aoqi@0 4551
aoqi@0 4552 instruct vsra2S_imm(vecS dst, immI8 shift) %{
aoqi@0 4553 predicate(n->as_Vector()->length() == 2);
aoqi@0 4554 match(Set dst (RShiftVS dst shift));
aoqi@0 4555 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
aoqi@0 4556 ins_encode %{
aoqi@0 4557 __ psraw($dst$$XMMRegister, (int)$shift$$constant);
aoqi@0 4558 %}
aoqi@0 4559 ins_pipe( pipe_slow );
aoqi@0 4560 %}
aoqi@0 4561
aoqi@0 4562 instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{
aoqi@0 4563 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 4564 match(Set dst (RShiftVS src shift));
aoqi@0 4565 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
aoqi@0 4566 ins_encode %{
aoqi@0 4567 bool vector256 = false;
aoqi@0 4568 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
aoqi@0 4569 %}
aoqi@0 4570 ins_pipe( pipe_slow );
aoqi@0 4571 %}
aoqi@0 4572
aoqi@0 4573 instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
aoqi@0 4574 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 4575 match(Set dst (RShiftVS src shift));
aoqi@0 4576 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
aoqi@0 4577 ins_encode %{
aoqi@0 4578 bool vector256 = false;
aoqi@0 4579 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
aoqi@0 4580 %}
aoqi@0 4581 ins_pipe( pipe_slow );
aoqi@0 4582 %}
aoqi@0 4583
// packed4S (64-bit vecD) variants -- same four flavors.
aoqi@0 4584 instruct vsra4S(vecD dst, vecS shift) %{
aoqi@0 4585 predicate(n->as_Vector()->length() == 4);
aoqi@0 4586 match(Set dst (RShiftVS dst shift));
aoqi@0 4587 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
aoqi@0 4588 ins_encode %{
aoqi@0 4589 __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
aoqi@0 4590 %}
aoqi@0 4591 ins_pipe( pipe_slow );
aoqi@0 4592 %}
aoqi@0 4593
aoqi@0 4594 instruct vsra4S_imm(vecD dst, immI8 shift) %{
aoqi@0 4595 predicate(n->as_Vector()->length() == 4);
aoqi@0 4596 match(Set dst (RShiftVS dst shift));
aoqi@0 4597 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
aoqi@0 4598 ins_encode %{
aoqi@0 4599 __ psraw($dst$$XMMRegister, (int)$shift$$constant);
aoqi@0 4600 %}
aoqi@0 4601 ins_pipe( pipe_slow );
aoqi@0 4602 %}
aoqi@0 4603
aoqi@0 4604 instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{
aoqi@0 4605 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 4606 match(Set dst (RShiftVS src shift));
aoqi@0 4607 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
aoqi@0 4608 ins_encode %{
aoqi@0 4609 bool vector256 = false;
aoqi@0 4610 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
aoqi@0 4611 %}
aoqi@0 4612 ins_pipe( pipe_slow );
aoqi@0 4613 %}
aoqi@0 4614
aoqi@0 4615 instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
aoqi@0 4616 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 4617 match(Set dst (RShiftVS src shift));
aoqi@0 4618 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
aoqi@0 4619 ins_encode %{
aoqi@0 4620 bool vector256 = false;
aoqi@0 4621 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
aoqi@0 4622 %}
aoqi@0 4623 ins_pipe( pipe_slow );
aoqi@0 4624 %}
aoqi@0 4625
// packed8S (128-bit vecX) variants.
aoqi@0 4626 instruct vsra8S(vecX dst, vecS shift) %{
aoqi@0 4627 predicate(n->as_Vector()->length() == 8);
aoqi@0 4628 match(Set dst (RShiftVS dst shift));
aoqi@0 4629 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
aoqi@0 4630 ins_encode %{
aoqi@0 4631 __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
aoqi@0 4632 %}
aoqi@0 4633 ins_pipe( pipe_slow );
aoqi@0 4634 %}
aoqi@0 4635
aoqi@0 4636 instruct vsra8S_imm(vecX dst, immI8 shift) %{
aoqi@0 4637 predicate(n->as_Vector()->length() == 8);
aoqi@0 4638 match(Set dst (RShiftVS dst shift));
aoqi@0 4639 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
aoqi@0 4640 ins_encode %{
aoqi@0 4641 __ psraw($dst$$XMMRegister, (int)$shift$$constant);
aoqi@0 4642 %}
aoqi@0 4643 ins_pipe( pipe_slow );
aoqi@0 4644 %}
aoqi@0 4645
aoqi@0 4646 instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{
aoqi@0 4647 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
aoqi@0 4648 match(Set dst (RShiftVS src shift));
aoqi@0 4649 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
aoqi@0 4650 ins_encode %{
aoqi@0 4651 bool vector256 = false;
aoqi@0 4652 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
aoqi@0 4653 %}
aoqi@0 4654 ins_pipe( pipe_slow );
aoqi@0 4655 %}
aoqi@0 4656
aoqi@0 4657 instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
aoqi@0 4658 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
aoqi@0 4659 match(Set dst (RShiftVS src shift));
aoqi@0 4660 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
aoqi@0 4661 ins_encode %{
aoqi@0 4662 bool vector256 = false;
aoqi@0 4663 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
aoqi@0 4664 %}
aoqi@0 4665 ins_pipe( pipe_slow );
aoqi@0 4666 %}
aoqi@0 4667
// packed16S (256-bit vecY) variants -- AVX-only (UseAVX > 1), no SSE in-place form.
aoqi@0 4668 instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{
aoqi@0 4669 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
aoqi@0 4670 match(Set dst (RShiftVS src shift));
aoqi@0 4671 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
aoqi@0 4672 ins_encode %{
aoqi@0 4673 bool vector256 = true;
aoqi@0 4674 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
aoqi@0 4675 %}
aoqi@0 4676 ins_pipe( pipe_slow );
aoqi@0 4677 %}
aoqi@0 4678
aoqi@0 4679 instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
aoqi@0 4680 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
aoqi@0 4681 match(Set dst (RShiftVS src shift));
aoqi@0 4682 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
aoqi@0 4683 ins_encode %{
aoqi@0 4684 bool vector256 = true;
aoqi@0 4685 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
aoqi@0 4686 %}
aoqi@0 4687 ins_pipe( pipe_slow );
aoqi@0 4688 %}
aoqi@0 4689
aoqi@0 4690 // Integers vector arithmetic right shift
// Same scheme as the shorts rules above, using psrad/vpsrad on 32-bit lanes:
// SSE in-place forms (dst is both source and destination), AVX three-operand
// '_reg' forms (UseAVX > 0), and '_imm' forms taking an 8-bit immediate count.
// 8I forms use 256-bit vecY and require UseAVX > 1 (vector256 = true).
aoqi@0 4691 instruct vsra2I(vecD dst, vecS shift) %{
aoqi@0 4692 predicate(n->as_Vector()->length() == 2);
aoqi@0 4693 match(Set dst (RShiftVI dst shift));
aoqi@0 4694 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
aoqi@0 4695 ins_encode %{
aoqi@0 4696 __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
aoqi@0 4697 %}
aoqi@0 4698 ins_pipe( pipe_slow );
aoqi@0 4699 %}
aoqi@0 4700
aoqi@0 4701 instruct vsra2I_imm(vecD dst, immI8 shift) %{
aoqi@0 4702 predicate(n->as_Vector()->length() == 2);
aoqi@0 4703 match(Set dst (RShiftVI dst shift));
aoqi@0 4704 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
aoqi@0 4705 ins_encode %{
aoqi@0 4706 __ psrad($dst$$XMMRegister, (int)$shift$$constant);
aoqi@0 4707 %}
aoqi@0 4708 ins_pipe( pipe_slow );
aoqi@0 4709 %}
aoqi@0 4710
aoqi@0 4711 instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
aoqi@0 4712 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 4713 match(Set dst (RShiftVI src shift));
aoqi@0 4714 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
aoqi@0 4715 ins_encode %{
aoqi@0 4716 bool vector256 = false;
aoqi@0 4717 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
aoqi@0 4718 %}
aoqi@0 4719 ins_pipe( pipe_slow );
aoqi@0 4720 %}
aoqi@0 4721
aoqi@0 4722 instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
aoqi@0 4723 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 4724 match(Set dst (RShiftVI src shift));
aoqi@0 4725 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
aoqi@0 4726 ins_encode %{
aoqi@0 4727 bool vector256 = false;
aoqi@0 4728 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
aoqi@0 4729 %}
aoqi@0 4730 ins_pipe( pipe_slow );
aoqi@0 4731 %}
aoqi@0 4732
// packed4I (128-bit vecX) variants.
aoqi@0 4733 instruct vsra4I(vecX dst, vecS shift) %{
aoqi@0 4734 predicate(n->as_Vector()->length() == 4);
aoqi@0 4735 match(Set dst (RShiftVI dst shift));
aoqi@0 4736 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
aoqi@0 4737 ins_encode %{
aoqi@0 4738 __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
aoqi@0 4739 %}
aoqi@0 4740 ins_pipe( pipe_slow );
aoqi@0 4741 %}
aoqi@0 4742
aoqi@0 4743 instruct vsra4I_imm(vecX dst, immI8 shift) %{
aoqi@0 4744 predicate(n->as_Vector()->length() == 4);
aoqi@0 4745 match(Set dst (RShiftVI dst shift));
aoqi@0 4746 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
aoqi@0 4747 ins_encode %{
aoqi@0 4748 __ psrad($dst$$XMMRegister, (int)$shift$$constant);
aoqi@0 4749 %}
aoqi@0 4750 ins_pipe( pipe_slow );
aoqi@0 4751 %}
aoqi@0 4752
aoqi@0 4753 instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
aoqi@0 4754 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 4755 match(Set dst (RShiftVI src shift));
aoqi@0 4756 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
aoqi@0 4757 ins_encode %{
aoqi@0 4758 bool vector256 = false;
aoqi@0 4759 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
aoqi@0 4760 %}
aoqi@0 4761 ins_pipe( pipe_slow );
aoqi@0 4762 %}
aoqi@0 4763
aoqi@0 4764 instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
aoqi@0 4765 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 4766 match(Set dst (RShiftVI src shift));
aoqi@0 4767 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
aoqi@0 4768 ins_encode %{
aoqi@0 4769 bool vector256 = false;
aoqi@0 4770 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
aoqi@0 4771 %}
aoqi@0 4772 ins_pipe( pipe_slow );
aoqi@0 4773 %}
aoqi@0 4774
// packed8I (256-bit vecY) variants -- AVX-only (UseAVX > 1), no SSE in-place form.
aoqi@0 4775 instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
aoqi@0 4776 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
aoqi@0 4777 match(Set dst (RShiftVI src shift));
aoqi@0 4778 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
aoqi@0 4779 ins_encode %{
aoqi@0 4780 bool vector256 = true;
aoqi@0 4781 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
aoqi@0 4782 %}
aoqi@0 4783 ins_pipe( pipe_slow );
aoqi@0 4784 %}
aoqi@0 4785
aoqi@0 4786 instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
aoqi@0 4787 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
aoqi@0 4788 match(Set dst (RShiftVI src shift));
aoqi@0 4789 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
aoqi@0 4790 ins_encode %{
aoqi@0 4791 bool vector256 = true;
aoqi@0 4792 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
aoqi@0 4793 %}
aoqi@0 4794 ins_pipe( pipe_slow );
aoqi@0 4795 %}
aoqi@0 4796
aoqi@0 4797 // There are no longs vector arithmetic right shift instructions.
aoqi@0 4798
aoqi@0 4799
aoqi@0 4800 // --------------------------------- AND --------------------------------------
// Bitwise AND of vectors.  Predicates select on total width in bytes (lane type
// is irrelevant for bitwise ops).  SSE forms are two-operand and destructive
// (pand overwrites dst); AVX '_reg' forms are three-operand non-destructive
// (UseAVX > 0); '_mem' forms fold the second operand from memory (LoadVector).
// 32-byte forms require UseAVX > 1 (vector256 = true).
aoqi@0 4801
aoqi@0 4802 instruct vand4B(vecS dst, vecS src) %{
aoqi@0 4803 predicate(n->as_Vector()->length_in_bytes() == 4);
aoqi@0 4804 match(Set dst (AndV dst src));
aoqi@0 4805 format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
aoqi@0 4806 ins_encode %{
aoqi@0 4807 __ pand($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 4808 %}
aoqi@0 4809 ins_pipe( pipe_slow );
aoqi@0 4810 %}
aoqi@0 4811
aoqi@0 4812 instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
aoqi@0 4813 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
aoqi@0 4814 match(Set dst (AndV src1 src2));
aoqi@0 4815 format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
aoqi@0 4816 ins_encode %{
aoqi@0 4817 bool vector256 = false;
aoqi@0 4818 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 4819 %}
aoqi@0 4820 ins_pipe( pipe_slow );
aoqi@0 4821 %}
aoqi@0 4822
aoqi@0 4823 instruct vand8B(vecD dst, vecD src) %{
aoqi@0 4824 predicate(n->as_Vector()->length_in_bytes() == 8);
aoqi@0 4825 match(Set dst (AndV dst src));
aoqi@0 4826 format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
aoqi@0 4827 ins_encode %{
aoqi@0 4828 __ pand($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 4829 %}
aoqi@0 4830 ins_pipe( pipe_slow );
aoqi@0 4831 %}
aoqi@0 4832
aoqi@0 4833 instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
aoqi@0 4834 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
aoqi@0 4835 match(Set dst (AndV src1 src2));
aoqi@0 4836 format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %}
aoqi@0 4837 ins_encode %{
aoqi@0 4838 bool vector256 = false;
aoqi@0 4839 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 4840 %}
aoqi@0 4841 ins_pipe( pipe_slow );
aoqi@0 4842 %}
aoqi@0 4843
aoqi@0 4844 instruct vand16B(vecX dst, vecX src) %{
aoqi@0 4845 predicate(n->as_Vector()->length_in_bytes() == 16);
aoqi@0 4846 match(Set dst (AndV dst src));
aoqi@0 4847 format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
aoqi@0 4848 ins_encode %{
aoqi@0 4849 __ pand($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 4850 %}
aoqi@0 4851 ins_pipe( pipe_slow );
aoqi@0 4852 %}
aoqi@0 4853
aoqi@0 4854 instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
aoqi@0 4855 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
aoqi@0 4856 match(Set dst (AndV src1 src2));
aoqi@0 4857 format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
aoqi@0 4858 ins_encode %{
aoqi@0 4859 bool vector256 = false;
aoqi@0 4860 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 4861 %}
aoqi@0 4862 ins_pipe( pipe_slow );
aoqi@0 4863 %}
aoqi@0 4864
// Memory-operand form: second input comes straight from memory.
aoqi@0 4865 instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
aoqi@0 4866 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
aoqi@0 4867 match(Set dst (AndV src (LoadVector mem)));
aoqi@0 4868 format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
aoqi@0 4869 ins_encode %{
aoqi@0 4870 bool vector256 = false;
aoqi@0 4871 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 4872 %}
aoqi@0 4873 ins_pipe( pipe_slow );
aoqi@0 4874 %}
aoqi@0 4875
aoqi@0 4876 instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
aoqi@0 4877 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
aoqi@0 4878 match(Set dst (AndV src1 src2));
aoqi@0 4879 format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %}
aoqi@0 4880 ins_encode %{
aoqi@0 4881 bool vector256 = true;
aoqi@0 4882 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 4883 %}
aoqi@0 4884 ins_pipe( pipe_slow );
aoqi@0 4885 %}
aoqi@0 4886
aoqi@0 4887 instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
aoqi@0 4888 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
aoqi@0 4889 match(Set dst (AndV src (LoadVector mem)));
aoqi@0 4890 format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
aoqi@0 4891 ins_encode %{
aoqi@0 4892 bool vector256 = true;
aoqi@0 4893 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 4894 %}
aoqi@0 4895 ins_pipe( pipe_slow );
aoqi@0 4896 %}
aoqi@0 4897
aoqi@0 4898 // --------------------------------- OR ---------------------------------------
// Bitwise OR of vectors; structured identically to the AND rules above
// (SSE destructive por, AVX non-destructive vpor, '_mem' forms fold a
// LoadVector operand; 32-byte forms require UseAVX > 1).
aoqi@0 4899
aoqi@0 4900 instruct vor4B(vecS dst, vecS src) %{
aoqi@0 4901 predicate(n->as_Vector()->length_in_bytes() == 4);
aoqi@0 4902 match(Set dst (OrV dst src));
aoqi@0 4903 format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
aoqi@0 4904 ins_encode %{
aoqi@0 4905 __ por($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 4906 %}
aoqi@0 4907 ins_pipe( pipe_slow );
aoqi@0 4908 %}
aoqi@0 4909
aoqi@0 4910 instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
aoqi@0 4911 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
aoqi@0 4912 match(Set dst (OrV src1 src2));
aoqi@0 4913 format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
aoqi@0 4914 ins_encode %{
aoqi@0 4915 bool vector256 = false;
aoqi@0 4916 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 4917 %}
aoqi@0 4918 ins_pipe( pipe_slow );
aoqi@0 4919 %}
aoqi@0 4920
aoqi@0 4921 instruct vor8B(vecD dst, vecD src) %{
aoqi@0 4922 predicate(n->as_Vector()->length_in_bytes() == 8);
aoqi@0 4923 match(Set dst (OrV dst src));
aoqi@0 4924 format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
aoqi@0 4925 ins_encode %{
aoqi@0 4926 __ por($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 4927 %}
aoqi@0 4928 ins_pipe( pipe_slow );
aoqi@0 4929 %}
aoqi@0 4930
aoqi@0 4931 instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
aoqi@0 4932 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
aoqi@0 4933 match(Set dst (OrV src1 src2));
aoqi@0 4934 format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
aoqi@0 4935 ins_encode %{
aoqi@0 4936 bool vector256 = false;
aoqi@0 4937 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 4938 %}
aoqi@0 4939 ins_pipe( pipe_slow );
aoqi@0 4940 %}
aoqi@0 4941
aoqi@0 4942 instruct vor16B(vecX dst, vecX src) %{
aoqi@0 4943 predicate(n->as_Vector()->length_in_bytes() == 16);
aoqi@0 4944 match(Set dst (OrV dst src));
aoqi@0 4945 format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
aoqi@0 4946 ins_encode %{
aoqi@0 4947 __ por($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 4948 %}
aoqi@0 4949 ins_pipe( pipe_slow );
aoqi@0 4950 %}
aoqi@0 4951
aoqi@0 4952 instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
aoqi@0 4953 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
aoqi@0 4954 match(Set dst (OrV src1 src2));
aoqi@0 4955 format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
aoqi@0 4956 ins_encode %{
aoqi@0 4957 bool vector256 = false;
aoqi@0 4958 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 4959 %}
aoqi@0 4960 ins_pipe( pipe_slow );
aoqi@0 4961 %}
aoqi@0 4962
aoqi@0 4963 instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
aoqi@0 4964 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
aoqi@0 4965 match(Set dst (OrV src (LoadVector mem)));
aoqi@0 4966 format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
aoqi@0 4967 ins_encode %{
aoqi@0 4968 bool vector256 = false;
aoqi@0 4969 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 4970 %}
aoqi@0 4971 ins_pipe( pipe_slow );
aoqi@0 4972 %}
aoqi@0 4973
aoqi@0 4974 instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
aoqi@0 4975 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
aoqi@0 4976 match(Set dst (OrV src1 src2));
aoqi@0 4977 format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
aoqi@0 4978 ins_encode %{
aoqi@0 4979 bool vector256 = true;
aoqi@0 4980 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 4981 %}
aoqi@0 4982 ins_pipe( pipe_slow );
aoqi@0 4983 %}
aoqi@0 4984
aoqi@0 4985 instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
aoqi@0 4986 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
aoqi@0 4987 match(Set dst (OrV src (LoadVector mem)));
aoqi@0 4988 format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
aoqi@0 4989 ins_encode %{
aoqi@0 4990 bool vector256 = true;
aoqi@0 4991 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 4992 %}
aoqi@0 4993 ins_pipe( pipe_slow );
aoqi@0 4994 %}
aoqi@0 4995
aoqi@0 4996 // --------------------------------- XOR --------------------------------------
// Bitwise XOR of vectors; structured identically to the AND/OR rules above
// (SSE destructive pxor, AVX non-destructive vpxor, '_mem' forms fold a
// LoadVector operand; 32-byte forms require UseAVX > 1).
aoqi@0 4997
aoqi@0 4998 instruct vxor4B(vecS dst, vecS src) %{
aoqi@0 4999 predicate(n->as_Vector()->length_in_bytes() == 4);
aoqi@0 5000 match(Set dst (XorV dst src));
aoqi@0 5001 format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
aoqi@0 5002 ins_encode %{
aoqi@0 5003 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 5004 %}
aoqi@0 5005 ins_pipe( pipe_slow );
aoqi@0 5006 %}
aoqi@0 5007
aoqi@0 5008 instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
aoqi@0 5009 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
aoqi@0 5010 match(Set dst (XorV src1 src2));
aoqi@0 5011 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
aoqi@0 5012 ins_encode %{
aoqi@0 5013 bool vector256 = false;
aoqi@0 5014 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 5015 %}
aoqi@0 5016 ins_pipe( pipe_slow );
aoqi@0 5017 %}
aoqi@0 5018
aoqi@0 5019 instruct vxor8B(vecD dst, vecD src) %{
aoqi@0 5020 predicate(n->as_Vector()->length_in_bytes() == 8);
aoqi@0 5021 match(Set dst (XorV dst src));
aoqi@0 5022 format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
aoqi@0 5023 ins_encode %{
aoqi@0 5024 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 5025 %}
aoqi@0 5026 ins_pipe( pipe_slow );
aoqi@0 5027 %}
aoqi@0 5028
aoqi@0 5029 instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
aoqi@0 5030 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
aoqi@0 5031 match(Set dst (XorV src1 src2));
aoqi@0 5032 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
aoqi@0 5033 ins_encode %{
aoqi@0 5034 bool vector256 = false;
aoqi@0 5035 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 5036 %}
aoqi@0 5037 ins_pipe( pipe_slow );
aoqi@0 5038 %}
aoqi@0 5039
aoqi@0 5040 instruct vxor16B(vecX dst, vecX src) %{
aoqi@0 5041 predicate(n->as_Vector()->length_in_bytes() == 16);
aoqi@0 5042 match(Set dst (XorV dst src));
aoqi@0 5043 format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
aoqi@0 5044 ins_encode %{
aoqi@0 5045 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 5046 %}
aoqi@0 5047 ins_pipe( pipe_slow );
aoqi@0 5048 %}
aoqi@0 5049
aoqi@0 5050 instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
aoqi@0 5051 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
aoqi@0 5052 match(Set dst (XorV src1 src2));
aoqi@0 5053 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
aoqi@0 5054 ins_encode %{
aoqi@0 5055 bool vector256 = false;
aoqi@0 5056 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 5057 %}
aoqi@0 5058 ins_pipe( pipe_slow );
aoqi@0 5059 %}
aoqi@0 5060
aoqi@0 5061 instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
aoqi@0 5062 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
aoqi@0 5063 match(Set dst (XorV src (LoadVector mem)));
aoqi@0 5064 format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
aoqi@0 5065 ins_encode %{
aoqi@0 5066 bool vector256 = false;
aoqi@0 5067 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 5068 %}
aoqi@0 5069 ins_pipe( pipe_slow );
aoqi@0 5070 %}
aoqi@0 5071
aoqi@0 5072 instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
aoqi@0 5073 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
aoqi@0 5074 match(Set dst (XorV src1 src2));
aoqi@0 5075 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
aoqi@0 5076 ins_encode %{
aoqi@0 5077 bool vector256 = true;
aoqi@0 5078 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 5079 %}
aoqi@0 5080 ins_pipe( pipe_slow );
aoqi@0 5081 %}
aoqi@0 5082
aoqi@0 5083 instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
aoqi@0 5084 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
aoqi@0 5085 match(Set dst (XorV src (LoadVector mem)));
aoqi@0 5086 format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
aoqi@0 5087 ins_encode %{
aoqi@0 5088 bool vector256 = true;
aoqi@0 5089 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 5090 %}
aoqi@0 5091 ins_pipe( pipe_slow );
aoqi@0 5092 %}
aoqi@0 5093

mercurial