src/cpu/x86/vm/x86.ad

Tue, 08 Aug 2017 15:57:29 +0800

author
aoqi
date
Tue, 08 Aug 2017 15:57:29 +0800
changeset 6876
710a3c8b516e
parent 6517
a433eb716ce1
parent 0
f90c822e73f8
child 7994
04ff2f6cd0eb
permissions
-rw-r--r--

merge

aoqi@0 1 //
aoqi@0 2 // Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
aoqi@0 3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
aoqi@0 4 //
aoqi@0 5 // This code is free software; you can redistribute it and/or modify it
aoqi@0 6 // under the terms of the GNU General Public License version 2 only, as
aoqi@0 7 // published by the Free Software Foundation.
aoqi@0 8 //
aoqi@0 9 // This code is distributed in the hope that it will be useful, but WITHOUT
aoqi@0 10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
aoqi@0 11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
aoqi@0 12 // version 2 for more details (a copy is included in the LICENSE file that
aoqi@0 13 // accompanied this code).
aoqi@0 14 //
aoqi@0 15 // You should have received a copy of the GNU General Public License version
aoqi@0 16 // 2 along with this work; if not, write to the Free Software Foundation,
aoqi@0 17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
aoqi@0 18 //
aoqi@0 19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
aoqi@0 20 // or visit www.oracle.com if you need additional information or have any
aoqi@0 21 // questions.
aoqi@0 22 //
aoqi@0 23 //
aoqi@0 24
aoqi@0 25 // X86 Common Architecture Description File
aoqi@0 26
aoqi@0 27 //----------REGISTER DEFINITION BLOCK------------------------------------------
aoqi@0 28 // This information is used by the matcher and the register allocator to
aoqi@0 29 // describe individual registers and classes of registers within the target
aoqi@0 30 // architecture.
aoqi@0 31
aoqi@0 32 register %{
aoqi@0 33 //----------Architecture Description Register Definitions----------------------
aoqi@0 34 // General Registers
aoqi@0 35 // "reg_def" name ( register save type, C convention save type,
aoqi@0 36 // ideal register type, encoding );
aoqi@0 37 // Register Save Types:
aoqi@0 38 //
aoqi@0 39 // NS = No-Save: The register allocator assumes that these registers
aoqi@0 40 // can be used without saving upon entry to the method, &
aoqi@0 41 // that they do not need to be saved at call sites.
aoqi@0 42 //
aoqi@0 43 // SOC = Save-On-Call: The register allocator assumes that these registers
aoqi@0 44 // can be used without saving upon entry to the method,
aoqi@0 45 // but that they must be saved at call sites.
aoqi@0 46 //
aoqi@0 47 // SOE = Save-On-Entry: The register allocator assumes that these registers
aoqi@0 48 // must be saved before using them upon entry to the
aoqi@0 49 // method, but they do not need to be saved at call
aoqi@0 50 // sites.
aoqi@0 51 //
aoqi@0 52 // AS = Always-Save: The register allocator assumes that these registers
aoqi@0 53 // must be saved before using them upon entry to the
aoqi@0 54 // method, & that they must be saved at call sites.
aoqi@0 55 //
aoqi@0 56 // Ideal Register Type is used to determine how to save & restore a
aoqi@0 57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
aoqi@0 58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
aoqi@0 59 //
aoqi@0 60 // The encoding number is the actual bit-pattern placed into the opcodes.
aoqi@0 61
aoqi@0 62 // XMM registers. 256-bit registers or 8 words each, labeled (a)-h.
aoqi@0 63 // Word a in each register holds a Float, words ab hold a Double.
aoqi@0 64 // The whole registers are used in SSE4.2 version intrinsics,
aoqi@0 65 // array copy stubs and superword operations (see UseSSE42Intrinsics,
aoqi@0 66 // UseXMMForArrayCopy and UseSuperword flags).
aoqi@0 67 // XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
aoqi@0 68 // Linux ABI: No register preserved across function calls
aoqi@0 69 // XMM0-XMM7 might hold parameters
aoqi@0 70 // Windows ABI: XMM6-XMM15 preserved across function calls
aoqi@0 71 // XMM0-XMM3 might hold parameters
aoqi@0 72
// Each XMM register is described as eight 32-bit lanes (base name plus
// suffixes b-h) so that float (1 lane), double (2 lanes) and vector
// (4/8 lane) values can all be allocated out of the same register file.
aoqi@0 73 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
aoqi@0 74 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
aoqi@0 75 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
aoqi@0 76 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
aoqi@0 77 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
aoqi@0 78 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
aoqi@0 79 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
aoqi@0 80 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
aoqi@0 81
aoqi@0 82 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
aoqi@0 83 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
aoqi@0 84 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
aoqi@0 85 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
aoqi@0 86 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
aoqi@0 87 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
aoqi@0 88 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
aoqi@0 89 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
aoqi@0 90
aoqi@0 91 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
aoqi@0 92 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
aoqi@0 93 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
aoqi@0 94 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
aoqi@0 95 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
aoqi@0 96 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
aoqi@0 97 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
aoqi@0 98 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
aoqi@0 99
aoqi@0 100 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
aoqi@0 101 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
aoqi@0 102 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
aoqi@0 103 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
aoqi@0 104 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
aoqi@0 105 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
aoqi@0 106 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
aoqi@0 107 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
aoqi@0 108
aoqi@0 109 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
aoqi@0 110 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
aoqi@0 111 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
aoqi@0 112 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
aoqi@0 113 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
aoqi@0 114 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
aoqi@0 115 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
aoqi@0 116 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
aoqi@0 117
aoqi@0 118 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
aoqi@0 119 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
aoqi@0 120 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
aoqi@0 121 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
aoqi@0 122 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
aoqi@0 123 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
aoqi@0 124 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
aoqi@0 125 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
aoqi@0 126
// Windows x64 ABI: XMM6-XMM15 are callee-saved, hence SOE below.
aoqi@0 127 #ifdef _WIN64
aoqi@0 128
aoqi@0 129 reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
aoqi@0 130 reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
aoqi@0 131 reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
aoqi@0 132 reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
aoqi@0 133 reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
aoqi@0 134 reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
aoqi@0 135 reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
aoqi@0 136 reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));
aoqi@0 137
aoqi@0 138 reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
aoqi@0 139 reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
aoqi@0 140 reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
aoqi@0 141 reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
aoqi@0 142 reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
aoqi@0 143 reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
aoqi@0 144 reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
aoqi@0 145 reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));
aoqi@0 146
aoqi@0 147 reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
aoqi@0 148 reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
aoqi@0 149 reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
aoqi@0 150 reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
aoqi@0 151 reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
aoqi@0 152 reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
aoqi@0 153 reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
aoqi@0 154 reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));
aoqi@0 155
aoqi@0 156 reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
aoqi@0 157 reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
aoqi@0 158 reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
aoqi@0 159 reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
aoqi@0 160 reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
aoqi@0 161 reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
aoqi@0 162 reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
aoqi@0 163 reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));
aoqi@0 164
aoqi@0 165 reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
aoqi@0 166 reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
aoqi@0 167 reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
aoqi@0 168 reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
aoqi@0 169 reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
aoqi@0 170 reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
aoqi@0 171 reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
aoqi@0 172 reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));
aoqi@0 173
aoqi@0 174 reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
aoqi@0 175 reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
aoqi@0 176 reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
aoqi@0 177 reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
aoqi@0 178 reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
aoqi@0 179 reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
aoqi@0 180 reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
aoqi@0 181 reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));
aoqi@0 182
aoqi@0 183 reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
aoqi@0 184 reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
aoqi@0 185 reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
aoqi@0 186 reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
aoqi@0 187 reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
aoqi@0 188 reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
aoqi@0 189 reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
aoqi@0 190 reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));
aoqi@0 191
aoqi@0 192 reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
aoqi@0 193 reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
aoqi@0 194 reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
aoqi@0 195 reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
aoqi@0 196 reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
aoqi@0 197 reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
aoqi@0 198 reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
aoqi@0 199 reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));
aoqi@0 200
aoqi@0 201 reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
aoqi@0 202 reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
aoqi@0 203 reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
aoqi@0 204 reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
aoqi@0 205 reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
aoqi@0 206 reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
aoqi@0 207 reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
aoqi@0 208 reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));
aoqi@0 209
aoqi@0 210 reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
aoqi@0 211 reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
aoqi@0 212 reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
aoqi@0 213 reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
aoqi@0 214 reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
aoqi@0 215 reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
aoqi@0 216 reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
aoqi@0 217 reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));
aoqi@0 218
// Non-Windows ABIs: all XMM registers are caller-saved (SOC).
aoqi@0 219 #else // _WIN64
aoqi@0 220
aoqi@0 221 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
aoqi@0 222 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
aoqi@0 223 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
aoqi@0 224 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
aoqi@0 225 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
aoqi@0 226 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
aoqi@0 227 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
aoqi@0 228 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
aoqi@0 229
aoqi@0 230 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
aoqi@0 231 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
aoqi@0 232 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
aoqi@0 233 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
aoqi@0 234 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
aoqi@0 235 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
aoqi@0 236 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
aoqi@0 237 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
aoqi@0 238
// XMM8-XMM15 only exist in 64-bit mode.
aoqi@0 239 #ifdef _LP64
aoqi@0 240
aoqi@0 241 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
aoqi@0 242 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
aoqi@0 243 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
aoqi@0 244 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
aoqi@0 245 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
aoqi@0 246 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
aoqi@0 247 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
aoqi@0 248 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
aoqi@0 249
aoqi@0 250 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
aoqi@0 251 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
aoqi@0 252 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
aoqi@0 253 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
aoqi@0 254 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
aoqi@0 255 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
aoqi@0 256 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
aoqi@0 257 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
aoqi@0 258
aoqi@0 259 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
aoqi@0 260 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
aoqi@0 261 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
aoqi@0 262 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
aoqi@0 263 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
aoqi@0 264 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
aoqi@0 265 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
aoqi@0 266 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
aoqi@0 267
aoqi@0 268 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
aoqi@0 269 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
aoqi@0 270 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
aoqi@0 271 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
aoqi@0 272 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
aoqi@0 273 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
aoqi@0 274 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
aoqi@0 275 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
aoqi@0 276
aoqi@0 277 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
aoqi@0 278 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
aoqi@0 279 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
aoqi@0 280 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
aoqi@0 281 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
aoqi@0 282 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
aoqi@0 283 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
aoqi@0 284 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
aoqi@0 285
aoqi@0 286 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
aoqi@0 287 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
aoqi@0 288 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
aoqi@0 289 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
aoqi@0 290 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
aoqi@0 291 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
aoqi@0 292 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
aoqi@0 293 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
aoqi@0 294
aoqi@0 295 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
aoqi@0 296 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
aoqi@0 297 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
aoqi@0 298 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
aoqi@0 299 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
aoqi@0 300 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
aoqi@0 301 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
aoqi@0 302 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
aoqi@0 303
aoqi@0 304 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
aoqi@0 305 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
aoqi@0 306 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
aoqi@0 307 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
aoqi@0 308 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
aoqi@0 309 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
aoqi@0 310 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
aoqi@0 311 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
aoqi@0 312
aoqi@0 313 #endif // _LP64
aoqi@0 314
aoqi@0 315 #endif // _WIN64
aoqi@0 316
// Flags register. The encoding (16 vs 8) is the slot after the 16 (LP64)
// or 8 (32-bit) XMM registers defined above; NOTE(review): presumably it
// must stay consistent with the GPR definitions in x86_64.ad / x86_32.ad
// -- confirm before changing.
aoqi@0 317 #ifdef _LP64
aoqi@0 318 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
aoqi@0 319 #else
aoqi@0 320 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
aoqi@0 321 #endif // _LP64
aoqi@0 322
// Allocation chunk covering the whole XMM file. NOTE(review): adlc
// presumably takes the enumeration order as the allocation preference
// order -- confirm against the ADL documentation before reordering.
aoqi@0 323 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
aoqi@0 324 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
aoqi@0 325 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
aoqi@0 326 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
aoqi@0 327 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
aoqi@0 328 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
aoqi@0 329 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
aoqi@0 330 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
aoqi@0 331 #ifdef _LP64
aoqi@0 332 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
aoqi@0 333 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
aoqi@0 334 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
aoqi@0 335 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
aoqi@0 336 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
aoqi@0 337 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
aoqi@0 338 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
aoqi@0 339 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
aoqi@0 340 #endif
aoqi@0 341 );
aoqi@0 342
aoqi@0 343 // flags allocation class should be last.
aoqi@0 344 alloc_class chunk2(RFLAGS);
aoqi@0 345
aoqi@0 346 // Singleton class for condition codes
aoqi@0 347 reg_class int_flags(RFLAGS);
aoqi@0 348
// The reg_classes below expose increasing numbers of 32-bit lanes per
// register: 1 lane (float/32-bit vector), 2 lanes (double/64-bit vector),
// 4 lanes (128-bit vector), 8 lanes (256-bit vector).
aoqi@0 349 // Class for all float registers
aoqi@0 350 reg_class float_reg(XMM0,
aoqi@0 351 XMM1,
aoqi@0 352 XMM2,
aoqi@0 353 XMM3,
aoqi@0 354 XMM4,
aoqi@0 355 XMM5,
aoqi@0 356 XMM6,
aoqi@0 357 XMM7
aoqi@0 358 #ifdef _LP64
aoqi@0 359 ,XMM8,
aoqi@0 360 XMM9,
aoqi@0 361 XMM10,
aoqi@0 362 XMM11,
aoqi@0 363 XMM12,
aoqi@0 364 XMM13,
aoqi@0 365 XMM14,
aoqi@0 366 XMM15
aoqi@0 367 #endif
aoqi@0 368 );
aoqi@0 369
aoqi@0 370 // Class for all double registers
aoqi@0 371 reg_class double_reg(XMM0, XMM0b,
aoqi@0 372 XMM1, XMM1b,
aoqi@0 373 XMM2, XMM2b,
aoqi@0 374 XMM3, XMM3b,
aoqi@0 375 XMM4, XMM4b,
aoqi@0 376 XMM5, XMM5b,
aoqi@0 377 XMM6, XMM6b,
aoqi@0 378 XMM7, XMM7b
aoqi@0 379 #ifdef _LP64
aoqi@0 380 ,XMM8, XMM8b,
aoqi@0 381 XMM9, XMM9b,
aoqi@0 382 XMM10, XMM10b,
aoqi@0 383 XMM11, XMM11b,
aoqi@0 384 XMM12, XMM12b,
aoqi@0 385 XMM13, XMM13b,
aoqi@0 386 XMM14, XMM14b,
aoqi@0 387 XMM15, XMM15b
aoqi@0 388 #endif
aoqi@0 389 );
aoqi@0 390
aoqi@0 391 // Class for all 32bit vector registers
aoqi@0 392 reg_class vectors_reg(XMM0,
aoqi@0 393 XMM1,
aoqi@0 394 XMM2,
aoqi@0 395 XMM3,
aoqi@0 396 XMM4,
aoqi@0 397 XMM5,
aoqi@0 398 XMM6,
aoqi@0 399 XMM7
aoqi@0 400 #ifdef _LP64
aoqi@0 401 ,XMM8,
aoqi@0 402 XMM9,
aoqi@0 403 XMM10,
aoqi@0 404 XMM11,
aoqi@0 405 XMM12,
aoqi@0 406 XMM13,
aoqi@0 407 XMM14,
aoqi@0 408 XMM15
aoqi@0 409 #endif
aoqi@0 410 );
aoqi@0 411
aoqi@0 412 // Class for all 64bit vector registers
aoqi@0 413 reg_class vectord_reg(XMM0, XMM0b,
aoqi@0 414 XMM1, XMM1b,
aoqi@0 415 XMM2, XMM2b,
aoqi@0 416 XMM3, XMM3b,
aoqi@0 417 XMM4, XMM4b,
aoqi@0 418 XMM5, XMM5b,
aoqi@0 419 XMM6, XMM6b,
aoqi@0 420 XMM7, XMM7b
aoqi@0 421 #ifdef _LP64
aoqi@0 422 ,XMM8, XMM8b,
aoqi@0 423 XMM9, XMM9b,
aoqi@0 424 XMM10, XMM10b,
aoqi@0 425 XMM11, XMM11b,
aoqi@0 426 XMM12, XMM12b,
aoqi@0 427 XMM13, XMM13b,
aoqi@0 428 XMM14, XMM14b,
aoqi@0 429 XMM15, XMM15b
aoqi@0 430 #endif
aoqi@0 431 );
aoqi@0 432
aoqi@0 433 // Class for all 128bit vector registers
aoqi@0 434 reg_class vectorx_reg(XMM0, XMM0b, XMM0c, XMM0d,
aoqi@0 435 XMM1, XMM1b, XMM1c, XMM1d,
aoqi@0 436 XMM2, XMM2b, XMM2c, XMM2d,
aoqi@0 437 XMM3, XMM3b, XMM3c, XMM3d,
aoqi@0 438 XMM4, XMM4b, XMM4c, XMM4d,
aoqi@0 439 XMM5, XMM5b, XMM5c, XMM5d,
aoqi@0 440 XMM6, XMM6b, XMM6c, XMM6d,
aoqi@0 441 XMM7, XMM7b, XMM7c, XMM7d
aoqi@0 442 #ifdef _LP64
aoqi@0 443 ,XMM8, XMM8b, XMM8c, XMM8d,
aoqi@0 444 XMM9, XMM9b, XMM9c, XMM9d,
aoqi@0 445 XMM10, XMM10b, XMM10c, XMM10d,
aoqi@0 446 XMM11, XMM11b, XMM11c, XMM11d,
aoqi@0 447 XMM12, XMM12b, XMM12c, XMM12d,
aoqi@0 448 XMM13, XMM13b, XMM13c, XMM13d,
aoqi@0 449 XMM14, XMM14b, XMM14c, XMM14d,
aoqi@0 450 XMM15, XMM15b, XMM15c, XMM15d
aoqi@0 451 #endif
aoqi@0 452 );
aoqi@0 453
aoqi@0 454 // Class for all 256bit vector registers
aoqi@0 455 reg_class vectory_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
aoqi@0 456 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
aoqi@0 457 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
aoqi@0 458 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
aoqi@0 459 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
aoqi@0 460 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
aoqi@0 461 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
aoqi@0 462 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
aoqi@0 463 #ifdef _LP64
aoqi@0 464 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
aoqi@0 465 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
aoqi@0 466 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
aoqi@0 467 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
aoqi@0 468 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
aoqi@0 469 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
aoqi@0 470 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
aoqi@0 471 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
aoqi@0 472 #endif
aoqi@0 473 );
aoqi@0 474
aoqi@0 475 %}
aoqi@0 476
aoqi@0 477
aoqi@0 478 //----------SOURCE BLOCK-------------------------------------------------------
aoqi@0 479 // This is a block of C++ code which provides values, functions, and
aoqi@0 480 // definitions necessary in the rest of the architecture description
aoqi@0 481
aoqi@0 482 source_hpp %{
aoqi@0 483 // Header information of the source block.
aoqi@0 484 // Method declarations/definitions which are used outside
aoqi@0 485 // the ad-scope can conveniently be defined here.
aoqi@0 486 //
aoqi@0 487 // To keep related declarations/definitions/uses close together,
aoqi@0 488 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
aoqi@0 489
// x86 emits no call trampoline stubs, so both size queries return 0.
aoqi@0 490 class CallStubImpl {
aoqi@0 491
aoqi@0 492 //--------------------------------------------------------------
aoqi@0 493 //---< Used for optimization in Compile::shorten_branches >---
aoqi@0 494 //--------------------------------------------------------------
aoqi@0 495
aoqi@0 496 public:
aoqi@0 497 // Size of call trampoline stub.
aoqi@0 498 static uint size_call_trampoline() {
aoqi@0 499 return 0; // no call trampolines on this platform
aoqi@0 500 }
aoqi@0 501
aoqi@0 502 // number of relocations needed by a call trampoline stub
aoqi@0 503 static uint reloc_call_trampoline() {
aoqi@0 504 return 0; // no call trampolines on this platform
aoqi@0 505 }
aoqi@0 506 };
aoqi@0 507
// Sizing and emission of the exception and deopt handler stubs; the
// emit_* bodies live in the source %{ %} block below.
aoqi@0 508 class HandlerImpl {
aoqi@0 509
aoqi@0 510 public:
aoqi@0 511
aoqi@0 512 static int emit_exception_handler(CodeBuffer &cbuf);
aoqi@0 513 static int emit_deopt_handler(CodeBuffer& cbuf);
aoqi@0 514
aoqi@0 515 static uint size_exception_handler() {
aoqi@0 516 // NativeCall instruction size is the same as NativeJump.
aoqi@0 517 // exception handler starts out as jump and can be patched to
aoqi@0 518 // a call by deoptimization. (4932387)
aoqi@0 519 // Note that this value is also credited (in output.cpp) to
aoqi@0 520 // the size of the code section.
aoqi@0 521 return NativeJump::instruction_size;
aoqi@0 522 }
aoqi@0 523
aoqi@0 524 #ifdef _LP64
aoqi@0 525 static uint size_deopt_handler() {
aoqi@0 526 // three 5 byte instructions
aoqi@0 527 return 15;
aoqi@0 528 }
aoqi@0 529 #else
aoqi@0 530 static uint size_deopt_handler() {
aoqi@0 531 // NativeCall instruction size is the same as NativeJump.
aoqi@0 532 // exception handler starts out as jump and can be patched to
aoqi@0 533 // a call by deoptimization. (4932387)
aoqi@0 534 // Note that this value is also credited (in output.cpp) to
aoqi@0 535 // the size of the code section.
aoqi@0 536 return 5 + NativeJump::instruction_size; // pushl(); jmp;
aoqi@0 537 }
aoqi@0 538 #endif
aoqi@0 539 };
aoqi@0 540
aoqi@0 541 %} // end source_hpp
aoqi@0 542
aoqi@0 543 source %{
aoqi@0 544
aoqi@0 545 // Emit exception handler code.
aoqi@0 546 // Stuff framesize into a register and call a VM stub routine.
// Returns the offset of the handler within the stub section, or 0 if the
// stub buffer could not be expanded.
aoqi@0 547 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
aoqi@0 548
aoqi@0 549 // Note that the code buffer's insts_mark is always relative to insts.
aoqi@0 550 // That's why we must use the macroassembler to generate a handler.
aoqi@0 551 MacroAssembler _masm(&cbuf);
aoqi@0 552 address base = __ start_a_stub(size_exception_handler());
aoqi@0 553 if (base == NULL) return 0; // CodeBuffer::expand failed
aoqi@0 554 int offset = __ offset();
// Single jump to the shared exception blob; must fit in
// size_exception_handler() bytes (asserted below).
aoqi@0 555 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
aoqi@0 556 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
aoqi@0 557 __ end_a_stub();
aoqi@0 558 return offset;
aoqi@0 559 }
aoqi@0 560
aoqi@0 561 // Emit deopt handler code.
// Pushes the handler's own pc and jumps to the deopt blob's unpack entry.
// Returns the offset of the handler within the stub section, or 0 if the
// stub buffer could not be expanded.
aoqi@0 562 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
aoqi@0 563
aoqi@0 564 // Note that the code buffer's insts_mark is always relative to insts.
aoqi@0 565 // That's why we must use the macroassembler to generate a handler.
aoqi@0 566 MacroAssembler _masm(&cbuf);
aoqi@0 567 address base = __ start_a_stub(size_deopt_handler());
aoqi@0 568 if (base == NULL) return 0; // CodeBuffer::expand failed
aoqi@0 569 int offset = __ offset();
aoqi@0 570
aoqi@0 571 #ifdef _LP64
aoqi@0 572 address the_pc = (address) __ pc();
aoqi@0 573 Label next;
aoqi@0 574 // push a "the_pc" on the stack without destroying any registers
aoqi@0 575 // as they all may be live.
aoqi@0 576
aoqi@0 577 // push address of "next"
aoqi@0 578 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
aoqi@0 579 __ bind(next);
aoqi@0 580 // adjust it so it matches "the_pc"
aoqi@0 581 __ subptr(Address(rsp, 0), __ offset() - offset);
aoqi@0 582 #else
// 32-bit: the current pc is directly pushable as an immediate.
aoqi@0 583 InternalAddress here(__ pc());
aoqi@0 584 __ pushptr(here.addr());
aoqi@0 585 #endif
aoqi@0 586
aoqi@0 587 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
aoqi@0 588 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
aoqi@0 589 __ end_a_stub();
aoqi@0 590 return offset;
aoqi@0 591 }
aoqi@0 592
aoqi@0 593
aoqi@0 594 //=============================================================================
aoqi@0 595
aoqi@0 596 // Float masks come from different places depending on platform.
// LP64 gets them from generated StubRoutines; 32-bit uses in-memory
// constant pools. NOTE(review): the *_pool symbols are presumably
// declared in the 32-bit-specific ad file -- confirm before touching.
aoqi@0 597 #ifdef _LP64
aoqi@0 598 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
aoqi@0 599 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
aoqi@0 600 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
aoqi@0 601 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
aoqi@0 602 #else
aoqi@0 603 static address float_signmask() { return (address)float_signmask_pool; }
aoqi@0 604 static address float_signflip() { return (address)float_signflip_pool; }
aoqi@0 605 static address double_signmask() { return (address)double_signmask_pool; }
aoqi@0 606 static address double_signflip() { return (address)double_signflip_pool; }
aoqi@0 607 #endif
aoqi@0 608
aoqi@0 609
// Returns whether the ideal opcode has a match rule AND the current
// CPU features / VM flags allow it to be used.
aoqi@0 610 const bool Matcher::match_rule_supported(int opcode) {
aoqi@0 611 if (!has_match_rule(opcode))
aoqi@0 612 return false;
aoqi@0 613
aoqi@0 614 switch (opcode) {
aoqi@0 615 case Op_PopCountI:
aoqi@0 616 case Op_PopCountL:
// POPCNT requires the flag (set from CPU feature detection).
aoqi@0 617 if (!UsePopCountInstruction)
aoqi@0 618 return false;
aoqi@0 619 break;
aoqi@0 620 case Op_MulVI:
aoqi@0 621 if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
aoqi@0 622 return false;
aoqi@0 623 break;
aoqi@0 624 case Op_CompareAndSwapL:
aoqi@0 625 #ifdef _LP64
aoqi@0 626 case Op_CompareAndSwapP:
aoqi@0 627 #endif
// 8-byte CAS needs cmpxchg8b/cmpxchg16b support.
aoqi@0 628 if (!VM_Version::supports_cx8())
aoqi@0 629 return false;
aoqi@0 630 break;
aoqi@0 631 }
aoqi@0 632
aoqi@0 633 return true; // Per default match rules are supported.
aoqi@0 634 }
aoqi@0 635
// Max vector size in bytes. 0 if not supported.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  assert(is_java_primitive(bt), "only primitive type vectors");
  if (UseSSE < 2) return 0;
  // SSE2 supports 128bit vectors for all types.
  // AVX2 supports 256bit vectors for all types.
  int size = (UseAVX > 1) ? 32 : 16;
  // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
  if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
    size = 32;
  // Use flag to limit vector size.
  size = MIN2(size,(int)MaxVectorSize);
  // Minimum 2 values in vector (or 4 for bytes).
  // NOTE: the case groups below deliberately fall through, so each
  // larger element type also runs the checks of the smaller ones.
  switch (bt) {
  case T_DOUBLE:
  case T_LONG:
    if (size < 16) return 0;
    // fall through
  case T_FLOAT:
  case T_INT:
    if (size < 8) return 0;
    // fall through
  case T_BOOLEAN:
  case T_BYTE:
  case T_CHAR:
  case T_SHORT:
    if (size < 4) return 0;
    break;
  default:
    ShouldNotReachHere();
  }
  return size;
}
aoqi@0 667
aoqi@0 668 // Limits on vector size (number of elements) loaded into vector.
aoqi@0 669 const int Matcher::max_vector_size(const BasicType bt) {
aoqi@0 670 return vector_width_in_bytes(bt)/type2aelembytes(bt);
aoqi@0 671 }
aoqi@0 672 const int Matcher::min_vector_size(const BasicType bt) {
aoqi@0 673 int max_size = max_vector_size(bt);
aoqi@0 674 // Min size which can be loaded into vector is 4 bytes.
aoqi@0 675 int size = (type2aelembytes(bt) == 1) ? 4 : 2;
aoqi@0 676 return MIN2(size,max_size);
aoqi@0 677 }
aoqi@0 678
aoqi@0 679 // Vector ideal reg corresponding to specidied size in bytes
aoqi@0 680 const int Matcher::vector_ideal_reg(int size) {
aoqi@0 681 assert(MaxVectorSize >= size, "");
aoqi@0 682 switch(size) {
aoqi@0 683 case 4: return Op_VecS;
aoqi@0 684 case 8: return Op_VecD;
aoqi@0 685 case 16: return Op_VecX;
aoqi@0 686 case 32: return Op_VecY;
aoqi@0 687 }
aoqi@0 688 ShouldNotReachHere();
aoqi@0 689 return 0;
aoqi@0 690 }
aoqi@0 691
// Only lowest bits of xmm reg are used for vector shift count.
const int Matcher::vector_shift_count_ideal_reg(int size) {
  // A 32-bit vector register (VecS) is always enough to hold the count,
  // regardless of the vector size being shifted.
  return Op_VecS;
}
aoqi@0 696
// x86 supports misaligned vectors store/load.
const bool Matcher::misaligned_vectors_ok() {
  // Unaligned accesses are allowed unless the user forces alignment.
  return !AlignVector; // can be changed by flag
}
aoqi@0 701
// x86 AES instructions are compatible with SunJCE expanded
// keys, hence we do not need to pass the original key to stubs
const bool Matcher::pass_original_key_for_aes() {
  return false;
}
aoqi@0 707
aoqi@0 708 // Helper methods for MachSpillCopyNode::implementation().
aoqi@0 709 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
aoqi@0 710 int src_hi, int dst_hi, uint ireg, outputStream* st) {
aoqi@0 711 // In 64-bit VM size calculation is very complex. Emitting instructions
aoqi@0 712 // into scratch buffer is used to get size in 64-bit VM.
aoqi@0 713 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
aoqi@0 714 assert(ireg == Op_VecS || // 32bit vector
aoqi@0 715 (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
aoqi@0 716 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
aoqi@0 717 "no non-adjacent vector moves" );
aoqi@0 718 if (cbuf) {
aoqi@0 719 MacroAssembler _masm(cbuf);
aoqi@0 720 int offset = __ offset();
aoqi@0 721 switch (ireg) {
aoqi@0 722 case Op_VecS: // copy whole register
aoqi@0 723 case Op_VecD:
aoqi@0 724 case Op_VecX:
aoqi@0 725 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
aoqi@0 726 break;
aoqi@0 727 case Op_VecY:
aoqi@0 728 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
aoqi@0 729 break;
aoqi@0 730 default:
aoqi@0 731 ShouldNotReachHere();
aoqi@0 732 }
aoqi@0 733 int size = __ offset() - offset;
aoqi@0 734 #ifdef ASSERT
aoqi@0 735 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
aoqi@0 736 assert(!do_size || size == 4, "incorrect size calculattion");
aoqi@0 737 #endif
aoqi@0 738 return size;
aoqi@0 739 #ifndef PRODUCT
aoqi@0 740 } else if (!do_size) {
aoqi@0 741 switch (ireg) {
aoqi@0 742 case Op_VecS:
aoqi@0 743 case Op_VecD:
aoqi@0 744 case Op_VecX:
aoqi@0 745 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
aoqi@0 746 break;
aoqi@0 747 case Op_VecY:
aoqi@0 748 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
aoqi@0 749 break;
aoqi@0 750 default:
aoqi@0 751 ShouldNotReachHere();
aoqi@0 752 }
aoqi@0 753 #endif
aoqi@0 754 }
aoqi@0 755 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
aoqi@0 756 return 4;
aoqi@0 757 }
aoqi@0 758
aoqi@0 759 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
aoqi@0 760 int stack_offset, int reg, uint ireg, outputStream* st) {
aoqi@0 761 // In 64-bit VM size calculation is very complex. Emitting instructions
aoqi@0 762 // into scratch buffer is used to get size in 64-bit VM.
aoqi@0 763 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
aoqi@0 764 if (cbuf) {
aoqi@0 765 MacroAssembler _masm(cbuf);
aoqi@0 766 int offset = __ offset();
aoqi@0 767 if (is_load) {
aoqi@0 768 switch (ireg) {
aoqi@0 769 case Op_VecS:
aoqi@0 770 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
aoqi@0 771 break;
aoqi@0 772 case Op_VecD:
aoqi@0 773 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
aoqi@0 774 break;
aoqi@0 775 case Op_VecX:
aoqi@0 776 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
aoqi@0 777 break;
aoqi@0 778 case Op_VecY:
aoqi@0 779 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
aoqi@0 780 break;
aoqi@0 781 default:
aoqi@0 782 ShouldNotReachHere();
aoqi@0 783 }
aoqi@0 784 } else { // store
aoqi@0 785 switch (ireg) {
aoqi@0 786 case Op_VecS:
aoqi@0 787 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
aoqi@0 788 break;
aoqi@0 789 case Op_VecD:
aoqi@0 790 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
aoqi@0 791 break;
aoqi@0 792 case Op_VecX:
aoqi@0 793 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
aoqi@0 794 break;
aoqi@0 795 case Op_VecY:
aoqi@0 796 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
aoqi@0 797 break;
aoqi@0 798 default:
aoqi@0 799 ShouldNotReachHere();
aoqi@0 800 }
aoqi@0 801 }
aoqi@0 802 int size = __ offset() - offset;
aoqi@0 803 #ifdef ASSERT
aoqi@0 804 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
aoqi@0 805 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
aoqi@0 806 assert(!do_size || size == (5+offset_size), "incorrect size calculattion");
aoqi@0 807 #endif
aoqi@0 808 return size;
aoqi@0 809 #ifndef PRODUCT
aoqi@0 810 } else if (!do_size) {
aoqi@0 811 if (is_load) {
aoqi@0 812 switch (ireg) {
aoqi@0 813 case Op_VecS:
aoqi@0 814 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
aoqi@0 815 break;
aoqi@0 816 case Op_VecD:
aoqi@0 817 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
aoqi@0 818 break;
aoqi@0 819 case Op_VecX:
aoqi@0 820 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
aoqi@0 821 break;
aoqi@0 822 case Op_VecY:
aoqi@0 823 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
aoqi@0 824 break;
aoqi@0 825 default:
aoqi@0 826 ShouldNotReachHere();
aoqi@0 827 }
aoqi@0 828 } else { // store
aoqi@0 829 switch (ireg) {
aoqi@0 830 case Op_VecS:
aoqi@0 831 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
aoqi@0 832 break;
aoqi@0 833 case Op_VecD:
aoqi@0 834 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
aoqi@0 835 break;
aoqi@0 836 case Op_VecX:
aoqi@0 837 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
aoqi@0 838 break;
aoqi@0 839 case Op_VecY:
aoqi@0 840 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
aoqi@0 841 break;
aoqi@0 842 default:
aoqi@0 843 ShouldNotReachHere();
aoqi@0 844 }
aoqi@0 845 }
aoqi@0 846 #endif
aoqi@0 847 }
aoqi@0 848 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
aoqi@0 849 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
aoqi@0 850 return 5+offset_size;
aoqi@0 851 }
aoqi@0 852
aoqi@0 853 static inline jfloat replicate4_imm(int con, int width) {
aoqi@0 854 // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
aoqi@0 855 assert(width == 1 || width == 2, "only byte or short types here");
aoqi@0 856 int bit_width = width * 8;
aoqi@0 857 jint val = con;
aoqi@0 858 val &= (1 << bit_width) - 1; // mask off sign bits
aoqi@0 859 while(bit_width < 32) {
aoqi@0 860 val |= (val << bit_width);
aoqi@0 861 bit_width <<= 1;
aoqi@0 862 }
aoqi@0 863 jfloat fval = *((jfloat*) &val); // coerce to float type
aoqi@0 864 return fval;
aoqi@0 865 }
aoqi@0 866
aoqi@0 867 static inline jdouble replicate8_imm(int con, int width) {
aoqi@0 868 // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
aoqi@0 869 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
aoqi@0 870 int bit_width = width * 8;
aoqi@0 871 jlong val = con;
aoqi@0 872 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits
aoqi@0 873 while(bit_width < 64) {
aoqi@0 874 val |= (val << bit_width);
aoqi@0 875 bit_width <<= 1;
aoqi@0 876 }
aoqi@0 877 jdouble dval = *((jdouble*) &val); // coerce to double type
aoqi@0 878 return dval;
aoqi@0 879 }
aoqi@0 880
#ifndef PRODUCT
// Debug listing for a nop padding node.
void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("nop \t# %d bytes pad for loops and calls", _count);
}
#endif
aoqi@0 886
// Emit '_count' bytes of nop padding into the code buffer.
void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
  MacroAssembler _masm(&cbuf);
  __ nop(_count);
}
aoqi@0 891
// Total padding size in bytes equals the requested pad count.
uint MachNopNode::size(PhaseRegAlloc*) const {
  return _count;
}
aoqi@0 895
#ifndef PRODUCT
// Debug listing for a breakpoint node.
void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("# breakpoint");
}
#endif
aoqi@0 901
// Emit an int3 trap instruction for the breakpoint node.
void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
  MacroAssembler _masm(&cbuf);
  __ int3();
}
aoqi@0 906
// Size is computed generically from the emitted instruction.
uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
  return MachNode::size(ra_);
}
aoqi@0 910
aoqi@0 911 %}
aoqi@0 912
encode %{

  // Save RSP into rbp_mh_SP_save before a call that may modify the SP
  // (the _mh_ name suggests method-handle calls — confirm at call sites).
  enc_class preserve_SP %{
    debug_only(int off0 = cbuf.insts_size());
    MacroAssembler _masm(&cbuf);
    // RBP is preserved across all calls, even compiled calls.
    // Use it to preserve RSP in places where the callee might change the SP.
    __ movptr(rbp_mh_SP_save, rsp);
    debug_only(int off1 = cbuf.insts_size());
    assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
  %}

  // Restore RSP from the register saved by preserve_SP.
  enc_class restore_SP %{
    MacroAssembler _masm(&cbuf);
    __ movptr(rsp, rbp_mh_SP_save);
  %}

  // After a call, optionally verify that the stack depth is unchanged by
  // checking for the magic cookie 0xbadb100d placed in the caller's frame.
  // Traps with int3 on mismatch.
  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      MacroAssembler _masm(&cbuf);
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}

%}
aoqi@0 945
aoqi@0 946
aoqi@0 947 //----------OPERANDS-----------------------------------------------------------
aoqi@0 948 // Operand definitions must precede instruction definitions for correct parsing
aoqi@0 949 // in the ADLC because operands constitute user defined types which are used in
aoqi@0 950 // instruction definitions.
aoqi@0 951
// Vectors
// Register operands for vector values of each supported width.
// Each operand allocates from the matching vector register class and
// uses the standard register interface with an empty format.

// 4-byte (32-bit) vector.
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

// 8-byte (64-bit) vector.
operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

// 16-byte (128-bit) vector.
operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

// 32-byte (256-bit) vector.
operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}
aoqi@0 984
aoqi@0 985
aoqi@0 986 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)
aoqi@0 987
aoqi@0 988 // ============================================================================
aoqi@0 989
// Halt node: emit an int3 trap so execution stops if an "unreachable"
// code path is ever taken.
instruct ShouldNotReachHere() %{
  match(Halt);
  format %{ "int3\t# ShouldNotReachHere" %}
  ins_encode %{
    __ int3();
  %}
  ins_pipe(pipe_slow);
%}
aoqi@0 998
aoqi@0 999 // ============================================================================
aoqi@0 1000
// Scalar single-precision float add.
// SSE forms are two-operand destructive (dst += src); AVX forms are
// three-operand non-destructive (dst = src1 + src2).

// SSE: register + register.
instruct addF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst src));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// SSE: memory operand folded into the add.
instruct addF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst (LoadF src)));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// SSE: add a float constant loaded from the constant table.
instruct addF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst con));
  format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// AVX: register + register.
instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 src2));

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX: memory operand folded into the add.
instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// AVX: add a float constant loaded from the constant table.
instruct addF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src con));

  format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
aoqi@0 1071
// Scalar double-precision float add.
// SSE forms are two-operand destructive; AVX forms are three-operand.

// SSE2: register + register.
instruct addD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst src));

  format %{ "addsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// SSE2: memory operand folded into the add.
instruct addD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst (LoadD src)));

  format %{ "addsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// SSE2: add a double constant loaded from the constant table.
instruct addD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst con));
  format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// AVX: register + register.
instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 src2));

  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX: memory operand folded into the add.
instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 (LoadD src2)));

  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// AVX: add a double constant loaded from the constant table.
instruct addD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src con));

  format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
aoqi@0 1142
// Scalar single-precision float subtract.
// SSE forms are two-operand destructive; AVX forms are three-operand.

// SSE: register - register.
instruct subF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst src));

  format %{ "subss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// SSE: memory operand folded into the subtract.
instruct subF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst (LoadF src)));

  format %{ "subss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// SSE: subtract a float constant loaded from the constant table.
instruct subF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst con));
  format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// AVX: register - register.
instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 src2));

  format %{ "vsubss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX: memory operand folded into the subtract.
instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 (LoadF src2)));

  format %{ "vsubss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// AVX: subtract a float constant loaded from the constant table.
instruct subF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src con));

  format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
aoqi@0 1213
// Scalar double-precision float subtract.
// SSE forms are two-operand destructive; AVX forms are three-operand.

// SSE2: register - register.
instruct subD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst src));

  format %{ "subsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// SSE2: memory operand folded into the subtract.
instruct subD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst (LoadD src)));

  format %{ "subsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// SSE2: subtract a double constant loaded from the constant table.
instruct subD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst con));
  format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// AVX: register - register.
instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 src2));

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX: memory operand folded into the subtract.
instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 (LoadD src2)));

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// AVX: subtract a double constant loaded from the constant table.
instruct subD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src con));

  format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
aoqi@0 1284
// Scalar single-precision float multiply.
// SSE forms are two-operand destructive; AVX forms are three-operand.

// SSE: register * register.
instruct mulF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst src));

  format %{ "mulss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// SSE: memory operand folded into the multiply.
instruct mulF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst (LoadF src)));

  format %{ "mulss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// SSE: multiply by a float constant loaded from the constant table.
instruct mulF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst con));
  format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// AVX: register * register.
instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 src2));

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX: memory operand folded into the multiply.
instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// AVX: multiply by a float constant loaded from the constant table.
instruct mulF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src con));

  format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
aoqi@0 1355
// Scalar double-precision float multiply.
// SSE forms are two-operand destructive; AVX forms are three-operand.

// SSE2: register * register.
instruct mulD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst src));

  format %{ "mulsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// SSE2: memory operand folded into the multiply.
instruct mulD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst (LoadD src)));

  format %{ "mulsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// SSE2: multiply by a double constant loaded from the constant table.
instruct mulD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst con));
  format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// AVX: register * register.
instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 src2));

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX: memory operand folded into the multiply.
instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 (LoadD src2)));

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// AVX: multiply by a double constant loaded from the constant table.
instruct mulD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src con));

  format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
aoqi@0 1426
// Scalar single-precision float divide.
// SSE forms are two-operand destructive; AVX forms are three-operand.

// SSE: register / register.
instruct divF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst src));

  format %{ "divss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// SSE: memory operand folded into the divide.
instruct divF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst (LoadF src)));

  format %{ "divss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// SSE: divide by a float constant loaded from the constant table.
instruct divF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst con));
  format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// AVX: register / register.
instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 src2));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX: memory operand folded into the divide.
instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 (LoadF src2)));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// AVX: divide by a float constant loaded from the constant table.
instruct divF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src con));

  format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
aoqi@0 1497
// ---- Scalar double-precision division ----
// Same five shapes as the float rules above; doubles require SSE2.

// SSE2 (UseAVX == 0) destructive two-operand register form: dst = dst / src.
instruct divD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst src));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// SSE2 two-operand form with a memory divisor.
instruct divD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst (LoadD src)));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// SSE2 two-operand form dividing by a constant-table double.
instruct divD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst con));
  format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// AVX three-operand (non-destructive) register form: dst = src1 / src2.
instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 src2));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX three-operand form with a memory divisor.
instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 (LoadD src2)));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// AVX three-operand form with a constant-table divisor.
instruct divD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src con));

  format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
aoqi@0 1568
// ---- Absolute value ----
// Implemented by ANDing away the sign bit with a mask held at an external
// address (float_signmask()/double_signmask()).

// SSE destructive form: clear the float sign bit in place.
instruct absF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AbsF dst));
  ins_cost(150);
  format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

// AVX non-destructive form: dst = src with sign bit cleared.
instruct absF_reg_reg(regF dst, regF src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    bool vector256 = false;   // 128-bit (scalar in xmm) operation
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector256);
  %}
  ins_pipe(pipe_slow);
%}

// SSE2 destructive form: clear the double sign bit in place.
instruct absD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AbsD dst));
  ins_cost(150);
  format %{ "andpd $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

// AVX non-destructive form for doubles.
instruct absD_reg_reg(regD dst, regD src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    bool vector256 = false;   // 128-bit (scalar in xmm) operation
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector256);
  %}
  ins_pipe(pipe_slow);
%}
aoqi@0 1618
// ---- Negation ----
// Implemented by XORing the sign bit with a flip mask held at an external
// address (float_signflip()/double_signflip()).

// SSE destructive form: flip the float sign bit in place.
instruct negF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (NegF dst));
  ins_cost(150);
  format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

// AVX non-destructive form: dst = src with sign bit flipped.
instruct negF_reg_reg(regF dst, regF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  ins_cost(150);
  format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    bool vector256 = false;   // 128-bit (scalar in xmm) operation
    __ vxorps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signflip()), vector256);
  %}
  ins_pipe(pipe_slow);
%}

// SSE2 destructive form: flip the double sign bit in place.
instruct negD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (NegD dst));
  ins_cost(150);
  format %{ "xorpd $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

// AVX non-destructive form for doubles.
instruct negD_reg_reg(regD dst, regD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));
  ins_cost(150);
  format %{ "vxorpd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    bool vector256 = false;   // 128-bit (scalar in xmm) operation
    __ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signflip()), vector256);
  %}
  ins_pipe(pipe_slow);
%}
aoqi@0 1668
// ---- Square root ----
// Float sqrt is matched from the ConvD2F(SqrtD(ConvF2D x)) shape the ideal
// graph produces (there is no SqrtF node here); a direct sqrtss replaces the
// widen/sqrt/narrow sequence.

instruct sqrtF_reg(regF dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Same pattern with the float operand loaded directly from memory.
instruct sqrtF_mem(regF dst, memory src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// Same pattern with a constant-table float operand.
instruct sqrtF_imm(regF dst, immF con) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
  format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Double sqrt: register, memory and constant-table operand forms.
instruct sqrtD_reg(regD dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD src));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_mem(regD dst, memory src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD (LoadD src)));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_imm(regD dst, immD con) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD con));
  format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
aoqi@0 1738
aoqi@0 1739
aoqi@0 1740 // ====================VECTOR INSTRUCTIONS=====================================
aoqi@0 1741
// ---- Vector loads ----
// One rule per vector width; the predicate keys off the LoadVector's
// memory size, and the destination register class matches that width
// (vecS=4, vecD=8, vecX=16, vecY=32 bytes).

// Load vectors (4 bytes long)
instruct loadV4(vecS dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movd $dst,$mem\t! load vector (4 bytes)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (8 bytes long)
instruct loadV8(vecD dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movq $dst,$mem\t! load vector (8 bytes)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (16 bytes long); movdqu tolerates unaligned addresses.
instruct loadV16(vecX dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (32 bytes long); unaligned 256-bit AVX load.
instruct loadV32(vecY dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 32);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 1789
// ---- Vector stores ----
// Mirrors the load rules: one rule per StoreVector memory size, using the
// same width-matched move instructions.

// Store vectors (4 bytes long)
instruct storeV4(memory mem, vecS src) %{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movd $mem,$src\t! store vector (4 bytes)" %}
  ins_encode %{
    __ movdl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store vectors (8 bytes long)
instruct storeV8(memory mem, vecD src) %{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movq $mem,$src\t! store vector (8 bytes)" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store vectors (16 bytes long); movdqu tolerates unaligned addresses.
instruct storeV16(memory mem, vecX src) %{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store vectors (32 bytes long); unaligned 256-bit AVX store.
instruct storeV32(memory mem, vecY src) %{
  predicate(n->as_StoreVector()->memory_size() == 32);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 1834
// Replicate byte scalar to be vector.
// Broadcast sequence: movd puts the GPR byte in the low dword,
// punpcklbw duplicates it into the low word, pshuflw fills the low
// 8 bytes; wider vectors additionally duplicate the low quadword
// (punpcklqdq) and the low 128-bit lane (vinserti128h).
instruct Repl4B(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate8B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// 16 lanes: extend the 8-byte pattern across the full 128-bit register.
instruct Repl16B(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate16B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 32 lanes: also copy the low 128-bit lane into the high lane.
instruct Repl32B(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 1897
// Replicate byte scalar immediate to be vector by loading from const table.
// replicate4_imm/replicate8_imm build the 4- or 8-byte repeated pattern for
// the immediate, which is then placed in the constant table and loaded.
instruct Repl4B_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

// 16 lanes: load the 8-byte pattern, then duplicate the low quadword.
instruct Repl16B_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate16B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 1930
// Replicate a byte immediate into all 32 lanes of a 256-bit vector:
// load the 8-byte repeated pattern from the constant table, duplicate
// the low quadword, then copy the low 128-bit lane into the high lane.
instruct Repl32B_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB con));
  // Fixed stray 'l' in the disassembly tag: "lreplicate32B" -> "replicate32B"
  // (matches the naming of every other replicate rule).
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate32B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 1944
// Replicate byte scalar zero to be vector.
// Zeroing is width-independent for <=128-bit vectors: pxor dst,dst clears
// the whole xmm register regardless of lane count.
instruct Repl4B_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate4B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8B_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate8B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16B_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate16B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// 256-bit zero uses the three-operand vpxor.
instruct Repl32B_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %}
  ins_encode %{
    // NOTE(review): original comment said "Use vxorpd since AVX does not
    // have vpxor for 256-bit" but the code calls vpxor; 256-bit vpxor is
    // an AVX2 instruction (stale comment, kept behavior unchanged).
    bool vector256 = true;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}
aoqi@0 1987
// Replicate char/short (2 byte) scalar to be vector.
// Broadcast sequence: movd places the GPR value in the low dword,
// pshuflw duplicates the low word across the low 8 bytes; wider vectors
// additionally duplicate the low quadword and the low 128-bit lane.
instruct Repl2S(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate2S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 2042
// Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
// replicate4_imm/replicate8_imm repeat the 2-byte immediate to fill 4/8 bytes.
instruct Repl2S_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// 8 lanes: load the 8-byte pattern, then duplicate the low quadword.
instruct Repl8S_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 16 lanes: additionally copy the low 128-bit lane into the high lane.
instruct Repl16S_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 2089
// Replicate char/short (2 byte) scalar zero to be vector.
// pxor dst,dst clears the whole xmm register for any <=128-bit length.
instruct Repl2S_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate2S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate4S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate8S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// 256-bit zero uses the three-operand vpxor.
instruct Repl16S_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %}
  ins_encode %{
    // NOTE(review): original comment said "Use vxorpd since AVX does not
    // have vpxor for 256-bit" but the code calls vpxor; 256-bit vpxor is
    // an AVX2 instruction (stale comment, kept behavior unchanged).
    bool vector256 = true;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}
aoqi@0 2132
// Replicate integer (4 byte) scalar to be vector.
// pshufd with selector 0x00 broadcasts the low dword to all four dwords.
instruct Repl2I(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate4I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// 8 lanes: broadcast within 128 bits, then copy into the high lane.
instruct Repl8I(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 2171
// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
// replicate8_imm builds the 8-byte (two-dword) repeated pattern for $con.
instruct Repl2I_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// 4 lanes: load the 8-byte pattern, then duplicate the low quadword.
instruct Repl4I_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
            "punpcklqdq $dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 8 lanes: additionally copy the low 128-bit lane into the high lane.
instruct Repl8I_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 2208
// Integer could be loaded into xmm register directly from memory.
// Same broadcast sequence as the register forms, but movd loads the
// scalar straight from the memory operand.
instruct Repl2I_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate4I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// 8 lanes: broadcast within 128 bits, then copy into the high lane.
instruct Repl8I_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 2247
// Replicate integer (4 byte) scalar zero to be vector.
// pxor dst,dst clears the whole xmm register.
instruct Repl2I_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  // Disassembly tag made consistent with the other *_zero rules
  // (was "! replicate2I", now "! replicate2I zero").
  format %{ "pxor $dst,$dst\t! replicate2I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}
aoqi@0 2258
// Replicate integer zero into a 4-element (128-bit) vector:
// pxor dst,dst clears the whole xmm register.
instruct Repl4I_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI zero));
  // Removed stray ')' from the disassembly tag (was "! replicate4I zero)").
  format %{ "pxor $dst,$dst\t! replicate4I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}
aoqi@0 2268
// Replicate integer zero into an 8-element (256-bit) vector using the
// three-operand vpxor.
instruct Repl8I_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %}
  ins_encode %{
    // NOTE(review): original comment said "Use vxorpd since AVX does not
    // have vpxor for 256-bit" but the code calls vpxor; 256-bit vpxor is
    // an AVX2 instruction (stale comment, kept behavior unchanged).
    bool vector256 = true;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}
aoqi@0 2280
aoqi@0 2281 // Replicate long (8 byte) scalar to be vector
aoqi@0 2282 #ifdef _LP64
// 64-bit VM: a long fits in one GPR, so move it straight into the XMM
// register and duplicate it into both quadword lanes with punpcklqdq.
instruct Repl2L(vecX dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// As Repl2L, then mirror the 128-bit result into the YMM high half.
instruct Repl4L(vecY dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 2308 #else // _LP64
// 32-bit VM: a long lives in a GPR pair, so assemble the 64-bit value in
// the XMM register from the lo/hi halves (movdl + punpckldq) before
// duplicating it. TEMP dst/tmp: both are clobbered before src is dead.
instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L"%}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    // HIGH_FROM_LOW maps the low-half register to its paired high half.
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// As the 32-bit Repl2L, then mirror into the YMM high half for 4 lanes.
instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 2344 #endif // _LP64
aoqi@0 2345
// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
// $constantaddress materializes the immediate in the constant section, so no
// GPR is needed regardless of word size.
instruct Repl2L_imm(vecX dst, immL con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit variant: duplicate in the low half, then mirror to the high half.
instruct Repl4L_imm(vecY dst, immL con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 2372
// Long could be loaded into xmm register directly from memory.
instruct Repl2L_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit variant: load + duplicate in the low half, mirror to the high half.
instruct Repl4L_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 2399
// Replicate long (8 byte) scalar zero to be vector (xor-with-self idiom).
instruct Repl2L_zero(vecX dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL zero));
  format %{ "pxor $dst,$dst\t! replicate2L zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4L_zero(vecY dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %}
  ins_encode %{
    // NOTE(review): the original comment said "use vxorpd since AVX does not
    // have vpxor for 256-bit" yet the code calls vpxor; presumably the
    // MacroAssembler handles the AVX1 fallback — confirm in assembler_x86.
    bool vector256 = true;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}
aoqi@0 2422
// Replicate float (4 byte) scalar to be vector: pshufd with an 0x00 control
// broadcasts element 0 of the source into every dword lane.
instruct Repl2F(vecD dst, regF src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  // Fixed format: the encoding shuffles from $src, not $dst, so the debug
  // listing now shows the operands actually emitted (matches Repl8F below).
  format %{ "pshufd $dst,$src,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F(vecX dst, regF src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  // Fixed format: source operand is $src (see ins_encode), was shown as $dst.
  format %{ "pshufd $dst,$src,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit variant: broadcast in the low half, then mirror it into the YMM
// high half with vinsertf128h.
instruct Repl8F(vecY dst, regF src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 2455
// Replicate float (4 byte) scalar zero to be vector. xorps (float domain)
// is used rather than pxor to avoid an SSE domain-crossing penalty.
instruct Repl2F_zero(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F_zero(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate4F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// 256-bit zero: vxorps exists for YMM on plain AVX (no AVX2 needed).
instruct Repl8F_zero(vecY dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateF zero));
  format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %}
  ins_encode %{
    bool vector256 = true;
    __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}
aoqi@0 2487
// Replicate double (8 bytes) scalar to be vector. pshufd control 0x44
// selects dwords {0,1,0,1}, i.e. duplicates the low quadword.
instruct Repl2D(vecX dst, regD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\t! replicate2D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit variant: duplicate in the low half, then mirror to the YMM high half.
instruct Repl4D(vecY dst, regD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 2510
// Replicate double (8 byte) scalar zero to be vector. xorpd keeps the
// operation in the double domain (cf. xorps for floats above).
instruct Repl2D_zero(vecX dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD zero));
  format %{ "xorpd $dst,$dst\t! replicate2D zero" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// 256-bit zero: vxorpd exists for YMM on plain AVX (no AVX2 needed).
instruct Repl4D_zero(vecY dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateD zero));
  format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %}
  ins_encode %{
    bool vector256 = true;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}
aoqi@0 2532
// ====================VECTOR ARITHMETIC=======================================

// --------------------------------- ADD --------------------------------------

// Bytes vector add.
// Pattern used throughout this section: the two-operand SSE rules match the
// destructive form (dst op= src); the _reg rules use the non-destructive
// AVX three-operand encodings (predicate UseAVX > 0, or UseAVX > 1 i.e.
// AVX2 for 256-bit integer ops); the _mem rules fold a vector load into
// the arithmetic op. vector256 selects 128- vs 256-bit encoding.
instruct vadd4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed4B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed8B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed16B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Memory form folds the vector load into the add (AVX allows unaligned mem operand).
instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit integer adds need AVX2 (UseAVX > 1).
instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 2633
// Shorts/Chars vector add (paddw/vpaddw; same SSE / AVX / AVX2 / memory-form
// pattern as the byte adds above).
instruct vadd2S(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed2S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed4S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed8S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit short adds need AVX2.
instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 2730
// Integers vector add (paddd/vpaddd; same SSE / AVX / AVX2 / memory-form
// pattern as above).
instruct vadd2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed2I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed4I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit int adds need AVX2.
instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 2806
// Longs vector add (paddq/vpaddq; same SSE / AVX / AVX2 / memory-form
// pattern as above; smallest form is 2 lanes = 128 bits).
instruct vadd2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL dst src));
  format %{ "paddq $dst,$src\t! add packed2L" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit long adds need AVX2.
instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 2861
// Floats vector add (addps/vaddps). Note: 256-bit FP adds only require
// plain AVX (UseAVX > 0), unlike the integer forms above which need AVX2.
instruct vadd2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed2F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
  ins_encode %{
    bool vector256 = false;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed4F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 2937
// Doubles vector add (addpd/vaddpd). As with floats, 256-bit forms only
// require plain AVX.
instruct vadd2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packed2D" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 2992
// --------------------------------- SUB --------------------------------------

// Bytes vector sub (psubb/vpsubb; mirrors the byte vector add section:
// destructive SSE form, AVX three-operand _reg form, load-folding _mem
// form, AVX2 for the 256-bit lengths).
instruct vsub4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed4B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed8B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed16B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit byte subs need AVX2.
instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3080
aoqi@0 3081 instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
aoqi@0 3082 predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
aoqi@0 3083 match(Set dst (SubVB src (LoadVector mem)));
aoqi@0 3084 format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
aoqi@0 3085 ins_encode %{
aoqi@0 3086 bool vector256 = true;
aoqi@0 3087 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 3088 %}
aoqi@0 3089 ins_pipe( pipe_slow );
aoqi@0 3090 %}
aoqi@0 3091
aoqi@0 3092 // Shorts/Chars vector sub
// Short/char-vector subtraction (SubVS): SSE in-place (psubw), AVX
// three-operand register, and AVX register-memory forms. The 16-short
// (256-bit) forms require UseAVX > 1 (AVX2 for 256-bit integer ops).
instruct vsub2S(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed2S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed4S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed8S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit forms: AVX2 required.
instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3188
aoqi@0 3189 // Integers vector sub
// Int-vector subtraction (SubVI): SSE in-place (psubd), AVX register and
// register-memory forms. 8-int (256-bit) forms require UseAVX > 1 (AVX2).
instruct vsub2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed2I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed4I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit forms: AVX2 required.
instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3264
aoqi@0 3265 // Longs vector sub
// Long-vector subtraction (SubVL): SSE in-place (psubq), AVX register and
// register-memory forms. 4-long (256-bit) forms require UseAVX > 1 (AVX2).
instruct vsub2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packed2L" %}
  ins_encode %{
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit forms: AVX2 required.
instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3319
aoqi@0 3320 // Floats vector sub
// Float-vector subtraction (SubVF): SSE in-place (subps), AVX register and
// register-memory forms. Unlike the integer rules, the 256-bit forms only
// need UseAVX > 0 — 256-bit FP ops are available from AVX1.
instruct vsub2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed2F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed4F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3395
aoqi@0 3396 // Doubles vector sub
// Double-vector subtraction (SubVD): SSE in-place (subpd), AVX register and
// register-memory forms. 256-bit FP forms need only UseAVX > 0 (AVX1).
instruct vsub2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD dst src));
  format %{ "subpd $dst,$src\t! sub packed2D" %}
  ins_encode %{
    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3450
aoqi@0 3451 // --------------------------------- MUL --------------------------------------
aoqi@0 3452
aoqi@0 3453 // Shorts/Chars vector mul
// Short/char-vector multiplication (MulVS), low 16 bits of each product:
// SSE in-place (pmullw), AVX register and register-memory forms.
// 16-short (256-bit) forms require UseAVX > 1 (AVX2 for 256-bit integer ops).
instruct vmul2S(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed2S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed4S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed8S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit forms: AVX2 required.
instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3549
aoqi@0 3550 // Integers vector mul (sse4_1)
// Int-vector multiplication (MulVI). pmulld is an SSE4.1 instruction, hence
// the UseSSE > 3 gate on the non-AVX forms (see section header: "sse4_1").
// 8-int (256-bit) forms require UseAVX > 1 (AVX2).
instruct vmul2I(vecD dst, vecD src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed2I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I(vecX dst, vecX src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed4I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit forms: AVX2 required.
instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3625
aoqi@0 3626 // Floats vector mul
// Float-vector multiplication (MulVF): SSE in-place (mulps), AVX register and
// register-memory forms. 256-bit FP forms need only UseAVX > 0 (AVX1).
instruct vmul2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed2F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed4F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3701
aoqi@0 3702 // Doubles vector mul
// Double-vector multiplication (MulVD): SSE in-place (mulpd), AVX register
// and register-memory forms. 256-bit FP forms need only UseAVX > 0 (AVX1).
instruct vmul2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packed2D" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3756
aoqi@0 3757 // --------------------------------- DIV --------------------------------------
aoqi@0 3758
aoqi@0 3759 // Floats vector div
// Float-vector division (DivVF): SSE in-place (divps), AVX register and
// register-memory forms. 256-bit FP forms need only UseAVX > 0 (AVX1).
// Note: only FP vectors get DIV rules — there is no packed integer divide
// instruction on x86, so no DivVI/DivVL rules exist.
instruct vdiv2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed2F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %}
  ins_encode %{
    bool vector256 = false;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed4F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3834
// Doubles vector div

// 2 doubles (128-bit vector), SSE destructive form.
instruct vdiv2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD dst src));
  format %{ "divpd $dst,$src\t! div packed2D" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 2 doubles, AVX three-operand form (128-bit: vector256 = false).
instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 2 doubles, AVX form with the divisor folded from memory.
instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 4 doubles (256-bit vector), AVX three-operand form (vector256 = true).
instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 4 doubles, AVX form with the divisor folded from memory.
instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3889
// ------------------------------ Shift ---------------------------------------

// Left and right shift count vectors are the same on x86
// (only lowest bits of xmm reg are used for count).
// Loads a scalar shift count from a general register into an XMM
// register; both LShiftCntV and RShiftCntV map to the same movd,
// so one rule serves every variable-shift pattern below.
instruct vshiftcnt(vecS dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movd $dst,$cnt\t! load shift count" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $cnt$$Register);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 3903
// ------------------------------ LeftShift -----------------------------------

// Shorts/Chars vector left shift
//
// Four variants per vector size:
//  - base form:      SSE, destructive, variable count in an XMM register
//  - *_imm:          SSE, destructive, 8-bit immediate count
//  - *_reg:          AVX three-operand, variable count (UseAVX > 0)
//  - *_reg_imm:      AVX three-operand, immediate count
// The 256-bit (packed16S) forms require UseAVX > 1 in the predicate.

// 2 shorts, SSE, variable count.
instruct vsll2S(vecS dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 2 shorts, SSE, immediate count.
instruct vsll2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// 2 shorts, AVX, variable count.
instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 2 shorts, AVX, immediate count.
instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 4 shorts, SSE, variable count.
instruct vsll4S(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 4 shorts, SSE, immediate count.
instruct vsll4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// 4 shorts, AVX, variable count.
instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 4 shorts, AVX, immediate count.
instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 8 shorts, SSE, variable count.
instruct vsll8S(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 8 shorts, SSE, immediate count.
instruct vsll8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// 8 shorts, AVX, variable count.
instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 8 shorts, AVX, immediate count.
instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 16 shorts (256-bit), variable count; predicate UseAVX > 1 gates the
// 256-bit integer encoding.
instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 16 shorts (256-bit), immediate count; requires UseAVX > 1.
instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 4054
// Integers vector left shift
// Same four-variant layout as the shorts section above, using pslld/vpslld.

// 2 ints, SSE, variable count.
instruct vsll2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 2 ints, SSE, immediate count.
instruct vsll2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// 2 ints, AVX, variable count.
instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 2 ints, AVX, immediate count.
instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 4 ints, SSE, variable count.
instruct vsll4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 4 ints, SSE, immediate count.
instruct vsll4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// 4 ints, AVX, variable count.
instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 4 ints, AVX, immediate count.
instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 8 ints (256-bit), variable count; requires UseAVX > 1.
instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 8 ints (256-bit), immediate count; requires UseAVX > 1.
instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 4161
// Longs vector left shift
// Uses psllq/vpsllq; no 64-bit SSE (vecD) forms since a single long
// already fills 64 bits — the smallest vector here is 2L in a vecX.

// 2 longs, SSE, variable count.
instruct vsll2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 2 longs, SSE, immediate count.
instruct vsll2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// 2 longs, AVX, variable count.
instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 2 longs, AVX, immediate count.
instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 4 longs (256-bit), variable count; requires UseAVX > 1.
instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 4 longs (256-bit), immediate count; requires UseAVX > 1.
instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 4226
// ----------------------- LogicalRightShift -----------------------------------

// Shorts vector logical right shift produces incorrect Java result
// for negative data because java code convert short value into int with
// sign extension before a shift. But char vectors are fine since chars are
// unsigned values.

// 2 shorts/chars, SSE, variable count.
instruct vsrl2S(vecS dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 2 shorts/chars, SSE, immediate count.
instruct vsrl2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// 2 shorts/chars, AVX, variable count.
instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 2 shorts/chars, AVX, immediate count.
instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 4 shorts/chars, SSE, variable count.
instruct vsrl4S(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 4 shorts/chars, SSE, immediate count.
instruct vsrl4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// 4 shorts/chars, AVX, variable count.
instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 4 shorts/chars, AVX, immediate count.
instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 8 shorts/chars, SSE, variable count.
instruct vsrl8S(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 8 shorts/chars, SSE, immediate count.
instruct vsrl8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// 8 shorts/chars, AVX, variable count.
instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 8 shorts/chars, AVX, immediate count.
instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 16 shorts/chars (256-bit), variable count; requires UseAVX > 1.
instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 16 shorts/chars (256-bit), immediate count; requires UseAVX > 1.
instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 4381
// Integers vector logical right shift
// Same four-variant layout as above, using psrld/vpsrld.

// 2 ints, SSE, variable count.
instruct vsrl2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 2 ints, SSE, immediate count.
instruct vsrl2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// 2 ints, AVX, variable count.
instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 2 ints, AVX, immediate count.
instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 4 ints, SSE, variable count.
instruct vsrl4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 4 ints, SSE, immediate count.
instruct vsrl4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// 4 ints, AVX, variable count.
instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 4 ints, AVX, immediate count.
instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 8 ints (256-bit), variable count; requires UseAVX > 1.
instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 8 ints (256-bit), immediate count; requires UseAVX > 1.
instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 4488
// Longs vector logical right shift
// Uses psrlq/vpsrlq; smallest form is 2L in a 128-bit vecX.

// 2 longs, SSE, variable count.
instruct vsrl2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 2 longs, SSE, immediate count.
instruct vsrl2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// 2 longs, AVX, variable count.
instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 2 longs, AVX, immediate count.
instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 4 longs (256-bit), variable count; requires UseAVX > 1.
instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 4 longs (256-bit), immediate count; requires UseAVX > 1.
instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}
aoqi@0 4553
aoqi@0 4554 // ------------------- ArithmeticRightShift -----------------------------------
aoqi@0 4555
aoqi@0 4556 // Shorts/Chars vector arithmetic right shift
// All variants below match RShiftVS (sign-propagating shift of packed 16-bit
// elements) and emit psraw / vpsraw. The vector length in the predicate picks
// the operand width: 2S -> vecS (4 bytes), 4S -> vecD (8 bytes), 8S -> vecX
// (16 bytes), 16S -> vecY (32 bytes, AVX2 only).
aoqi@0 4557 instruct vsra2S(vecS dst, vecS shift) %{
aoqi@0 4558 predicate(n->as_Vector()->length() == 2);
aoqi@0 4559 match(Set dst (RShiftVS dst shift));
aoqi@0 4560 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
aoqi@0 4561 ins_encode %{
aoqi@0 4562 __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
aoqi@0 4563 %}
aoqi@0 4564 ins_pipe( pipe_slow );
aoqi@0 4565 %}
aoqi@0 4566
aoqi@0 4567 instruct vsra2S_imm(vecS dst, immI8 shift) %{
aoqi@0 4568 predicate(n->as_Vector()->length() == 2);
aoqi@0 4569 match(Set dst (RShiftVS dst shift));
aoqi@0 4570 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
aoqi@0 4571 ins_encode %{
aoqi@0 4572 __ psraw($dst$$XMMRegister, (int)$shift$$constant);
aoqi@0 4573 %}
aoqi@0 4574 ins_pipe( pipe_slow );
aoqi@0 4575 %}
aoqi@0 4576
aoqi@0 4577 instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{
aoqi@0 4578 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 4579 match(Set dst (RShiftVS src shift));
aoqi@0 4580 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
aoqi@0 4581 ins_encode %{
aoqi@0 4582 bool vector256 = false;
aoqi@0 4583 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
aoqi@0 4584 %}
aoqi@0 4585 ins_pipe( pipe_slow );
aoqi@0 4586 %}
aoqi@0 4587
aoqi@0 4588 instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
aoqi@0 4589 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 4590 match(Set dst (RShiftVS src shift));
aoqi@0 4591 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
aoqi@0 4592 ins_encode %{
aoqi@0 4593 bool vector256 = false;
aoqi@0 4594 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
aoqi@0 4595 %}
aoqi@0 4596 ins_pipe( pipe_slow );
aoqi@0 4597 %}
aoqi@0 4598
aoqi@0 4599 instruct vsra4S(vecD dst, vecS shift) %{
aoqi@0 4600 predicate(n->as_Vector()->length() == 4);
aoqi@0 4601 match(Set dst (RShiftVS dst shift));
aoqi@0 4602 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
aoqi@0 4603 ins_encode %{
aoqi@0 4604 __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
aoqi@0 4605 %}
aoqi@0 4606 ins_pipe( pipe_slow );
aoqi@0 4607 %}
aoqi@0 4608
aoqi@0 4609 instruct vsra4S_imm(vecD dst, immI8 shift) %{
aoqi@0 4610 predicate(n->as_Vector()->length() == 4);
aoqi@0 4611 match(Set dst (RShiftVS dst shift));
aoqi@0 4612 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
aoqi@0 4613 ins_encode %{
aoqi@0 4614 __ psraw($dst$$XMMRegister, (int)$shift$$constant);
aoqi@0 4615 %}
aoqi@0 4616 ins_pipe( pipe_slow );
aoqi@0 4617 %}
aoqi@0 4618
aoqi@0 4619 instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{
aoqi@0 4620 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 4621 match(Set dst (RShiftVS src shift));
aoqi@0 4622 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
aoqi@0 4623 ins_encode %{
aoqi@0 4624 bool vector256 = false;
aoqi@0 4625 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
aoqi@0 4626 %}
aoqi@0 4627 ins_pipe( pipe_slow );
aoqi@0 4628 %}
aoqi@0 4629
aoqi@0 4630 instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
aoqi@0 4631 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 4632 match(Set dst (RShiftVS src shift));
aoqi@0 4633 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
aoqi@0 4634 ins_encode %{
aoqi@0 4635 bool vector256 = false;
aoqi@0 4636 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
aoqi@0 4637 %}
aoqi@0 4638 ins_pipe( pipe_slow );
aoqi@0 4639 %}
aoqi@0 4640
aoqi@0 4641 instruct vsra8S(vecX dst, vecS shift) %{
aoqi@0 4642 predicate(n->as_Vector()->length() == 8);
aoqi@0 4643 match(Set dst (RShiftVS dst shift));
aoqi@0 4644 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
aoqi@0 4645 ins_encode %{
aoqi@0 4646 __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
aoqi@0 4647 %}
aoqi@0 4648 ins_pipe( pipe_slow );
aoqi@0 4649 %}
aoqi@0 4650
aoqi@0 4651 instruct vsra8S_imm(vecX dst, immI8 shift) %{
aoqi@0 4652 predicate(n->as_Vector()->length() == 8);
aoqi@0 4653 match(Set dst (RShiftVS dst shift));
aoqi@0 4654 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
aoqi@0 4655 ins_encode %{
aoqi@0 4656 __ psraw($dst$$XMMRegister, (int)$shift$$constant);
aoqi@0 4657 %}
aoqi@0 4658 ins_pipe( pipe_slow );
aoqi@0 4659 %}
aoqi@0 4660
aoqi@0 4661 instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{
aoqi@0 4662 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
aoqi@0 4663 match(Set dst (RShiftVS src shift));
aoqi@0 4664 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
aoqi@0 4665 ins_encode %{
aoqi@0 4666 bool vector256 = false;
aoqi@0 4667 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
aoqi@0 4668 %}
aoqi@0 4669 ins_pipe( pipe_slow );
aoqi@0 4670 %}
aoqi@0 4671
aoqi@0 4672 instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
aoqi@0 4673 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
aoqi@0 4674 match(Set dst (RShiftVS src shift));
aoqi@0 4675 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
aoqi@0 4676 ins_encode %{
aoqi@0 4677 bool vector256 = false;
aoqi@0 4678 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
aoqi@0 4679 %}
aoqi@0 4680 ins_pipe( pipe_slow );
aoqi@0 4681 %}
aoqi@0 4682
// 16-short (256-bit) forms exist only as AVX2 three-operand variants.
aoqi@0 4683 instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{
aoqi@0 4684 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
aoqi@0 4685 match(Set dst (RShiftVS src shift));
aoqi@0 4686 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
aoqi@0 4687 ins_encode %{
aoqi@0 4688 bool vector256 = true;
aoqi@0 4689 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
aoqi@0 4690 %}
aoqi@0 4691 ins_pipe( pipe_slow );
aoqi@0 4692 %}
aoqi@0 4693
aoqi@0 4694 instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
aoqi@0 4695 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
aoqi@0 4696 match(Set dst (RShiftVS src shift));
aoqi@0 4697 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
aoqi@0 4698 ins_encode %{
aoqi@0 4699 bool vector256 = true;
aoqi@0 4700 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
aoqi@0 4701 %}
aoqi@0 4702 ins_pipe( pipe_slow );
aoqi@0 4703 %}
aoqi@0 4704
aoqi@0 4705 // Integers vector arithmetic right shift
// Same pattern as the shorts family above, matching RShiftVI on packed 32-bit
// elements and emitting psrad / vpsrad. 2I uses vecD, 4I uses vecX, and the
// 8I (256-bit) forms require AVX2 (UseAVX > 1).
aoqi@0 4706 instruct vsra2I(vecD dst, vecS shift) %{
aoqi@0 4707 predicate(n->as_Vector()->length() == 2);
aoqi@0 4708 match(Set dst (RShiftVI dst shift));
aoqi@0 4709 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
aoqi@0 4710 ins_encode %{
aoqi@0 4711 __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
aoqi@0 4712 %}
aoqi@0 4713 ins_pipe( pipe_slow );
aoqi@0 4714 %}
aoqi@0 4715
aoqi@0 4716 instruct vsra2I_imm(vecD dst, immI8 shift) %{
aoqi@0 4717 predicate(n->as_Vector()->length() == 2);
aoqi@0 4718 match(Set dst (RShiftVI dst shift));
aoqi@0 4719 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
aoqi@0 4720 ins_encode %{
aoqi@0 4721 __ psrad($dst$$XMMRegister, (int)$shift$$constant);
aoqi@0 4722 %}
aoqi@0 4723 ins_pipe( pipe_slow );
aoqi@0 4724 %}
aoqi@0 4725
aoqi@0 4726 instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
aoqi@0 4727 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 4728 match(Set dst (RShiftVI src shift));
aoqi@0 4729 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
aoqi@0 4730 ins_encode %{
aoqi@0 4731 bool vector256 = false;
aoqi@0 4732 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
aoqi@0 4733 %}
aoqi@0 4734 ins_pipe( pipe_slow );
aoqi@0 4735 %}
aoqi@0 4736
aoqi@0 4737 instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
aoqi@0 4738 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
aoqi@0 4739 match(Set dst (RShiftVI src shift));
aoqi@0 4740 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
aoqi@0 4741 ins_encode %{
aoqi@0 4742 bool vector256 = false;
aoqi@0 4743 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
aoqi@0 4744 %}
aoqi@0 4745 ins_pipe( pipe_slow );
aoqi@0 4746 %}
aoqi@0 4747
aoqi@0 4748 instruct vsra4I(vecX dst, vecS shift) %{
aoqi@0 4749 predicate(n->as_Vector()->length() == 4);
aoqi@0 4750 match(Set dst (RShiftVI dst shift));
aoqi@0 4751 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
aoqi@0 4752 ins_encode %{
aoqi@0 4753 __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
aoqi@0 4754 %}
aoqi@0 4755 ins_pipe( pipe_slow );
aoqi@0 4756 %}
aoqi@0 4757
aoqi@0 4758 instruct vsra4I_imm(vecX dst, immI8 shift) %{
aoqi@0 4759 predicate(n->as_Vector()->length() == 4);
aoqi@0 4760 match(Set dst (RShiftVI dst shift));
aoqi@0 4761 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
aoqi@0 4762 ins_encode %{
aoqi@0 4763 __ psrad($dst$$XMMRegister, (int)$shift$$constant);
aoqi@0 4764 %}
aoqi@0 4765 ins_pipe( pipe_slow );
aoqi@0 4766 %}
aoqi@0 4767
aoqi@0 4768 instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
aoqi@0 4769 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 4770 match(Set dst (RShiftVI src shift));
aoqi@0 4771 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
aoqi@0 4772 ins_encode %{
aoqi@0 4773 bool vector256 = false;
aoqi@0 4774 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
aoqi@0 4775 %}
aoqi@0 4776 ins_pipe( pipe_slow );
aoqi@0 4777 %}
aoqi@0 4778
aoqi@0 4779 instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
aoqi@0 4780 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
aoqi@0 4781 match(Set dst (RShiftVI src shift));
aoqi@0 4782 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
aoqi@0 4783 ins_encode %{
aoqi@0 4784 bool vector256 = false;
aoqi@0 4785 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
aoqi@0 4786 %}
aoqi@0 4787 ins_pipe( pipe_slow );
aoqi@0 4788 %}
aoqi@0 4789
aoqi@0 4790 instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
aoqi@0 4791 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
aoqi@0 4792 match(Set dst (RShiftVI src shift));
aoqi@0 4793 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
aoqi@0 4794 ins_encode %{
aoqi@0 4795 bool vector256 = true;
aoqi@0 4796 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
aoqi@0 4797 %}
aoqi@0 4798 ins_pipe( pipe_slow );
aoqi@0 4799 %}
aoqi@0 4800
aoqi@0 4801 instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
aoqi@0 4802 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
aoqi@0 4803 match(Set dst (RShiftVI src shift));
aoqi@0 4804 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
aoqi@0 4805 ins_encode %{
aoqi@0 4806 bool vector256 = true;
aoqi@0 4807 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
aoqi@0 4808 %}
aoqi@0 4809 ins_pipe( pipe_slow );
aoqi@0 4810 %}
aoqi@0 4811
// NOTE(review): no RShiftVL instructs are defined here — presumably because
// the SSE/AVX ISA levels targeted by this file provide no packed-64-bit
// arithmetic right shift instruction; confirm against the assembler.
aoqi@0 4812 // There are no longs vector arithmetic right shift instructions.
aoqi@0 4813
aoqi@0 4814
aoqi@0 4814
aoqi@0 4815 // --------------------------------- AND --------------------------------------
aoqi@0 4816
// Bitwise AND of vectors (AndV), selected by total byte width rather than
// element count since AND is element-size agnostic. Forms: SSE destructive
// (pand), AVX three-operand register (vpand, _reg), and AVX register-memory
// (_mem) for the 16- and 32-byte widths. 32-byte forms require AVX2.
aoqi@0 4817 instruct vand4B(vecS dst, vecS src) %{
aoqi@0 4818 predicate(n->as_Vector()->length_in_bytes() == 4);
aoqi@0 4819 match(Set dst (AndV dst src));
aoqi@0 4820 format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
aoqi@0 4821 ins_encode %{
aoqi@0 4822 __ pand($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 4823 %}
aoqi@0 4824 ins_pipe( pipe_slow );
aoqi@0 4825 %}
aoqi@0 4826
aoqi@0 4827 instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
aoqi@0 4828 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
aoqi@0 4829 match(Set dst (AndV src1 src2));
aoqi@0 4830 format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
aoqi@0 4831 ins_encode %{
aoqi@0 4832 bool vector256 = false;
aoqi@0 4833 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 4834 %}
aoqi@0 4835 ins_pipe( pipe_slow );
aoqi@0 4836 %}
aoqi@0 4837
aoqi@0 4838 instruct vand8B(vecD dst, vecD src) %{
aoqi@0 4839 predicate(n->as_Vector()->length_in_bytes() == 8);
aoqi@0 4840 match(Set dst (AndV dst src));
aoqi@0 4841 format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
aoqi@0 4842 ins_encode %{
aoqi@0 4843 __ pand($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 4844 %}
aoqi@0 4845 ins_pipe( pipe_slow );
aoqi@0 4846 %}
aoqi@0 4847
aoqi@0 4848 instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
aoqi@0 4849 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
aoqi@0 4850 match(Set dst (AndV src1 src2));
aoqi@0 4851 format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %}
aoqi@0 4852 ins_encode %{
aoqi@0 4853 bool vector256 = false;
aoqi@0 4854 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 4855 %}
aoqi@0 4856 ins_pipe( pipe_slow );
aoqi@0 4857 %}
aoqi@0 4858
aoqi@0 4859 instruct vand16B(vecX dst, vecX src) %{
aoqi@0 4860 predicate(n->as_Vector()->length_in_bytes() == 16);
aoqi@0 4861 match(Set dst (AndV dst src));
aoqi@0 4862 format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
aoqi@0 4863 ins_encode %{
aoqi@0 4864 __ pand($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 4865 %}
aoqi@0 4866 ins_pipe( pipe_slow );
aoqi@0 4867 %}
aoqi@0 4868
aoqi@0 4869 instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
aoqi@0 4870 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
aoqi@0 4871 match(Set dst (AndV src1 src2));
aoqi@0 4872 format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
aoqi@0 4873 ins_encode %{
aoqi@0 4874 bool vector256 = false;
aoqi@0 4875 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 4876 %}
aoqi@0 4877 ins_pipe( pipe_slow );
aoqi@0 4878 %}
aoqi@0 4879
// Register-memory form: folds a LoadVector of the second operand into the AND.
aoqi@0 4880 instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
aoqi@0 4881 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
aoqi@0 4882 match(Set dst (AndV src (LoadVector mem)));
aoqi@0 4883 format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
aoqi@0 4884 ins_encode %{
aoqi@0 4885 bool vector256 = false;
aoqi@0 4886 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 4887 %}
aoqi@0 4888 ins_pipe( pipe_slow );
aoqi@0 4889 %}
aoqi@0 4890
aoqi@0 4891 instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
aoqi@0 4892 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
aoqi@0 4893 match(Set dst (AndV src1 src2));
aoqi@0 4894 format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %}
aoqi@0 4895 ins_encode %{
aoqi@0 4896 bool vector256 = true;
aoqi@0 4897 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 4898 %}
aoqi@0 4899 ins_pipe( pipe_slow );
aoqi@0 4900 %}
aoqi@0 4901
aoqi@0 4902 instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
aoqi@0 4903 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
aoqi@0 4904 match(Set dst (AndV src (LoadVector mem)));
aoqi@0 4905 format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
aoqi@0 4906 ins_encode %{
aoqi@0 4907 bool vector256 = true;
aoqi@0 4908 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 4909 %}
aoqi@0 4910 ins_pipe( pipe_slow );
aoqi@0 4911 %}
aoqi@0 4912
aoqi@0 4913 // --------------------------------- OR ---------------------------------------
aoqi@0 4914
// Bitwise OR of vectors (OrV). Mirrors the AND family above: por for the SSE
// destructive form, vpor for AVX three-operand register and register-memory
// (_mem) forms; 32-byte forms require AVX2 (UseAVX > 1).
aoqi@0 4915 instruct vor4B(vecS dst, vecS src) %{
aoqi@0 4916 predicate(n->as_Vector()->length_in_bytes() == 4);
aoqi@0 4917 match(Set dst (OrV dst src));
aoqi@0 4918 format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
aoqi@0 4919 ins_encode %{
aoqi@0 4920 __ por($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 4921 %}
aoqi@0 4922 ins_pipe( pipe_slow );
aoqi@0 4923 %}
aoqi@0 4924
aoqi@0 4925 instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
aoqi@0 4926 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
aoqi@0 4927 match(Set dst (OrV src1 src2));
aoqi@0 4928 format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
aoqi@0 4929 ins_encode %{
aoqi@0 4930 bool vector256 = false;
aoqi@0 4931 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 4932 %}
aoqi@0 4933 ins_pipe( pipe_slow );
aoqi@0 4934 %}
aoqi@0 4935
aoqi@0 4936 instruct vor8B(vecD dst, vecD src) %{
aoqi@0 4937 predicate(n->as_Vector()->length_in_bytes() == 8);
aoqi@0 4938 match(Set dst (OrV dst src));
aoqi@0 4939 format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
aoqi@0 4940 ins_encode %{
aoqi@0 4941 __ por($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 4942 %}
aoqi@0 4943 ins_pipe( pipe_slow );
aoqi@0 4944 %}
aoqi@0 4945
aoqi@0 4946 instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
aoqi@0 4947 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
aoqi@0 4948 match(Set dst (OrV src1 src2));
aoqi@0 4949 format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
aoqi@0 4950 ins_encode %{
aoqi@0 4951 bool vector256 = false;
aoqi@0 4952 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 4953 %}
aoqi@0 4954 ins_pipe( pipe_slow );
aoqi@0 4955 %}
aoqi@0 4956
aoqi@0 4957 instruct vor16B(vecX dst, vecX src) %{
aoqi@0 4958 predicate(n->as_Vector()->length_in_bytes() == 16);
aoqi@0 4959 match(Set dst (OrV dst src));
aoqi@0 4960 format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
aoqi@0 4961 ins_encode %{
aoqi@0 4962 __ por($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 4963 %}
aoqi@0 4964 ins_pipe( pipe_slow );
aoqi@0 4965 %}
aoqi@0 4966
aoqi@0 4967 instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
aoqi@0 4968 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
aoqi@0 4969 match(Set dst (OrV src1 src2));
aoqi@0 4970 format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
aoqi@0 4971 ins_encode %{
aoqi@0 4972 bool vector256 = false;
aoqi@0 4973 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 4974 %}
aoqi@0 4975 ins_pipe( pipe_slow );
aoqi@0 4976 %}
aoqi@0 4977
// Register-memory form: folds a LoadVector of the second operand into the OR.
aoqi@0 4978 instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
aoqi@0 4979 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
aoqi@0 4980 match(Set dst (OrV src (LoadVector mem)));
aoqi@0 4981 format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
aoqi@0 4982 ins_encode %{
aoqi@0 4983 bool vector256 = false;
aoqi@0 4984 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 4985 %}
aoqi@0 4986 ins_pipe( pipe_slow );
aoqi@0 4987 %}
aoqi@0 4988
aoqi@0 4989 instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
aoqi@0 4990 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
aoqi@0 4991 match(Set dst (OrV src1 src2));
aoqi@0 4992 format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
aoqi@0 4993 ins_encode %{
aoqi@0 4994 bool vector256 = true;
aoqi@0 4995 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 4996 %}
aoqi@0 4997 ins_pipe( pipe_slow );
aoqi@0 4998 %}
aoqi@0 4999
aoqi@0 5000 instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
aoqi@0 5001 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
aoqi@0 5002 match(Set dst (OrV src (LoadVector mem)));
aoqi@0 5003 format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
aoqi@0 5004 ins_encode %{
aoqi@0 5005 bool vector256 = true;
aoqi@0 5006 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 5007 %}
aoqi@0 5008 ins_pipe( pipe_slow );
aoqi@0 5009 %}
aoqi@0 5010
aoqi@0 5011 // --------------------------------- XOR --------------------------------------
aoqi@0 5012
// Bitwise XOR of vectors (XorV). Mirrors the AND/OR families: pxor for the SSE
// destructive form, vpxor for AVX three-operand register and register-memory
// (_mem) forms; 32-byte forms require AVX2 (UseAVX > 1).
aoqi@0 5013 instruct vxor4B(vecS dst, vecS src) %{
aoqi@0 5014 predicate(n->as_Vector()->length_in_bytes() == 4);
aoqi@0 5015 match(Set dst (XorV dst src));
aoqi@0 5016 format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
aoqi@0 5017 ins_encode %{
aoqi@0 5018 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 5019 %}
aoqi@0 5020 ins_pipe( pipe_slow );
aoqi@0 5021 %}
aoqi@0 5022
aoqi@0 5023 instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
aoqi@0 5024 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
aoqi@0 5025 match(Set dst (XorV src1 src2));
aoqi@0 5026 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
aoqi@0 5027 ins_encode %{
aoqi@0 5028 bool vector256 = false;
aoqi@0 5029 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 5030 %}
aoqi@0 5031 ins_pipe( pipe_slow );
aoqi@0 5032 %}
aoqi@0 5033
aoqi@0 5034 instruct vxor8B(vecD dst, vecD src) %{
aoqi@0 5035 predicate(n->as_Vector()->length_in_bytes() == 8);
aoqi@0 5036 match(Set dst (XorV dst src));
aoqi@0 5037 format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
aoqi@0 5038 ins_encode %{
aoqi@0 5039 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 5040 %}
aoqi@0 5041 ins_pipe( pipe_slow );
aoqi@0 5042 %}
aoqi@0 5043
aoqi@0 5044 instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
aoqi@0 5045 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
aoqi@0 5046 match(Set dst (XorV src1 src2));
aoqi@0 5047 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
aoqi@0 5048 ins_encode %{
aoqi@0 5049 bool vector256 = false;
aoqi@0 5050 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 5051 %}
aoqi@0 5052 ins_pipe( pipe_slow );
aoqi@0 5053 %}
aoqi@0 5054
aoqi@0 5055 instruct vxor16B(vecX dst, vecX src) %{
aoqi@0 5056 predicate(n->as_Vector()->length_in_bytes() == 16);
aoqi@0 5057 match(Set dst (XorV dst src));
aoqi@0 5058 format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
aoqi@0 5059 ins_encode %{
aoqi@0 5060 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
aoqi@0 5061 %}
aoqi@0 5062 ins_pipe( pipe_slow );
aoqi@0 5063 %}
aoqi@0 5064
aoqi@0 5065 instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
aoqi@0 5066 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
aoqi@0 5067 match(Set dst (XorV src1 src2));
aoqi@0 5068 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
aoqi@0 5069 ins_encode %{
aoqi@0 5070 bool vector256 = false;
aoqi@0 5071 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 5072 %}
aoqi@0 5073 ins_pipe( pipe_slow );
aoqi@0 5074 %}
aoqi@0 5075
// Register-memory form: folds a LoadVector of the second operand into the XOR.
aoqi@0 5076 instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
aoqi@0 5077 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
aoqi@0 5078 match(Set dst (XorV src (LoadVector mem)));
aoqi@0 5079 format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
aoqi@0 5080 ins_encode %{
aoqi@0 5081 bool vector256 = false;
aoqi@0 5082 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 5083 %}
aoqi@0 5084 ins_pipe( pipe_slow );
aoqi@0 5085 %}
aoqi@0 5086
aoqi@0 5087 instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
aoqi@0 5088 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
aoqi@0 5089 match(Set dst (XorV src1 src2));
aoqi@0 5090 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
aoqi@0 5091 ins_encode %{
aoqi@0 5092 bool vector256 = true;
aoqi@0 5093 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
aoqi@0 5094 %}
aoqi@0 5095 ins_pipe( pipe_slow );
aoqi@0 5096 %}
aoqi@0 5097
aoqi@0 5098 instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
aoqi@0 5099 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
aoqi@0 5100 match(Set dst (XorV src (LoadVector mem)));
aoqi@0 5101 format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
aoqi@0 5102 ins_encode %{
aoqi@0 5103 bool vector256 = true;
aoqi@0 5104 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
aoqi@0 5105 %}
aoqi@0 5106 ins_pipe( pipe_slow );
aoqi@0 5107 %}
aoqi@0 5108

mercurial