Wed, 12 Mar 2014 11:24:26 -0700
8031321: Support Intel bit manipulation instructions
Summary: Add support for BMI1 instructions
Reviewed-by: kvn, roland
1 //
2 // Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 //
5 // This code is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU General Public License version 2 only, as
7 // published by the Free Software Foundation.
8 //
9 // This code is distributed in the hope that it will be useful, but WITHOUT
10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 // version 2 for more details (a copy is included in the LICENSE file that
13 // accompanied this code).
14 //
15 // You should have received a copy of the GNU General Public License version
16 // 2 along with this work; if not, write to the Free Software Foundation,
17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 //
19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 // or visit www.oracle.com if you need additional information or have any
21 // questions.
22 //
23 //
25 // X86 Common Architecture Description File
27 //----------REGISTER DEFINITION BLOCK------------------------------------------
28 // This information is used by the matcher and the register allocator to
29 // describe individual registers and classes of registers within the target
// architecture.
register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers. 256-bit registers or 8 words each, labeled (a)-h.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 preserved across function calls
//              XMM0-XMM3 might hold parameters

// XMM0-XMM5: argument/scratch registers, save-on-call under every supported ABI.
reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));

#ifdef _WIN64
// Windows x64 ABI: XMM6-XMM15 are callee-saved, hence SOE here.

reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));

reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));

reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));

reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));

reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));

reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));

reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));

reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));

reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));

reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));

#else // _WIN64
// System V ABIs: no XMM register is preserved across calls, hence SOC.

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));

#ifdef _LP64
// XMM8-XMM15 exist only in 64-bit mode.

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));

#endif // _LP64

#endif // _WIN64

// Condition-code register; no VM register backs it (VMRegImpl::Bad()).
// NOTE(review): encoding (16 vs 8) appears to be the next slot after the
// last XMM register on each platform -- confirm against the ADLC encoding map.
#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

// Allocation class for all XMM registers; order here sets allocator preference.
alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
#ifdef _LP64
                  ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                   );

// flags allocation class should be last.
alloc_class chunk2(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for all float registers
reg_class float_reg(XMM0,
                    XMM1,
                    XMM2,
                    XMM3,
                    XMM4,
                    XMM5,
                    XMM6,
                    XMM7
#ifdef _LP64
                   ,XMM8,
                    XMM9,
                    XMM10,
                    XMM11,
                    XMM12,
                    XMM13,
                    XMM14,
                    XMM15
#endif
                    );

// Class for all double registers
reg_class double_reg(XMM0,  XMM0b,
                     XMM1,  XMM1b,
                     XMM2,  XMM2b,
                     XMM3,  XMM3b,
                     XMM4,  XMM4b,
                     XMM5,  XMM5b,
                     XMM6,  XMM6b,
                     XMM7,  XMM7b
#ifdef _LP64
                    ,XMM8,  XMM8b,
                     XMM9,  XMM9b,
                     XMM10, XMM10b,
                     XMM11, XMM11b,
                     XMM12, XMM12b,
                     XMM13, XMM13b,
                     XMM14, XMM14b,
                     XMM15, XMM15b
#endif
                     );

// Class for all 32bit vector registers
reg_class vectors_reg(XMM0,
                      XMM1,
                      XMM2,
                      XMM3,
                      XMM4,
                      XMM5,
                      XMM6,
                      XMM7
#ifdef _LP64
                     ,XMM8,
                      XMM9,
                      XMM10,
                      XMM11,
                      XMM12,
                      XMM13,
                      XMM14,
                      XMM15
#endif
                      );

// Class for all 64bit vector registers
reg_class vectord_reg(XMM0,  XMM0b,
                      XMM1,  XMM1b,
                      XMM2,  XMM2b,
                      XMM3,  XMM3b,
                      XMM4,  XMM4b,
                      XMM5,  XMM5b,
                      XMM6,  XMM6b,
                      XMM7,  XMM7b
#ifdef _LP64
                     ,XMM8,  XMM8b,
                      XMM9,  XMM9b,
                      XMM10, XMM10b,
                      XMM11, XMM11b,
                      XMM12, XMM12b,
                      XMM13, XMM13b,
                      XMM14, XMM14b,
                      XMM15, XMM15b
#endif
                      );

// Class for all 128bit vector registers
reg_class vectorx_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,
                      XMM1,  XMM1b,  XMM1c,  XMM1d,
                      XMM2,  XMM2b,  XMM2c,  XMM2d,
                      XMM3,  XMM3b,  XMM3c,  XMM3d,
                      XMM4,  XMM4b,  XMM4c,  XMM4d,
                      XMM5,  XMM5b,  XMM5c,  XMM5d,
                      XMM6,  XMM6b,  XMM6c,  XMM6d,
                      XMM7,  XMM7b,  XMM7c,  XMM7d
#ifdef _LP64
                     ,XMM8,  XMM8b,  XMM8c,  XMM8d,
                      XMM9,  XMM9b,  XMM9c,  XMM9d,
                      XMM10, XMM10b, XMM10c, XMM10d,
                      XMM11, XMM11b, XMM11c, XMM11d,
                      XMM12, XMM12b, XMM12c, XMM12d,
                      XMM13, XMM13b, XMM13c, XMM13d,
                      XMM14, XMM14b, XMM14c, XMM14d,
                      XMM15, XMM15b, XMM15c, XMM15d
#endif
                      );

// Class for all 256bit vector registers
reg_class vectory_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                      XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                      XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                      XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                      XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                      XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                      XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                      XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
#ifdef _LP64
                     ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                      XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                      XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                      XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                      XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                      XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                      XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                      XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                      );

%}
source %{
// Float masks come from different places depending on platform.
#ifdef _LP64
// 64-bit: mask constants live in generated stub routine data.
static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
#else
// 32-bit: mask constants live in statically allocated constant pools.
static address float_signmask()  { return (address)float_signmask_pool; }
static address float_signflip()  { return (address)float_signflip_pool; }
static address double_signmask() { return (address)double_signmask_pool; }
static address double_signflip() { return (address)double_signflip_pool; }
#endif
// Returns true if the ideal 'opcode' has a match rule in this AD file AND
// the current CPU/flag configuration can actually execute it.
const bool Matcher::match_rule_supported(int opcode) {
  if (!has_match_rule(opcode))
    return false;  // no match rule generated for this opcode at all

  switch (opcode) {
    case Op_PopCountI:
    case Op_PopCountL:
      // POPCNT usage is gated by the UsePopCountInstruction flag.
      if (!UsePopCountInstruction)
        return false;
      break;
    case Op_MulVI:
      if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
        return false;
      break;
    case Op_CompareAndSwapL:
#ifdef _LP64
    case Op_CompareAndSwapP:
#endif
      // 8-byte compare-and-swap requires CMPXCHG8B/CMPXCHG16B support.
      if (!VM_Version::supports_cx8())
        return false;
      break;
  }

  return true;  // Per default match rules are supported.
}
// Max vector size in bytes. 0 if not supported.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  assert(is_java_primitive(bt), "only primitive type vectors");
  if (UseSSE < 2) return 0;
  // SSE2 supports 128bit vectors for all types.
  // AVX2 supports 256bit vectors for all types.
  int size = (UseAVX > 1) ? 32 : 16;
  // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
  if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
    size = 32;
  // Use flag to limit vector size.
  size = MIN2(size,(int)MaxVectorSize);
  // Minimum 2 values in vector (or 4 for bytes).
  // The case groups below intentionally fall through: wider element types
  // must also satisfy the minimum-size checks of the narrower groups.
  switch (bt) {
  case T_DOUBLE:
  case T_LONG:
    if (size < 16) return 0;    // need at least two 8-byte elements
    // fall through
  case T_FLOAT:
  case T_INT:
    if (size < 8) return 0;     // need at least two 4-byte elements
    // fall through
  case T_BOOLEAN:
  case T_BYTE:
  case T_CHAR:
  case T_SHORT:
    if (size < 4) return 0;     // need at least four bytes total
    break;
  default:
    ShouldNotReachHere();
  }
  return size;
}
// Limits on vector size (number of elements) loaded into vector.
// Derived directly from the byte width: elements = bytes / element-size.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
// Smallest supported vector length (in elements) for the given element type,
// capped by the maximum so it never exceeds what the hardware allows.
const int Matcher::min_vector_size(const BasicType bt) {
  int max_size = max_vector_size(bt);
  // Min size which can be loaded into vector is 4 bytes.
  // So: 4 elements for 1-byte types, otherwise 2 elements.
  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  return MIN2(size,max_size);
}
// Vector ideal reg corresponding to specified size in bytes.
const int Matcher::vector_ideal_reg(int size) {
  assert(MaxVectorSize >= size, "");
  switch(size) {
    case  4: return Op_VecS;    // 32-bit vector
    case  8: return Op_VecD;    // 64-bit vector
    case 16: return Op_VecX;    // 128-bit vector (XMM)
    case 32: return Op_VecY;    // 256-bit vector (YMM)
  }
  ShouldNotReachHere();
  return 0;
}
// Only lowest bits of xmm reg are used for vector shift count,
// so a 32-bit vector register class is always sufficient.
const int Matcher::vector_shift_count_ideal_reg(int size) {
  return Op_VecS;
}
// x86 supports misaligned vectors store/load.
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}
// x86 AES instructions are compatible with SunJCE expanded
// keys, hence we do not need to pass the original key to stubs.
const bool Matcher::pass_original_key_for_aes() {
  return false;
}
// Helper methods for MachSpillCopyNode::implementation().

// Emit (when 'cbuf' is set), print (when 'cbuf' is null and !do_size),
// or size a register-to-register vector spill copy.
// Returns the emitted/predicted instruction size in bytes.
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st) {
  // In 64-bit VM size calculation is very complex. Emitting instructions
  // into scratch buffer is used to get size in 64-bit VM.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  assert(ireg == Op_VecS || // 32bit vector
         (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
         (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
         "no non-adjacent vector moves" );
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    switch (ireg) {
    case Op_VecS: // copy whole register
    case Op_VecD:
    case Op_VecX:
      __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      break;
    case Op_VecY:
      // 256-bit copy requires the AVX (VEX-encoded) form.
      __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      break;
    default:
      ShouldNotReachHere();
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == 4, "incorrect size calculattion");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    // No code buffer: just print the instruction for debug listings.
    switch (ireg) {
    case Op_VecS:
    case Op_VecD:
    case Op_VecX:
      st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    case Op_VecY:
      st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  return 4;
}
// Emit (when 'cbuf' is set), print (when 'cbuf' is null and !do_size),
// or size a vector spill to/from the stack. 'is_load' selects the
// direction; 'ireg' selects the vector width (VecS/D/X/Y).
// Returns the emitted/predicted instruction size in bytes.
static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st) {
  // In 64-bit VM size calculation is very complex. Emitting instructions
  // into scratch buffer is used to get size in 64-bit VM.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecD:
        __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecX:
        __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecY:
        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecD:
        __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecX:
        __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecY:
        __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      default:
        ShouldNotReachHere();
      }
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    // Displacement encoding: none for 0, one byte when it fits, else four.
    int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == (5+offset_size), "incorrect size calculattion");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    // No code buffer: just print the instruction for debug listings.
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecD:
        st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecX:
        st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecY:
        st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecD:
        st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecX:
        st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecY:
        st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      default:
        ShouldNotReachHere();
      }
    }
#endif
  }
  // Displacement encoding: none for 0, one byte when it fits, else four.
  int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return 5+offset_size;
}
735 static inline jfloat replicate4_imm(int con, int width) {
736 // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
737 assert(width == 1 || width == 2, "only byte or short types here");
738 int bit_width = width * 8;
739 jint val = con;
740 val &= (1 << bit_width) - 1; // mask off sign bits
741 while(bit_width < 32) {
742 val |= (val << bit_width);
743 bit_width <<= 1;
744 }
745 jfloat fval = *((jfloat*) &val); // coerce to float type
746 return fval;
747 }
749 static inline jdouble replicate8_imm(int con, int width) {
750 // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
751 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
752 int bit_width = width * 8;
753 jlong val = con;
754 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits
755 while(bit_width < 64) {
756 val |= (val << bit_width);
757 bit_width <<= 1;
758 }
759 jdouble dval = *((jdouble*) &val); // coerce to double type
760 return dval;
761 }
#ifndef PRODUCT
// Debug printing for a nop-padding node: reports how many pad bytes it emits.
void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("nop \t# %d bytes pad for loops and calls", _count);
}
#endif
// Emit _count bytes of nop padding into the code buffer.
void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
  MacroAssembler _masm(&cbuf);
  __ nop(_count);
}
// Emitted size in bytes: exactly _count (matches the nop(_count) in emit()).
uint MachNopNode::size(PhaseRegAlloc*) const {
  return _count;
}
#ifndef PRODUCT
// Debug printing for a breakpoint node.
void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("# breakpoint");
}
#endif
// Emit an int3 software-breakpoint instruction.
void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
  MacroAssembler _masm(&cbuf);
  __ int3();
}
// Size of the emitted breakpoint: defer to the generic MachNode computation.
uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
  return MachNode::size(ra_);
}
793 %}
encode %{

  // Save RSP into rbp_mh_SP_save before a call that may change the stack
  // pointer; the debug_only size check ensures the emitted code matches
  // the size predicted by preserve_SP_size().
  enc_class preserve_SP %{
    debug_only(int off0 = cbuf.insts_size());
    MacroAssembler _masm(&cbuf);
    // RBP is preserved across all calls, even compiled calls.
    // Use it to preserve RSP in places where the callee might change the SP.
    __ movptr(rbp_mh_SP_save, rsp);
    debug_only(int off1 = cbuf.insts_size());
    assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
  %}

  // Restore RSP from the value stashed by preserve_SP.
  enc_class restore_SP %{
    MacroAssembler _masm(&cbuf);
    __ movptr(rsp, rbp_mh_SP_save);
  %}

  // Call epilog: under VerifyStackAtCalls, verify the "majik cookie"
  // (0xbadb100d) is still at its expected stack offset after the call;
  // trap with int3 if the stack depth changed.
  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      MacroAssembler _masm(&cbuf);
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}

%}
829 //----------OPERANDS-----------------------------------------------------------
830 // Operand definitions must precede instruction definitions for correct parsing
831 // in the ADLC because operands constitute user defined types which are used in
832 // instruction definitions.
834 // Vectors
// 4-byte vector operand, allocated from the vectors_reg register class.
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

// 8-byte vector operand, allocated from the vectord_reg register class.
operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

// 16-byte vector operand, allocated from the vectorx_reg register class.
operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

// 32-byte vector operand, allocated from the vectory_reg register class.
operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}
868 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)
870 // ============================================================================
// Matches the ideal Halt node: emit an int3 trap on paths that must never
// execute at runtime.
instruct ShouldNotReachHere() %{
  match(Halt);
  format %{ "int3\t# ShouldNotReachHere" %}
  ins_encode %{
    __ int3();
  %}
  ins_pipe(pipe_slow);
%}
881 // ============================================================================
883 instruct addF_reg(regF dst, regF src) %{
884 predicate((UseSSE>=1) && (UseAVX == 0));
885 match(Set dst (AddF dst src));
887 format %{ "addss $dst, $src" %}
888 ins_cost(150);
889 ins_encode %{
890 __ addss($dst$$XMMRegister, $src$$XMMRegister);
891 %}
892 ins_pipe(pipe_slow);
893 %}
895 instruct addF_mem(regF dst, memory src) %{
896 predicate((UseSSE>=1) && (UseAVX == 0));
897 match(Set dst (AddF dst (LoadF src)));
899 format %{ "addss $dst, $src" %}
900 ins_cost(150);
901 ins_encode %{
902 __ addss($dst$$XMMRegister, $src$$Address);
903 %}
904 ins_pipe(pipe_slow);
905 %}
907 instruct addF_imm(regF dst, immF con) %{
908 predicate((UseSSE>=1) && (UseAVX == 0));
909 match(Set dst (AddF dst con));
910 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
911 ins_cost(150);
912 ins_encode %{
913 __ addss($dst$$XMMRegister, $constantaddress($con));
914 %}
915 ins_pipe(pipe_slow);
916 %}
918 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
919 predicate(UseAVX > 0);
920 match(Set dst (AddF src1 src2));
922 format %{ "vaddss $dst, $src1, $src2" %}
923 ins_cost(150);
924 ins_encode %{
925 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
926 %}
927 ins_pipe(pipe_slow);
928 %}
930 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
931 predicate(UseAVX > 0);
932 match(Set dst (AddF src1 (LoadF src2)));
934 format %{ "vaddss $dst, $src1, $src2" %}
935 ins_cost(150);
936 ins_encode %{
937 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
938 %}
939 ins_pipe(pipe_slow);
940 %}
942 instruct addF_reg_imm(regF dst, regF src, immF con) %{
943 predicate(UseAVX > 0);
944 match(Set dst (AddF src con));
946 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
947 ins_cost(150);
948 ins_encode %{
949 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
950 %}
951 ins_pipe(pipe_slow);
952 %}
954 instruct addD_reg(regD dst, regD src) %{
955 predicate((UseSSE>=2) && (UseAVX == 0));
956 match(Set dst (AddD dst src));
958 format %{ "addsd $dst, $src" %}
959 ins_cost(150);
960 ins_encode %{
961 __ addsd($dst$$XMMRegister, $src$$XMMRegister);
962 %}
963 ins_pipe(pipe_slow);
964 %}
966 instruct addD_mem(regD dst, memory src) %{
967 predicate((UseSSE>=2) && (UseAVX == 0));
968 match(Set dst (AddD dst (LoadD src)));
970 format %{ "addsd $dst, $src" %}
971 ins_cost(150);
972 ins_encode %{
973 __ addsd($dst$$XMMRegister, $src$$Address);
974 %}
975 ins_pipe(pipe_slow);
976 %}
978 instruct addD_imm(regD dst, immD con) %{
979 predicate((UseSSE>=2) && (UseAVX == 0));
980 match(Set dst (AddD dst con));
981 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
982 ins_cost(150);
983 ins_encode %{
984 __ addsd($dst$$XMMRegister, $constantaddress($con));
985 %}
986 ins_pipe(pipe_slow);
987 %}
989 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
990 predicate(UseAVX > 0);
991 match(Set dst (AddD src1 src2));
993 format %{ "vaddsd $dst, $src1, $src2" %}
994 ins_cost(150);
995 ins_encode %{
996 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
997 %}
998 ins_pipe(pipe_slow);
999 %}
1001 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
1002 predicate(UseAVX > 0);
1003 match(Set dst (AddD src1 (LoadD src2)));
1005 format %{ "vaddsd $dst, $src1, $src2" %}
1006 ins_cost(150);
1007 ins_encode %{
1008 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
1009 %}
1010 ins_pipe(pipe_slow);
1011 %}
1013 instruct addD_reg_imm(regD dst, regD src, immD con) %{
1014 predicate(UseAVX > 0);
1015 match(Set dst (AddD src con));
1017 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
1018 ins_cost(150);
1019 ins_encode %{
1020 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
1021 %}
1022 ins_pipe(pipe_slow);
1023 %}
1025 instruct subF_reg(regF dst, regF src) %{
1026 predicate((UseSSE>=1) && (UseAVX == 0));
1027 match(Set dst (SubF dst src));
1029 format %{ "subss $dst, $src" %}
1030 ins_cost(150);
1031 ins_encode %{
1032 __ subss($dst$$XMMRegister, $src$$XMMRegister);
1033 %}
1034 ins_pipe(pipe_slow);
1035 %}
1037 instruct subF_mem(regF dst, memory src) %{
1038 predicate((UseSSE>=1) && (UseAVX == 0));
1039 match(Set dst (SubF dst (LoadF src)));
1041 format %{ "subss $dst, $src" %}
1042 ins_cost(150);
1043 ins_encode %{
1044 __ subss($dst$$XMMRegister, $src$$Address);
1045 %}
1046 ins_pipe(pipe_slow);
1047 %}
1049 instruct subF_imm(regF dst, immF con) %{
1050 predicate((UseSSE>=1) && (UseAVX == 0));
1051 match(Set dst (SubF dst con));
1052 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
1053 ins_cost(150);
1054 ins_encode %{
1055 __ subss($dst$$XMMRegister, $constantaddress($con));
1056 %}
1057 ins_pipe(pipe_slow);
1058 %}
1060 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
1061 predicate(UseAVX > 0);
1062 match(Set dst (SubF src1 src2));
1064 format %{ "vsubss $dst, $src1, $src2" %}
1065 ins_cost(150);
1066 ins_encode %{
1067 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
1068 %}
1069 ins_pipe(pipe_slow);
1070 %}
1072 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
1073 predicate(UseAVX > 0);
1074 match(Set dst (SubF src1 (LoadF src2)));
1076 format %{ "vsubss $dst, $src1, $src2" %}
1077 ins_cost(150);
1078 ins_encode %{
1079 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
1080 %}
1081 ins_pipe(pipe_slow);
1082 %}
1084 instruct subF_reg_imm(regF dst, regF src, immF con) %{
1085 predicate(UseAVX > 0);
1086 match(Set dst (SubF src con));
1088 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
1089 ins_cost(150);
1090 ins_encode %{
1091 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
1092 %}
1093 ins_pipe(pipe_slow);
1094 %}
1096 instruct subD_reg(regD dst, regD src) %{
1097 predicate((UseSSE>=2) && (UseAVX == 0));
1098 match(Set dst (SubD dst src));
1100 format %{ "subsd $dst, $src" %}
1101 ins_cost(150);
1102 ins_encode %{
1103 __ subsd($dst$$XMMRegister, $src$$XMMRegister);
1104 %}
1105 ins_pipe(pipe_slow);
1106 %}
1108 instruct subD_mem(regD dst, memory src) %{
1109 predicate((UseSSE>=2) && (UseAVX == 0));
1110 match(Set dst (SubD dst (LoadD src)));
1112 format %{ "subsd $dst, $src" %}
1113 ins_cost(150);
1114 ins_encode %{
1115 __ subsd($dst$$XMMRegister, $src$$Address);
1116 %}
1117 ins_pipe(pipe_slow);
1118 %}
1120 instruct subD_imm(regD dst, immD con) %{
1121 predicate((UseSSE>=2) && (UseAVX == 0));
1122 match(Set dst (SubD dst con));
1123 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
1124 ins_cost(150);
1125 ins_encode %{
1126 __ subsd($dst$$XMMRegister, $constantaddress($con));
1127 %}
1128 ins_pipe(pipe_slow);
1129 %}
1131 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
1132 predicate(UseAVX > 0);
1133 match(Set dst (SubD src1 src2));
1135 format %{ "vsubsd $dst, $src1, $src2" %}
1136 ins_cost(150);
1137 ins_encode %{
1138 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
1139 %}
1140 ins_pipe(pipe_slow);
1141 %}
1143 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
1144 predicate(UseAVX > 0);
1145 match(Set dst (SubD src1 (LoadD src2)));
1147 format %{ "vsubsd $dst, $src1, $src2" %}
1148 ins_cost(150);
1149 ins_encode %{
1150 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
1151 %}
1152 ins_pipe(pipe_slow);
1153 %}
1155 instruct subD_reg_imm(regD dst, regD src, immD con) %{
1156 predicate(UseAVX > 0);
1157 match(Set dst (SubD src con));
1159 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
1160 ins_cost(150);
1161 ins_encode %{
1162 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
1163 %}
1164 ins_pipe(pipe_slow);
1165 %}
1167 instruct mulF_reg(regF dst, regF src) %{
1168 predicate((UseSSE>=1) && (UseAVX == 0));
1169 match(Set dst (MulF dst src));
1171 format %{ "mulss $dst, $src" %}
1172 ins_cost(150);
1173 ins_encode %{
1174 __ mulss($dst$$XMMRegister, $src$$XMMRegister);
1175 %}
1176 ins_pipe(pipe_slow);
1177 %}
1179 instruct mulF_mem(regF dst, memory src) %{
1180 predicate((UseSSE>=1) && (UseAVX == 0));
1181 match(Set dst (MulF dst (LoadF src)));
1183 format %{ "mulss $dst, $src" %}
1184 ins_cost(150);
1185 ins_encode %{
1186 __ mulss($dst$$XMMRegister, $src$$Address);
1187 %}
1188 ins_pipe(pipe_slow);
1189 %}
1191 instruct mulF_imm(regF dst, immF con) %{
1192 predicate((UseSSE>=1) && (UseAVX == 0));
1193 match(Set dst (MulF dst con));
1194 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
1195 ins_cost(150);
1196 ins_encode %{
1197 __ mulss($dst$$XMMRegister, $constantaddress($con));
1198 %}
1199 ins_pipe(pipe_slow);
1200 %}
1202 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
1203 predicate(UseAVX > 0);
1204 match(Set dst (MulF src1 src2));
1206 format %{ "vmulss $dst, $src1, $src2" %}
1207 ins_cost(150);
1208 ins_encode %{
1209 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
1210 %}
1211 ins_pipe(pipe_slow);
1212 %}
1214 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
1215 predicate(UseAVX > 0);
1216 match(Set dst (MulF src1 (LoadF src2)));
1218 format %{ "vmulss $dst, $src1, $src2" %}
1219 ins_cost(150);
1220 ins_encode %{
1221 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
1222 %}
1223 ins_pipe(pipe_slow);
1224 %}
1226 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
1227 predicate(UseAVX > 0);
1228 match(Set dst (MulF src con));
1230 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
1231 ins_cost(150);
1232 ins_encode %{
1233 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
1234 %}
1235 ins_pipe(pipe_slow);
1236 %}
1238 instruct mulD_reg(regD dst, regD src) %{
1239 predicate((UseSSE>=2) && (UseAVX == 0));
1240 match(Set dst (MulD dst src));
1242 format %{ "mulsd $dst, $src" %}
1243 ins_cost(150);
1244 ins_encode %{
1245 __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
1246 %}
1247 ins_pipe(pipe_slow);
1248 %}
1250 instruct mulD_mem(regD dst, memory src) %{
1251 predicate((UseSSE>=2) && (UseAVX == 0));
1252 match(Set dst (MulD dst (LoadD src)));
1254 format %{ "mulsd $dst, $src" %}
1255 ins_cost(150);
1256 ins_encode %{
1257 __ mulsd($dst$$XMMRegister, $src$$Address);
1258 %}
1259 ins_pipe(pipe_slow);
1260 %}
1262 instruct mulD_imm(regD dst, immD con) %{
1263 predicate((UseSSE>=2) && (UseAVX == 0));
1264 match(Set dst (MulD dst con));
1265 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
1266 ins_cost(150);
1267 ins_encode %{
1268 __ mulsd($dst$$XMMRegister, $constantaddress($con));
1269 %}
1270 ins_pipe(pipe_slow);
1271 %}
1273 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
1274 predicate(UseAVX > 0);
1275 match(Set dst (MulD src1 src2));
1277 format %{ "vmulsd $dst, $src1, $src2" %}
1278 ins_cost(150);
1279 ins_encode %{
1280 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
1281 %}
1282 ins_pipe(pipe_slow);
1283 %}
1285 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
1286 predicate(UseAVX > 0);
1287 match(Set dst (MulD src1 (LoadD src2)));
1289 format %{ "vmulsd $dst, $src1, $src2" %}
1290 ins_cost(150);
1291 ins_encode %{
1292 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
1293 %}
1294 ins_pipe(pipe_slow);
1295 %}
1297 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
1298 predicate(UseAVX > 0);
1299 match(Set dst (MulD src con));
1301 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
1302 ins_cost(150);
1303 ins_encode %{
1304 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
1305 %}
1306 ins_pipe(pipe_slow);
1307 %}
1309 instruct divF_reg(regF dst, regF src) %{
1310 predicate((UseSSE>=1) && (UseAVX == 0));
1311 match(Set dst (DivF dst src));
1313 format %{ "divss $dst, $src" %}
1314 ins_cost(150);
1315 ins_encode %{
1316 __ divss($dst$$XMMRegister, $src$$XMMRegister);
1317 %}
1318 ins_pipe(pipe_slow);
1319 %}
1321 instruct divF_mem(regF dst, memory src) %{
1322 predicate((UseSSE>=1) && (UseAVX == 0));
1323 match(Set dst (DivF dst (LoadF src)));
1325 format %{ "divss $dst, $src" %}
1326 ins_cost(150);
1327 ins_encode %{
1328 __ divss($dst$$XMMRegister, $src$$Address);
1329 %}
1330 ins_pipe(pipe_slow);
1331 %}
1333 instruct divF_imm(regF dst, immF con) %{
1334 predicate((UseSSE>=1) && (UseAVX == 0));
1335 match(Set dst (DivF dst con));
1336 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
1337 ins_cost(150);
1338 ins_encode %{
1339 __ divss($dst$$XMMRegister, $constantaddress($con));
1340 %}
1341 ins_pipe(pipe_slow);
1342 %}
1344 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
1345 predicate(UseAVX > 0);
1346 match(Set dst (DivF src1 src2));
1348 format %{ "vdivss $dst, $src1, $src2" %}
1349 ins_cost(150);
1350 ins_encode %{
1351 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
1352 %}
1353 ins_pipe(pipe_slow);
1354 %}
1356 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
1357 predicate(UseAVX > 0);
1358 match(Set dst (DivF src1 (LoadF src2)));
1360 format %{ "vdivss $dst, $src1, $src2" %}
1361 ins_cost(150);
1362 ins_encode %{
1363 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
1364 %}
1365 ins_pipe(pipe_slow);
1366 %}
1368 instruct divF_reg_imm(regF dst, regF src, immF con) %{
1369 predicate(UseAVX > 0);
1370 match(Set dst (DivF src con));
1372 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
1373 ins_cost(150);
1374 ins_encode %{
1375 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
1376 %}
1377 ins_pipe(pipe_slow);
1378 %}
1380 instruct divD_reg(regD dst, regD src) %{
1381 predicate((UseSSE>=2) && (UseAVX == 0));
1382 match(Set dst (DivD dst src));
1384 format %{ "divsd $dst, $src" %}
1385 ins_cost(150);
1386 ins_encode %{
1387 __ divsd($dst$$XMMRegister, $src$$XMMRegister);
1388 %}
1389 ins_pipe(pipe_slow);
1390 %}
1392 instruct divD_mem(regD dst, memory src) %{
1393 predicate((UseSSE>=2) && (UseAVX == 0));
1394 match(Set dst (DivD dst (LoadD src)));
1396 format %{ "divsd $dst, $src" %}
1397 ins_cost(150);
1398 ins_encode %{
1399 __ divsd($dst$$XMMRegister, $src$$Address);
1400 %}
1401 ins_pipe(pipe_slow);
1402 %}
1404 instruct divD_imm(regD dst, immD con) %{
1405 predicate((UseSSE>=2) && (UseAVX == 0));
1406 match(Set dst (DivD dst con));
1407 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
1408 ins_cost(150);
1409 ins_encode %{
1410 __ divsd($dst$$XMMRegister, $constantaddress($con));
1411 %}
1412 ins_pipe(pipe_slow);
1413 %}
1415 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
1416 predicate(UseAVX > 0);
1417 match(Set dst (DivD src1 src2));
1419 format %{ "vdivsd $dst, $src1, $src2" %}
1420 ins_cost(150);
1421 ins_encode %{
1422 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
1423 %}
1424 ins_pipe(pipe_slow);
1425 %}
1427 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
1428 predicate(UseAVX > 0);
1429 match(Set dst (DivD src1 (LoadD src2)));
1431 format %{ "vdivsd $dst, $src1, $src2" %}
1432 ins_cost(150);
1433 ins_encode %{
1434 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
1435 %}
1436 ins_pipe(pipe_slow);
1437 %}
1439 instruct divD_reg_imm(regD dst, regD src, immD con) %{
1440 predicate(UseAVX > 0);
1441 match(Set dst (DivD src con));
1443 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
1444 ins_cost(150);
1445 ins_encode %{
1446 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
1447 %}
1448 ins_pipe(pipe_slow);
1449 %}
1451 instruct absF_reg(regF dst) %{
1452 predicate((UseSSE>=1) && (UseAVX == 0));
1453 match(Set dst (AbsF dst));
1454 ins_cost(150);
1455 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
1456 ins_encode %{
1457 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
1458 %}
1459 ins_pipe(pipe_slow);
1460 %}
1462 instruct absF_reg_reg(regF dst, regF src) %{
1463 predicate(UseAVX > 0);
1464 match(Set dst (AbsF src));
1465 ins_cost(150);
1466 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
1467 ins_encode %{
1468 bool vector256 = false;
1469 __ vandps($dst$$XMMRegister, $src$$XMMRegister,
1470 ExternalAddress(float_signmask()), vector256);
1471 %}
1472 ins_pipe(pipe_slow);
1473 %}
1475 instruct absD_reg(regD dst) %{
1476 predicate((UseSSE>=2) && (UseAVX == 0));
1477 match(Set dst (AbsD dst));
1478 ins_cost(150);
1479 format %{ "andpd $dst, [0x7fffffffffffffff]\t"
1480 "# abs double by sign masking" %}
1481 ins_encode %{
1482 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
1483 %}
1484 ins_pipe(pipe_slow);
1485 %}
1487 instruct absD_reg_reg(regD dst, regD src) %{
1488 predicate(UseAVX > 0);
1489 match(Set dst (AbsD src));
1490 ins_cost(150);
1491 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
1492 "# abs double by sign masking" %}
1493 ins_encode %{
1494 bool vector256 = false;
1495 __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
1496 ExternalAddress(double_signmask()), vector256);
1497 %}
1498 ins_pipe(pipe_slow);
1499 %}
1501 instruct negF_reg(regF dst) %{
1502 predicate((UseSSE>=1) && (UseAVX == 0));
1503 match(Set dst (NegF dst));
1504 ins_cost(150);
1505 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
1506 ins_encode %{
1507 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
1508 %}
1509 ins_pipe(pipe_slow);
1510 %}
1512 instruct negF_reg_reg(regF dst, regF src) %{
1513 predicate(UseAVX > 0);
1514 match(Set dst (NegF src));
1515 ins_cost(150);
1516 format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
1517 ins_encode %{
1518 bool vector256 = false;
1519 __ vxorps($dst$$XMMRegister, $src$$XMMRegister,
1520 ExternalAddress(float_signflip()), vector256);
1521 %}
1522 ins_pipe(pipe_slow);
1523 %}
1525 instruct negD_reg(regD dst) %{
1526 predicate((UseSSE>=2) && (UseAVX == 0));
1527 match(Set dst (NegD dst));
1528 ins_cost(150);
1529 format %{ "xorpd $dst, [0x8000000000000000]\t"
1530 "# neg double by sign flipping" %}
1531 ins_encode %{
1532 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
1533 %}
1534 ins_pipe(pipe_slow);
1535 %}
1537 instruct negD_reg_reg(regD dst, regD src) %{
1538 predicate(UseAVX > 0);
1539 match(Set dst (NegD src));
1540 ins_cost(150);
1541 format %{ "vxorpd $dst, $src, [0x8000000000000000]\t"
1542 "# neg double by sign flipping" %}
1543 ins_encode %{
1544 bool vector256 = false;
1545 __ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
1546 ExternalAddress(double_signflip()), vector256);
1547 %}
1548 ins_pipe(pipe_slow);
1549 %}
1551 instruct sqrtF_reg(regF dst, regF src) %{
1552 predicate(UseSSE>=1);
1553 match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
1555 format %{ "sqrtss $dst, $src" %}
1556 ins_cost(150);
1557 ins_encode %{
1558 __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
1559 %}
1560 ins_pipe(pipe_slow);
1561 %}
1563 instruct sqrtF_mem(regF dst, memory src) %{
1564 predicate(UseSSE>=1);
1565 match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
1567 format %{ "sqrtss $dst, $src" %}
1568 ins_cost(150);
1569 ins_encode %{
1570 __ sqrtss($dst$$XMMRegister, $src$$Address);
1571 %}
1572 ins_pipe(pipe_slow);
1573 %}
1575 instruct sqrtF_imm(regF dst, immF con) %{
1576 predicate(UseSSE>=1);
1577 match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
1578 format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
1579 ins_cost(150);
1580 ins_encode %{
1581 __ sqrtss($dst$$XMMRegister, $constantaddress($con));
1582 %}
1583 ins_pipe(pipe_slow);
1584 %}
1586 instruct sqrtD_reg(regD dst, regD src) %{
1587 predicate(UseSSE>=2);
1588 match(Set dst (SqrtD src));
1590 format %{ "sqrtsd $dst, $src" %}
1591 ins_cost(150);
1592 ins_encode %{
1593 __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
1594 %}
1595 ins_pipe(pipe_slow);
1596 %}
1598 instruct sqrtD_mem(regD dst, memory src) %{
1599 predicate(UseSSE>=2);
1600 match(Set dst (SqrtD (LoadD src)));
1602 format %{ "sqrtsd $dst, $src" %}
1603 ins_cost(150);
1604 ins_encode %{
1605 __ sqrtsd($dst$$XMMRegister, $src$$Address);
1606 %}
1607 ins_pipe(pipe_slow);
1608 %}
1610 instruct sqrtD_imm(regD dst, immD con) %{
1611 predicate(UseSSE>=2);
1612 match(Set dst (SqrtD con));
1613 format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
1614 ins_cost(150);
1615 ins_encode %{
1616 __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
1617 %}
1618 ins_pipe(pipe_slow);
1619 %}
1622 // ====================VECTOR INSTRUCTIONS=====================================
1624 // Load vectors (4 bytes long)
1625 instruct loadV4(vecS dst, memory mem) %{
1626 predicate(n->as_LoadVector()->memory_size() == 4);
1627 match(Set dst (LoadVector mem));
1628 ins_cost(125);
1629 format %{ "movd $dst,$mem\t! load vector (4 bytes)" %}
1630 ins_encode %{
1631 __ movdl($dst$$XMMRegister, $mem$$Address);
1632 %}
1633 ins_pipe( pipe_slow );
1634 %}
1636 // Load vectors (8 bytes long)
1637 instruct loadV8(vecD dst, memory mem) %{
1638 predicate(n->as_LoadVector()->memory_size() == 8);
1639 match(Set dst (LoadVector mem));
1640 ins_cost(125);
1641 format %{ "movq $dst,$mem\t! load vector (8 bytes)" %}
1642 ins_encode %{
1643 __ movq($dst$$XMMRegister, $mem$$Address);
1644 %}
1645 ins_pipe( pipe_slow );
1646 %}
1648 // Load vectors (16 bytes long)
1649 instruct loadV16(vecX dst, memory mem) %{
1650 predicate(n->as_LoadVector()->memory_size() == 16);
1651 match(Set dst (LoadVector mem));
1652 ins_cost(125);
1653 format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %}
1654 ins_encode %{
1655 __ movdqu($dst$$XMMRegister, $mem$$Address);
1656 %}
1657 ins_pipe( pipe_slow );
1658 %}
1660 // Load vectors (32 bytes long)
1661 instruct loadV32(vecY dst, memory mem) %{
1662 predicate(n->as_LoadVector()->memory_size() == 32);
1663 match(Set dst (LoadVector mem));
1664 ins_cost(125);
1665 format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
1666 ins_encode %{
1667 __ vmovdqu($dst$$XMMRegister, $mem$$Address);
1668 %}
1669 ins_pipe( pipe_slow );
1670 %}
1672 // Store vectors
1673 instruct storeV4(memory mem, vecS src) %{
1674 predicate(n->as_StoreVector()->memory_size() == 4);
1675 match(Set mem (StoreVector mem src));
1676 ins_cost(145);
1677 format %{ "movd $mem,$src\t! store vector (4 bytes)" %}
1678 ins_encode %{
1679 __ movdl($mem$$Address, $src$$XMMRegister);
1680 %}
1681 ins_pipe( pipe_slow );
1682 %}
1684 instruct storeV8(memory mem, vecD src) %{
1685 predicate(n->as_StoreVector()->memory_size() == 8);
1686 match(Set mem (StoreVector mem src));
1687 ins_cost(145);
1688 format %{ "movq $mem,$src\t! store vector (8 bytes)" %}
1689 ins_encode %{
1690 __ movq($mem$$Address, $src$$XMMRegister);
1691 %}
1692 ins_pipe( pipe_slow );
1693 %}
1695 instruct storeV16(memory mem, vecX src) %{
1696 predicate(n->as_StoreVector()->memory_size() == 16);
1697 match(Set mem (StoreVector mem src));
1698 ins_cost(145);
1699 format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %}
1700 ins_encode %{
1701 __ movdqu($mem$$Address, $src$$XMMRegister);
1702 %}
1703 ins_pipe( pipe_slow );
1704 %}
1706 instruct storeV32(memory mem, vecY src) %{
1707 predicate(n->as_StoreVector()->memory_size() == 32);
1708 match(Set mem (StoreVector mem src));
1709 ins_cost(145);
1710 format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
1711 ins_encode %{
1712 __ vmovdqu($mem$$Address, $src$$XMMRegister);
1713 %}
1714 ins_pipe( pipe_slow );
1715 %}
1717 // Replicate byte scalar to be vector
1718 instruct Repl4B(vecS dst, rRegI src) %{
1719 predicate(n->as_Vector()->length() == 4);
1720 match(Set dst (ReplicateB src));
1721 format %{ "movd $dst,$src\n\t"
1722 "punpcklbw $dst,$dst\n\t"
1723 "pshuflw $dst,$dst,0x00\t! replicate4B" %}
1724 ins_encode %{
1725 __ movdl($dst$$XMMRegister, $src$$Register);
1726 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
1727 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1728 %}
1729 ins_pipe( pipe_slow );
1730 %}
1732 instruct Repl8B(vecD dst, rRegI src) %{
1733 predicate(n->as_Vector()->length() == 8);
1734 match(Set dst (ReplicateB src));
1735 format %{ "movd $dst,$src\n\t"
1736 "punpcklbw $dst,$dst\n\t"
1737 "pshuflw $dst,$dst,0x00\t! replicate8B" %}
1738 ins_encode %{
1739 __ movdl($dst$$XMMRegister, $src$$Register);
1740 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
1741 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1742 %}
1743 ins_pipe( pipe_slow );
1744 %}
1746 instruct Repl16B(vecX dst, rRegI src) %{
1747 predicate(n->as_Vector()->length() == 16);
1748 match(Set dst (ReplicateB src));
1749 format %{ "movd $dst,$src\n\t"
1750 "punpcklbw $dst,$dst\n\t"
1751 "pshuflw $dst,$dst,0x00\n\t"
1752 "punpcklqdq $dst,$dst\t! replicate16B" %}
1753 ins_encode %{
1754 __ movdl($dst$$XMMRegister, $src$$Register);
1755 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
1756 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1757 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
1758 %}
1759 ins_pipe( pipe_slow );
1760 %}
1762 instruct Repl32B(vecY dst, rRegI src) %{
1763 predicate(n->as_Vector()->length() == 32);
1764 match(Set dst (ReplicateB src));
1765 format %{ "movd $dst,$src\n\t"
1766 "punpcklbw $dst,$dst\n\t"
1767 "pshuflw $dst,$dst,0x00\n\t"
1768 "punpcklqdq $dst,$dst\n\t"
1769 "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
1770 ins_encode %{
1771 __ movdl($dst$$XMMRegister, $src$$Register);
1772 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
1773 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1774 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
1775 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
1776 %}
1777 ins_pipe( pipe_slow );
1778 %}
1780 // Replicate byte scalar immediate to be vector by loading from const table.
1781 instruct Repl4B_imm(vecS dst, immI con) %{
1782 predicate(n->as_Vector()->length() == 4);
1783 match(Set dst (ReplicateB con));
1784 format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %}
1785 ins_encode %{
1786 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
1787 %}
1788 ins_pipe( pipe_slow );
1789 %}
1791 instruct Repl8B_imm(vecD dst, immI con) %{
1792 predicate(n->as_Vector()->length() == 8);
1793 match(Set dst (ReplicateB con));
1794 format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %}
1795 ins_encode %{
1796 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
1797 %}
1798 ins_pipe( pipe_slow );
1799 %}
1801 instruct Repl16B_imm(vecX dst, immI con) %{
1802 predicate(n->as_Vector()->length() == 16);
1803 match(Set dst (ReplicateB con));
1804 format %{ "movq $dst,[$constantaddress]\n\t"
1805 "punpcklqdq $dst,$dst\t! replicate16B($con)" %}
1806 ins_encode %{
1807 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
1808 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
1809 %}
1810 ins_pipe( pipe_slow );
1811 %}
// Replicate an immediate byte 32 times into a 32-byte vector: load the
// 8-byte replicated pattern from the constant table, widen it to 16 bytes,
// then to the full 256-bit register.
instruct Repl32B_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB con));
  // Fixed the disassembly annotation: "lreplicate32B" -> "replicate32B"
  // (stray 'l' typo, inconsistent with Repl4B_imm/Repl8B_imm/Repl16B_imm).
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate32B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
1827 // Replicate byte scalar zero to be vector
1828 instruct Repl4B_zero(vecS dst, immI0 zero) %{
1829 predicate(n->as_Vector()->length() == 4);
1830 match(Set dst (ReplicateB zero));
1831 format %{ "pxor $dst,$dst\t! replicate4B zero" %}
1832 ins_encode %{
1833 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
1834 %}
1835 ins_pipe( fpu_reg_reg );
1836 %}
1838 instruct Repl8B_zero(vecD dst, immI0 zero) %{
1839 predicate(n->as_Vector()->length() == 8);
1840 match(Set dst (ReplicateB zero));
1841 format %{ "pxor $dst,$dst\t! replicate8B zero" %}
1842 ins_encode %{
1843 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
1844 %}
1845 ins_pipe( fpu_reg_reg );
1846 %}
1848 instruct Repl16B_zero(vecX dst, immI0 zero) %{
1849 predicate(n->as_Vector()->length() == 16);
1850 match(Set dst (ReplicateB zero));
1851 format %{ "pxor $dst,$dst\t! replicate16B zero" %}
1852 ins_encode %{
1853 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
1854 %}
1855 ins_pipe( fpu_reg_reg );
1856 %}
// Zero-fill a 32-byte vector by xoring the destination register with itself.
instruct Repl32B_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    // NOTE(review): the comment names vxorpd but the call below is vpxor;
    // presumably the MacroAssembler vpxor wrapper falls back to vxorpd when
    // AVX2 is unavailable — confirm against MacroAssembler::vpxor.
    bool vector256 = true;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}
1870 // Replicate char/short (2 byte) scalar to be vector
// Pattern: move the GPR into the low dword (movdl), splat the low word across
// the low 64 bits (pshuflw 0x00), then widen with punpcklqdq / vinserti128h
// for the 128-bit and 256-bit variants.
1871 instruct Repl2S(vecS dst, rRegI src) %{
1872 predicate(n->as_Vector()->length() == 2);
1873 match(Set dst (ReplicateS src));
1874 format %{ "movd $dst,$src\n\t"
1875 "pshuflw $dst,$dst,0x00\t! replicate2S" %}
1876 ins_encode %{
1877 __ movdl($dst$$XMMRegister, $src$$Register);
1878 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1879 %}
1880 ins_pipe( fpu_reg_reg );
1881 %}
1883 instruct Repl4S(vecD dst, rRegI src) %{
1884 predicate(n->as_Vector()->length() == 4);
1885 match(Set dst (ReplicateS src));
1886 format %{ "movd $dst,$src\n\t"
1887 "pshuflw $dst,$dst,0x00\t! replicate4S" %}
1888 ins_encode %{
1889 __ movdl($dst$$XMMRegister, $src$$Register);
1890 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1891 %}
1892 ins_pipe( fpu_reg_reg );
1893 %}
1895 instruct Repl8S(vecX dst, rRegI src) %{
1896 predicate(n->as_Vector()->length() == 8);
1897 match(Set dst (ReplicateS src));
1898 format %{ "movd $dst,$src\n\t"
1899 "pshuflw $dst,$dst,0x00\n\t"
1900 "punpcklqdq $dst,$dst\t! replicate8S" %}
1901 ins_encode %{
1902 __ movdl($dst$$XMMRegister, $src$$Register);
1903 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1904 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
1905 %}
1906 ins_pipe( pipe_slow );
1907 %}
1909 instruct Repl16S(vecY dst, rRegI src) %{
1910 predicate(n->as_Vector()->length() == 16);
1911 match(Set dst (ReplicateS src));
1912 format %{ "movd $dst,$src\n\t"
1913 "pshuflw $dst,$dst,0x00\n\t"
1914 "punpcklqdq $dst,$dst\n\t"
1915 "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
1916 ins_encode %{
1917 __ movdl($dst$$XMMRegister, $src$$Register);
1918 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1919 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
1920 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
1921 %}
1922 ins_pipe( pipe_slow );
1923 %}
1925 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
// replicate4_imm/replicate8_imm pre-replicate the 2-byte immediate into a
// 4- or 8-byte constant-table entry, so a single load fills the low lanes.
1926 instruct Repl2S_imm(vecS dst, immI con) %{
1927 predicate(n->as_Vector()->length() == 2);
1928 match(Set dst (ReplicateS con));
1929 format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %}
1930 ins_encode %{
1931 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
1932 %}
1933 ins_pipe( fpu_reg_reg );
1934 %}
1936 instruct Repl4S_imm(vecD dst, immI con) %{
1937 predicate(n->as_Vector()->length() == 4);
1938 match(Set dst (ReplicateS con));
1939 format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %}
1940 ins_encode %{
1941 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
1942 %}
1943 ins_pipe( fpu_reg_reg );
1944 %}
1946 instruct Repl8S_imm(vecX dst, immI con) %{
1947 predicate(n->as_Vector()->length() == 8);
1948 match(Set dst (ReplicateS con));
1949 format %{ "movq $dst,[$constantaddress]\n\t"
1950 "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
1951 ins_encode %{
1952 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
1953 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
1954 %}
1955 ins_pipe( pipe_slow );
1956 %}
1958 instruct Repl16S_imm(vecY dst, immI con) %{
1959 predicate(n->as_Vector()->length() == 16);
1960 match(Set dst (ReplicateS con));
1961 format %{ "movq $dst,[$constantaddress]\n\t"
1962 "punpcklqdq $dst,$dst\n\t"
1963 "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %}
1964 ins_encode %{
1965 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
1966 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
1967 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
1968 %}
1969 ins_pipe( pipe_slow );
1970 %}
1972 // Replicate char/short (2 byte) scalar zero to be vector
1973 instruct Repl2S_zero(vecS dst, immI0 zero) %{
1974 predicate(n->as_Vector()->length() == 2);
1975 match(Set dst (ReplicateS zero));
1976 format %{ "pxor $dst,$dst\t! replicate2S zero" %}
1977 ins_encode %{
1978 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
1979 %}
1980 ins_pipe( fpu_reg_reg );
1981 %}
1983 instruct Repl4S_zero(vecD dst, immI0 zero) %{
1984 predicate(n->as_Vector()->length() == 4);
1985 match(Set dst (ReplicateS zero));
1986 format %{ "pxor $dst,$dst\t! replicate4S zero" %}
1987 ins_encode %{
1988 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
1989 %}
1990 ins_pipe( fpu_reg_reg );
1991 %}
1993 instruct Repl8S_zero(vecX dst, immI0 zero) %{
1994 predicate(n->as_Vector()->length() == 8);
1995 match(Set dst (ReplicateS zero));
1996 format %{ "pxor $dst,$dst\t! replicate8S zero" %}
1997 ins_encode %{
1998 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
1999 %}
2000 ins_pipe( fpu_reg_reg );
2001 %}
2003 instruct Repl16S_zero(vecY dst, immI0 zero) %{
2004 predicate(n->as_Vector()->length() == 16);
2005 match(Set dst (ReplicateS zero));
2006 format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %}
2007 ins_encode %{
2008 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
// NOTE(review): comment says vxorpd but vpxor() is emitted -- see Repl32B_zero.
2009 bool vector256 = true;
2010 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
2011 %}
2012 ins_pipe( fpu_reg_reg );
2013 %}
2015 // Replicate integer (4 byte) scalar to be vector
// Pattern: movdl the GPR (or load from memory in the _mem variants) into the
// low dword, then pshufd 0x00 broadcasts that dword across the 128-bit lane;
// 256-bit variants additionally mirror with vinserti128h.
2016 instruct Repl2I(vecD dst, rRegI src) %{
2017 predicate(n->as_Vector()->length() == 2);
2018 match(Set dst (ReplicateI src));
2019 format %{ "movd $dst,$src\n\t"
2020 "pshufd $dst,$dst,0x00\t! replicate2I" %}
2021 ins_encode %{
2022 __ movdl($dst$$XMMRegister, $src$$Register);
2023 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2024 %}
2025 ins_pipe( fpu_reg_reg );
2026 %}
2028 instruct Repl4I(vecX dst, rRegI src) %{
2029 predicate(n->as_Vector()->length() == 4);
2030 match(Set dst (ReplicateI src));
2031 format %{ "movd $dst,$src\n\t"
2032 "pshufd $dst,$dst,0x00\t! replicate4I" %}
2033 ins_encode %{
2034 __ movdl($dst$$XMMRegister, $src$$Register);
2035 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2036 %}
2037 ins_pipe( pipe_slow );
2038 %}
2040 instruct Repl8I(vecY dst, rRegI src) %{
2041 predicate(n->as_Vector()->length() == 8);
2042 match(Set dst (ReplicateI src));
2043 format %{ "movd $dst,$src\n\t"
2044 "pshufd $dst,$dst,0x00\n\t"
2045 "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
2046 ins_encode %{
2047 __ movdl($dst$$XMMRegister, $src$$Register);
2048 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2049 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2050 %}
2051 ins_pipe( pipe_slow );
2052 %}
2054 // Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
2055 instruct Repl2I_imm(vecD dst, immI con) %{
2056 predicate(n->as_Vector()->length() == 2);
2057 match(Set dst (ReplicateI con));
2058 format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %}
2059 ins_encode %{
2060 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
2061 %}
2062 ins_pipe( fpu_reg_reg );
2063 %}
2065 instruct Repl4I_imm(vecX dst, immI con) %{
2066 predicate(n->as_Vector()->length() == 4);
2067 match(Set dst (ReplicateI con));
2068 format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
2069 "punpcklqdq $dst,$dst" %}
2070 ins_encode %{
2071 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
2072 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2073 %}
2074 ins_pipe( pipe_slow );
2075 %}
2077 instruct Repl8I_imm(vecY dst, immI con) %{
2078 predicate(n->as_Vector()->length() == 8);
2079 match(Set dst (ReplicateI con));
2080 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
2081 "punpcklqdq $dst,$dst\n\t"
2082 "vinserti128h $dst,$dst,$dst" %}
2083 ins_encode %{
2084 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
2085 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2086 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2087 %}
2088 ins_pipe( pipe_slow );
2089 %}
2091 // Integer could be loaded into xmm register directly from memory.
2092 instruct Repl2I_mem(vecD dst, memory mem) %{
2093 predicate(n->as_Vector()->length() == 2);
2094 match(Set dst (ReplicateI (LoadI mem)));
2095 format %{ "movd $dst,$mem\n\t"
2096 "pshufd $dst,$dst,0x00\t! replicate2I" %}
2097 ins_encode %{
2098 __ movdl($dst$$XMMRegister, $mem$$Address);
2099 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2100 %}
2101 ins_pipe( fpu_reg_reg );
2102 %}
2104 instruct Repl4I_mem(vecX dst, memory mem) %{
2105 predicate(n->as_Vector()->length() == 4);
2106 match(Set dst (ReplicateI (LoadI mem)));
2107 format %{ "movd $dst,$mem\n\t"
2108 "pshufd $dst,$dst,0x00\t! replicate4I" %}
2109 ins_encode %{
2110 __ movdl($dst$$XMMRegister, $mem$$Address);
2111 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2112 %}
2113 ins_pipe( pipe_slow );
2114 %}
2116 instruct Repl8I_mem(vecY dst, memory mem) %{
2117 predicate(n->as_Vector()->length() == 8);
2118 match(Set dst (ReplicateI (LoadI mem)));
2119 format %{ "movd $dst,$mem\n\t"
2120 "pshufd $dst,$dst,0x00\n\t"
2121 "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
2122 ins_encode %{
2123 __ movdl($dst$$XMMRegister, $mem$$Address);
2124 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2125 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2126 %}
2127 ins_pipe( pipe_slow );
2128 %}
2130 // Replicate integer (4 byte) scalar zero to be vector
2131 instruct Repl2I_zero(vecD dst, immI0 zero) %{
2132 predicate(n->as_Vector()->length() == 2);
2133 match(Set dst (ReplicateI zero));
2134 format %{ "pxor $dst,$dst\t! replicate2I" %}
2135 ins_encode %{
2136 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
2137 %}
2138 ins_pipe( fpu_reg_reg );
2139 %}
// Replicate int zero into a 128-bit (vecX) vector by clearing the register.
2141 instruct Repl4I_zero(vecX dst, immI0 zero) %{
2142 predicate(n->as_Vector()->length() == 4);
2143 match(Set dst (ReplicateI zero));
// Fixed format string: removed stray ')' after "zero" (cosmetic text shown in
// PrintOptoAssembly listings; now matches the sibling *_zero rules).
2144 format %{ "pxor $dst,$dst\t! replicate4I zero" %}
2145 ins_encode %{
2146 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
2147 %}
2148 ins_pipe( fpu_reg_reg );
2149 %}
// Replicate int zero into a 256-bit (vecY) vector by clearing the register.
2151 instruct Repl8I_zero(vecY dst, immI0 zero) %{
2152 predicate(n->as_Vector()->length() == 8);
2153 match(Set dst (ReplicateI zero));
2154 format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %}
2155 ins_encode %{
2156 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
// NOTE(review): comment says vxorpd but vpxor() is emitted -- see Repl32B_zero.
2157 bool vector256 = true;
2158 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
2159 %}
2160 ins_pipe( fpu_reg_reg );
2161 %}
2163 // Replicate long (8 byte) scalar to be vector
// LP64 moves the 64-bit GPR directly (movdq); 32-bit builds must assemble the
// long from its lo/hi register halves via two movdl + punpckldq, using a TEMP
// xmm register for the high half.
2164 #ifdef _LP64
2165 instruct Repl2L(vecX dst, rRegL src) %{
2166 predicate(n->as_Vector()->length() == 2);
2167 match(Set dst (ReplicateL src));
2168 format %{ "movdq $dst,$src\n\t"
2169 "punpcklqdq $dst,$dst\t! replicate2L" %}
2170 ins_encode %{
2171 __ movdq($dst$$XMMRegister, $src$$Register);
2172 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2173 %}
2174 ins_pipe( pipe_slow );
2175 %}
2177 instruct Repl4L(vecY dst, rRegL src) %{
2178 predicate(n->as_Vector()->length() == 4);
2179 match(Set dst (ReplicateL src));
2180 format %{ "movdq $dst,$src\n\t"
2181 "punpcklqdq $dst,$dst\n\t"
2182 "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
2183 ins_encode %{
2184 __ movdq($dst$$XMMRegister, $src$$Register);
2185 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2186 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2187 %}
2188 ins_pipe( pipe_slow );
2189 %}
2190 #else // _LP64
2191 instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
2192 predicate(n->as_Vector()->length() == 2);
2193 match(Set dst (ReplicateL src));
2194 effect(TEMP dst, USE src, TEMP tmp);
2195 format %{ "movdl $dst,$src.lo\n\t"
2196 "movdl $tmp,$src.hi\n\t"
2197 "punpckldq $dst,$tmp\n\t"
2198 "punpcklqdq $dst,$dst\t! replicate2L"%}
2199 ins_encode %{
2200 __ movdl($dst$$XMMRegister, $src$$Register);
2201 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
2202 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
2203 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2204 %}
2205 ins_pipe( pipe_slow );
2206 %}
2208 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
2209 predicate(n->as_Vector()->length() == 4);
2210 match(Set dst (ReplicateL src));
2211 effect(TEMP dst, USE src, TEMP tmp);
2212 format %{ "movdl $dst,$src.lo\n\t"
2213 "movdl $tmp,$src.hi\n\t"
2214 "punpckldq $dst,$tmp\n\t"
2215 "punpcklqdq $dst,$dst\n\t"
2216 "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
2217 ins_encode %{
2218 __ movdl($dst$$XMMRegister, $src$$Register);
2219 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
2220 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
2221 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2222 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2223 %}
2224 ins_pipe( pipe_slow );
2225 %}
2226 #endif // _LP64
2228 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
// Unlike the byte/short/int cases, a long already fills 8 bytes, so the
// constant is stored directly (no replicate*_imm helper needed).
2229 instruct Repl2L_imm(vecX dst, immL con) %{
2230 predicate(n->as_Vector()->length() == 2);
2231 match(Set dst (ReplicateL con));
2232 format %{ "movq $dst,[$constantaddress]\n\t"
2233 "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
2234 ins_encode %{
2235 __ movq($dst$$XMMRegister, $constantaddress($con));
2236 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2237 %}
2238 ins_pipe( pipe_slow );
2239 %}
2241 instruct Repl4L_imm(vecY dst, immL con) %{
2242 predicate(n->as_Vector()->length() == 4);
2243 match(Set dst (ReplicateL con));
2244 format %{ "movq $dst,[$constantaddress]\n\t"
2245 "punpcklqdq $dst,$dst\n\t"
2246 "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %}
2247 ins_encode %{
2248 __ movq($dst$$XMMRegister, $constantaddress($con));
2249 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2250 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2251 %}
2252 ins_pipe( pipe_slow );
2253 %}
2255 // Long could be loaded into xmm register directly from memory.
2256 instruct Repl2L_mem(vecX dst, memory mem) %{
2257 predicate(n->as_Vector()->length() == 2);
2258 match(Set dst (ReplicateL (LoadL mem)));
2259 format %{ "movq $dst,$mem\n\t"
2260 "punpcklqdq $dst,$dst\t! replicate2L" %}
2261 ins_encode %{
2262 __ movq($dst$$XMMRegister, $mem$$Address);
2263 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2264 %}
2265 ins_pipe( pipe_slow );
2266 %}
2268 instruct Repl4L_mem(vecY dst, memory mem) %{
2269 predicate(n->as_Vector()->length() == 4);
2270 match(Set dst (ReplicateL (LoadL mem)));
2271 format %{ "movq $dst,$mem\n\t"
2272 "punpcklqdq $dst,$dst\n\t"
2273 "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
2274 ins_encode %{
2275 __ movq($dst$$XMMRegister, $mem$$Address);
2276 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2277 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2278 %}
2279 ins_pipe( pipe_slow );
2280 %}
2282 // Replicate long (8 byte) scalar zero to be vector
2283 instruct Repl2L_zero(vecX dst, immL0 zero) %{
2284 predicate(n->as_Vector()->length() == 2);
2285 match(Set dst (ReplicateL zero));
2286 format %{ "pxor $dst,$dst\t! replicate2L zero" %}
2287 ins_encode %{
2288 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
2289 %}
2290 ins_pipe( fpu_reg_reg );
2291 %}
2293 instruct Repl4L_zero(vecY dst, immL0 zero) %{
2294 predicate(n->as_Vector()->length() == 4);
2295 match(Set dst (ReplicateL zero));
2296 format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %}
2297 ins_encode %{
2298 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
// NOTE(review): comment says vxorpd but vpxor() is emitted -- see Repl32B_zero.
2299 bool vector256 = true;
2300 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
2301 %}
2302 ins_pipe( fpu_reg_reg );
2303 %}
2305 // Replicate float (4 byte) scalar to be vector
2306 instruct Repl2F(vecD dst, regF src) %{
2307 predicate(n->as_Vector()->length() == 2);
2308 match(Set dst (ReplicateF src));
2309 format %{ "pshufd $dst,$dst,0x00\t! replicate2F" %}
2310 ins_encode %{
2311 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
2312 %}
2313 ins_pipe( fpu_reg_reg );
2314 %}
// Replicate a float scalar into all four lanes of a 128-bit (vecX) vector.
2316 instruct Repl4F(vecX dst, regF src) %{
2317 predicate(n->as_Vector()->length() == 4);
2318 match(Set dst (ReplicateF src));
// Fixed format string: the encoding below shuffles from $src, not $dst
// (format/encode mismatch; now matches the Repl8F format style).
2319 format %{ "pshufd $dst,$src,0x00\t! replicate4F" %}
2320 ins_encode %{
2321 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
2322 %}
2323 ins_pipe( pipe_slow );
2324 %}
// Replicate a float scalar into all eight lanes of a 256-bit (vecY) vector:
// broadcast within the low lane, then mirror into the high 128-bit lane.
2326 instruct Repl8F(vecY dst, regF src) %{
2327 predicate(n->as_Vector()->length() == 8);
2328 match(Set dst (ReplicateF src));
2329 format %{ "pshufd $dst,$src,0x00\n\t"
2330 "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
2331 ins_encode %{
2332 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
2333 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2334 %}
2335 ins_pipe( pipe_slow );
2336 %}
2338 // Replicate float (4 byte) scalar zero to be vector
2339 instruct Repl2F_zero(vecD dst, immF0 zero) %{
2340 predicate(n->as_Vector()->length() == 2);
2341 match(Set dst (ReplicateF zero));
2342 format %{ "xorps $dst,$dst\t! replicate2F zero" %}
2343 ins_encode %{
2344 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
2345 %}
2346 ins_pipe( fpu_reg_reg );
2347 %}
2349 instruct Repl4F_zero(vecX dst, immF0 zero) %{
2350 predicate(n->as_Vector()->length() == 4);
2351 match(Set dst (ReplicateF zero));
2352 format %{ "xorps $dst,$dst\t! replicate4F zero" %}
2353 ins_encode %{
2354 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
2355 %}
2356 ins_pipe( fpu_reg_reg );
2357 %}
2359 instruct Repl8F_zero(vecY dst, immF0 zero) %{
2360 predicate(n->as_Vector()->length() == 8);
2361 match(Set dst (ReplicateF zero));
2362 format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %}
2363 ins_encode %{
2364 bool vector256 = true;
2365 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
2366 %}
2367 ins_pipe( fpu_reg_reg );
2368 %}
2370 // Replicate double (8 bytes) scalar to be vector
// pshufd imm 0x44 selects dwords {0,1,0,1}, i.e. duplicates the low 64-bit
// double into both 64-bit halves of the xmm register.
2371 instruct Repl2D(vecX dst, regD src) %{
2372 predicate(n->as_Vector()->length() == 2);
2373 match(Set dst (ReplicateD src));
2374 format %{ "pshufd $dst,$src,0x44\t! replicate2D" %}
2375 ins_encode %{
2376 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
2377 %}
2378 ins_pipe( pipe_slow );
2379 %}
2381 instruct Repl4D(vecY dst, regD src) %{
2382 predicate(n->as_Vector()->length() == 4);
2383 match(Set dst (ReplicateD src));
2384 format %{ "pshufd $dst,$src,0x44\n\t"
2385 "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
2386 ins_encode %{
2387 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
2388 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2389 %}
2390 ins_pipe( pipe_slow );
2391 %}
2393 // Replicate double (8 byte) scalar zero to be vector
2394 instruct Repl2D_zero(vecX dst, immD0 zero) %{
2395 predicate(n->as_Vector()->length() == 2);
2396 match(Set dst (ReplicateD zero));
2397 format %{ "xorpd $dst,$dst\t! replicate2D zero" %}
2398 ins_encode %{
2399 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
2400 %}
2401 ins_pipe( fpu_reg_reg );
2402 %}
2404 instruct Repl4D_zero(vecY dst, immD0 zero) %{
2405 predicate(n->as_Vector()->length() == 4);
2406 match(Set dst (ReplicateD zero));
// NOTE(review): the ",vect256" suffix in this format string is inconsistent
// with the sibling 256-bit *_zero formats -- presumably leftover; confirm.
2407 format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %}
2408 ins_encode %{
2409 bool vector256 = true;
2410 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
2411 %}
2412 ins_pipe( fpu_reg_reg );
2413 %}
2415 // ====================VECTOR ARITHMETIC=======================================
2417 // --------------------------------- ADD --------------------------------------
2419 // Bytes vector add
// Non-AVX rules are two-operand (dst += src, paddb); the three-operand _reg/_mem
// rules require AVX (UseAVX > 0) and the 256-bit forms require AVX2 (UseAVX > 1),
// since 256-bit integer ops are an AVX2 feature.
2420 instruct vadd4B(vecS dst, vecS src) %{
2421 predicate(n->as_Vector()->length() == 4);
2422 match(Set dst (AddVB dst src));
2423 format %{ "paddb $dst,$src\t! add packed4B" %}
2424 ins_encode %{
2425 __ paddb($dst$$XMMRegister, $src$$XMMRegister);
2426 %}
2427 ins_pipe( pipe_slow );
2428 %}
2430 instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
2431 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2432 match(Set dst (AddVB src1 src2));
2433 format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
2434 ins_encode %{
2435 bool vector256 = false;
2436 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2437 %}
2438 ins_pipe( pipe_slow );
2439 %}
2441 instruct vadd8B(vecD dst, vecD src) %{
2442 predicate(n->as_Vector()->length() == 8);
2443 match(Set dst (AddVB dst src));
2444 format %{ "paddb $dst,$src\t! add packed8B" %}
2445 ins_encode %{
2446 __ paddb($dst$$XMMRegister, $src$$XMMRegister);
2447 %}
2448 ins_pipe( pipe_slow );
2449 %}
2451 instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
2452 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
2453 match(Set dst (AddVB src1 src2));
2454 format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
2455 ins_encode %{
2456 bool vector256 = false;
2457 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2458 %}
2459 ins_pipe( pipe_slow );
2460 %}
2462 instruct vadd16B(vecX dst, vecX src) %{
2463 predicate(n->as_Vector()->length() == 16);
2464 match(Set dst (AddVB dst src));
2465 format %{ "paddb $dst,$src\t! add packed16B" %}
2466 ins_encode %{
2467 __ paddb($dst$$XMMRegister, $src$$XMMRegister);
2468 %}
2469 ins_pipe( pipe_slow );
2470 %}
2472 instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
2473 predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
2474 match(Set dst (AddVB src1 src2));
2475 format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
2476 ins_encode %{
2477 bool vector256 = false;
2478 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2479 %}
2480 ins_pipe( pipe_slow );
2481 %}
2483 instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
2484 predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
2485 match(Set dst (AddVB src (LoadVector mem)));
2486 format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
2487 ins_encode %{
2488 bool vector256 = false;
2489 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2490 %}
2491 ins_pipe( pipe_slow );
2492 %}
2494 instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
2495 predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
2496 match(Set dst (AddVB src1 src2));
2497 format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
2498 ins_encode %{
2499 bool vector256 = true;
2500 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2501 %}
2502 ins_pipe( pipe_slow );
2503 %}
2505 instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
2506 predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
2507 match(Set dst (AddVB src (LoadVector mem)));
2508 format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
2509 ins_encode %{
2510 bool vector256 = true;
2511 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2512 %}
2513 ins_pipe( pipe_slow );
2514 %}
2516 // Shorts/Chars vector add
// Same structure as the byte adds: two-operand paddw without AVX, three-operand
// vpaddw _reg/_mem forms with UseAVX > 0, 256-bit forms with UseAVX > 1.
2517 instruct vadd2S(vecS dst, vecS src) %{
2518 predicate(n->as_Vector()->length() == 2);
2519 match(Set dst (AddVS dst src));
2520 format %{ "paddw $dst,$src\t! add packed2S" %}
2521 ins_encode %{
2522 __ paddw($dst$$XMMRegister, $src$$XMMRegister);
2523 %}
2524 ins_pipe( pipe_slow );
2525 %}
2527 instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
2528 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2529 match(Set dst (AddVS src1 src2));
2530 format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
2531 ins_encode %{
2532 bool vector256 = false;
2533 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2534 %}
2535 ins_pipe( pipe_slow );
2536 %}
2538 instruct vadd4S(vecD dst, vecD src) %{
2539 predicate(n->as_Vector()->length() == 4);
2540 match(Set dst (AddVS dst src));
2541 format %{ "paddw $dst,$src\t! add packed4S" %}
2542 ins_encode %{
2543 __ paddw($dst$$XMMRegister, $src$$XMMRegister);
2544 %}
2545 ins_pipe( pipe_slow );
2546 %}
2548 instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
2549 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2550 match(Set dst (AddVS src1 src2));
2551 format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
2552 ins_encode %{
2553 bool vector256 = false;
2554 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2555 %}
2556 ins_pipe( pipe_slow );
2557 %}
2559 instruct vadd8S(vecX dst, vecX src) %{
2560 predicate(n->as_Vector()->length() == 8);
2561 match(Set dst (AddVS dst src));
2562 format %{ "paddw $dst,$src\t! add packed8S" %}
2563 ins_encode %{
2564 __ paddw($dst$$XMMRegister, $src$$XMMRegister);
2565 %}
2566 ins_pipe( pipe_slow );
2567 %}
2569 instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
2570 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
2571 match(Set dst (AddVS src1 src2));
2572 format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
2573 ins_encode %{
2574 bool vector256 = false;
2575 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2576 %}
2577 ins_pipe( pipe_slow );
2578 %}
2580 instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
2581 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
2582 match(Set dst (AddVS src (LoadVector mem)));
2583 format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
2584 ins_encode %{
2585 bool vector256 = false;
2586 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2587 %}
2588 ins_pipe( pipe_slow );
2589 %}
2591 instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
2592 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
2593 match(Set dst (AddVS src1 src2));
2594 format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
2595 ins_encode %{
2596 bool vector256 = true;
2597 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2598 %}
2599 ins_pipe( pipe_slow );
2600 %}
2602 instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
2603 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
2604 match(Set dst (AddVS src (LoadVector mem)));
2605 format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
2606 ins_encode %{
2607 bool vector256 = true;
2608 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2609 %}
2610 ins_pipe( pipe_slow );
2611 %}
2613 // Integers vector add
// paddd without AVX; vpaddd _reg/_mem with UseAVX > 0; 256-bit with UseAVX > 1.
2614 instruct vadd2I(vecD dst, vecD src) %{
2615 predicate(n->as_Vector()->length() == 2);
2616 match(Set dst (AddVI dst src));
2617 format %{ "paddd $dst,$src\t! add packed2I" %}
2618 ins_encode %{
2619 __ paddd($dst$$XMMRegister, $src$$XMMRegister);
2620 %}
2621 ins_pipe( pipe_slow );
2622 %}
2624 instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
2625 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2626 match(Set dst (AddVI src1 src2));
2627 format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %}
2628 ins_encode %{
2629 bool vector256 = false;
2630 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2631 %}
2632 ins_pipe( pipe_slow );
2633 %}
2635 instruct vadd4I(vecX dst, vecX src) %{
2636 predicate(n->as_Vector()->length() == 4);
2637 match(Set dst (AddVI dst src));
2638 format %{ "paddd $dst,$src\t! add packed4I" %}
2639 ins_encode %{
2640 __ paddd($dst$$XMMRegister, $src$$XMMRegister);
2641 %}
2642 ins_pipe( pipe_slow );
2643 %}
2645 instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
2646 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2647 match(Set dst (AddVI src1 src2));
2648 format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
2649 ins_encode %{
2650 bool vector256 = false;
2651 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2652 %}
2653 ins_pipe( pipe_slow );
2654 %}
2656 instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
2657 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2658 match(Set dst (AddVI src (LoadVector mem)));
2659 format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %}
2660 ins_encode %{
2661 bool vector256 = false;
2662 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2663 %}
2664 ins_pipe( pipe_slow );
2665 %}
2667 instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
2668 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
2669 match(Set dst (AddVI src1 src2));
2670 format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %}
2671 ins_encode %{
2672 bool vector256 = true;
2673 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2674 %}
2675 ins_pipe( pipe_slow );
2676 %}
2678 instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
2679 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
2680 match(Set dst (AddVI src (LoadVector mem)));
2681 format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %}
2682 ins_encode %{
2683 bool vector256 = true;
2684 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2685 %}
2686 ins_pipe( pipe_slow );
2687 %}
2689 // Longs vector add
// paddq without AVX; vpaddq _reg/_mem with UseAVX > 0; 256-bit with UseAVX > 1.
2690 instruct vadd2L(vecX dst, vecX src) %{
2691 predicate(n->as_Vector()->length() == 2);
2692 match(Set dst (AddVL dst src));
2693 format %{ "paddq $dst,$src\t! add packed2L" %}
2694 ins_encode %{
2695 __ paddq($dst$$XMMRegister, $src$$XMMRegister);
2696 %}
2697 ins_pipe( pipe_slow );
2698 %}
2700 instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
2701 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2702 match(Set dst (AddVL src1 src2));
2703 format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
2704 ins_encode %{
2705 bool vector256 = false;
2706 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2707 %}
2708 ins_pipe( pipe_slow );
2709 %}
2711 instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
2712 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2713 match(Set dst (AddVL src (LoadVector mem)));
2714 format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %}
2715 ins_encode %{
2716 bool vector256 = false;
2717 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2718 %}
2719 ins_pipe( pipe_slow );
2720 %}
2722 instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
2723 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
2724 match(Set dst (AddVL src1 src2));
2725 format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
2726 ins_encode %{
2727 bool vector256 = true;
2728 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2729 %}
2730 ins_pipe( pipe_slow );
2731 %}
2733 instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
2734 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
2735 match(Set dst (AddVL src (LoadVector mem)));
2736 format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %}
2737 ins_encode %{
2738 bool vector256 = true;
2739 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2740 %}
2741 ins_pipe( pipe_slow );
2742 %}
2744 // Floats vector add
// addps without AVX; vaddps _reg/_mem with UseAVX > 0. Note the 256-bit float
// forms also only need UseAVX > 0 (unlike the integer adds, which need AVX2
// for 256 bits).
2745 instruct vadd2F(vecD dst, vecD src) %{
2746 predicate(n->as_Vector()->length() == 2);
2747 match(Set dst (AddVF dst src));
2748 format %{ "addps $dst,$src\t! add packed2F" %}
2749 ins_encode %{
2750 __ addps($dst$$XMMRegister, $src$$XMMRegister);
2751 %}
2752 ins_pipe( pipe_slow );
2753 %}
2755 instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
2756 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2757 match(Set dst (AddVF src1 src2));
2758 format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
2759 ins_encode %{
2760 bool vector256 = false;
2761 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2762 %}
2763 ins_pipe( pipe_slow );
2764 %}
2766 instruct vadd4F(vecX dst, vecX src) %{
2767 predicate(n->as_Vector()->length() == 4);
2768 match(Set dst (AddVF dst src));
2769 format %{ "addps $dst,$src\t! add packed4F" %}
2770 ins_encode %{
2771 __ addps($dst$$XMMRegister, $src$$XMMRegister);
2772 %}
2773 ins_pipe( pipe_slow );
2774 %}
2776 instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
2777 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2778 match(Set dst (AddVF src1 src2));
2779 format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
2780 ins_encode %{
2781 bool vector256 = false;
2782 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2783 %}
2784 ins_pipe( pipe_slow );
2785 %}
2787 instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
2788 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2789 match(Set dst (AddVF src (LoadVector mem)));
2790 format %{ "vaddps $dst,$src,$mem\t! add packed4F" %}
2791 ins_encode %{
2792 bool vector256 = false;
2793 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2794 %}
2795 ins_pipe( pipe_slow );
2796 %}
2798 instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
2799 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
2800 match(Set dst (AddVF src1 src2));
2801 format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
2802 ins_encode %{
2803 bool vector256 = true;
2804 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2805 %}
2806 ins_pipe( pipe_slow );
2807 %}
2809 instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
2810 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
2811 match(Set dst (AddVF src (LoadVector mem)));
2812 format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
2813 ins_encode %{
2814 bool vector256 = true;
2815 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2816 %}
2817 ins_pipe( pipe_slow );
2818 %}
// Doubles vector add
// Same SSE/AVX layering as the float family above, using addpd/vaddpd.

// SSE two-operand form: dst += src, 2 doubles.
instruct vadd2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packed2D" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form: dst = src1 + src2, 2 doubles.
instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX form with the right operand folded from memory.
instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit (YMM) form: 4 doubles; AVX1 suffices for 256-bit FP.
instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit form with memory operand.
instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
// --------------------------------- SUB --------------------------------------

// Bytes vector sub
// psubb/vpsubb. 128-bit AVX forms need UseAVX > 0; the 32-byte (YMM)
// forms need AVX2 (UseAVX > 1) since they are 256-bit integer ops.

// SSE two-operand form: dst -= src, 4 bytes.
instruct vsub4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed4B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form: dst = src1 - src2, 4 bytes.
instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// SSE two-operand form, 8 bytes.
instruct vsub8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed8B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form, 8 bytes.
instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// SSE two-operand form, full 128-bit XMM (16 bytes).
instruct vsub16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed16B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form, 16 bytes.
instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX form with the subtrahend folded from memory.
instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 256-bit form, 32 bytes.
instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 256-bit form with memory operand.
instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
// Shorts/Chars vector sub
// psubw/vpsubw; 16-element (YMM) forms require AVX2 (UseAVX > 1).

// SSE two-operand form: dst -= src, 2 shorts.
instruct vsub2S(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed2S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form, 2 shorts.
instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// SSE two-operand form, 4 shorts.
instruct vsub4S(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed4S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form, 4 shorts.
instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// SSE two-operand form, 8 shorts (full XMM).
instruct vsub8S(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed8S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form, 8 shorts.
instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX form with the subtrahend folded from memory.
instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 256-bit form, 16 shorts.
instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 256-bit form with memory operand.
instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
// Integers vector sub
// psubd/vpsubd; 8-element (YMM) forms require AVX2 (UseAVX > 1).

// SSE two-operand form: dst -= src, 2 ints.
instruct vsub2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed2I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form, 2 ints.
instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// SSE two-operand form, 4 ints (full XMM).
instruct vsub4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed4I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form, 4 ints.
instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX form with the subtrahend folded from memory.
instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 256-bit form, 8 ints.
instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 256-bit form with memory operand.
instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
// Longs vector sub
// psubq/vpsubq; 4-element (YMM) forms require AVX2 (UseAVX > 1).

// SSE two-operand form: dst -= src, 2 longs.
instruct vsub2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packed2L" %}
  ins_encode %{
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form, 2 longs.
instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX form with the subtrahend folded from memory.
instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 256-bit form, 4 longs.
instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 256-bit form with memory operand.
instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
// Floats vector sub
// subps/vsubps; 256-bit FP forms need only AVX1 (UseAVX > 0).

// SSE two-operand form: dst -= src, 2 floats.
instruct vsub2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed2F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form, 2 floats.
instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// SSE two-operand form, 4 floats.
instruct vsub4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed4F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form, 4 floats.
instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX form with the subtrahend folded from memory.
instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit (YMM) form, 8 floats.
instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit form with memory operand.
instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
// Doubles vector sub
// subpd/vsubpd; 256-bit FP forms need only AVX1 (UseAVX > 0).

// SSE two-operand form: dst -= src, 2 doubles.
instruct vsub2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD dst src));
  format %{ "subpd $dst,$src\t! sub packed2D" %}
  ins_encode %{
    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form, 2 doubles.
instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX form with the subtrahend folded from memory.
instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit (YMM) form, 4 doubles.
instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit form with memory operand.
instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
// --------------------------------- MUL --------------------------------------

// Shorts/Chars vector mul
// pmullw/vpmullw (low 16 bits of each 16x16 product); 16-element (YMM)
// forms require AVX2 (UseAVX > 1).

// SSE two-operand form: dst *= src, 2 shorts.
instruct vmul2S(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed2S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form, 2 shorts.
instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// SSE two-operand form, 4 shorts.
instruct vmul4S(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed4S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form, 4 shorts.
instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// SSE two-operand form, 8 shorts (full XMM).
instruct vmul8S(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed8S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form, 8 shorts.
instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX form with the multiplier folded from memory.
instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 256-bit form, 16 shorts.
instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 256-bit form with memory operand.
instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
// Integers vector mul (sse4_1)
// pmulld is an SSE4.1 instruction, hence UseSSE > 3 on the SSE forms.
// 8-element (YMM) forms require AVX2 (UseAVX > 1).

// SSE4.1 two-operand form: dst *= src, 2 ints.
instruct vmul2I(vecD dst, vecD src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed2I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form, 2 ints.
instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// SSE4.1 two-operand form, 4 ints (full XMM).
instruct vmul4I(vecX dst, vecX src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed4I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form, 4 ints.
instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX form with the multiplier folded from memory.
instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 256-bit form, 8 ints.
instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 256-bit form with memory operand.
instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
// Floats vector mul
// mulps/vmulps; 256-bit FP forms need only AVX1 (UseAVX > 0).

// SSE two-operand form: dst *= src, 2 floats.
instruct vmul2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed2F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form, 2 floats.
instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// SSE two-operand form, 4 floats.
instruct vmul4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed4F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form, 4 floats.
instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX form with the multiplier folded from memory.
instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit (YMM) form, 8 floats.
instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit form with memory operand.
instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
// Doubles vector mul
// mulpd/vmulpd; 256-bit FP forms need only AVX1 (UseAVX > 0).

// SSE two-operand form: dst *= src, 2 doubles.
instruct vmul2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packed2D" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form, 2 doubles.
instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX form with the multiplier folded from memory.
instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit (YMM) form, 4 doubles.
instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit form with memory operand.
instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
// --------------------------------- DIV --------------------------------------
// Only FP vector division exists; there is no packed integer divide on x86.

// Floats vector div
// divps/vdivps; 256-bit FP forms need only AVX1 (UseAVX > 0).

// SSE two-operand form: dst /= src, 2 floats.
instruct vdiv2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed2F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form, 2 floats.
instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %}
  ins_encode %{
    bool vector256 = false;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// SSE two-operand form, 4 floats.
instruct vdiv4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed4F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form, 4 floats.
instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX form with the divisor folded from memory.
instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit (YMM) form, 8 floats.
instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit form with memory operand.
instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
3717 // Doubles vector div
// Same structure as the float section: SSE destructive divpd for 2 doubles in
// an XMM register, AVX three-operand vdivpd (with memory variants), and
// 256-bit YMM forms for 4 doubles.
3718 instruct vdiv2D(vecX dst, vecX src) %{
3719 predicate(n->as_Vector()->length() == 2);
3720 match(Set dst (DivVD dst src));
3721 format %{ "divpd $dst,$src\t! div packed2D" %}
3722 ins_encode %{
3723 __ divpd($dst$$XMMRegister, $src$$XMMRegister);
3724 %}
3725 ins_pipe( pipe_slow );
3726 %}
3728 instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
3729 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3730 match(Set dst (DivVD src1 src2));
3731 format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %}
3732 ins_encode %{
3733 bool vector256 = false;
3734 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3735 %}
3736 ins_pipe( pipe_slow );
3737 %}
3739 instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
3740 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3741 match(Set dst (DivVD src (LoadVector mem)));
3742 format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %}
3743 ins_encode %{
3744 bool vector256 = false;
3745 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3746 %}
3747 ins_pipe( pipe_slow );
3748 %}
3750 instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
3751 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3752 match(Set dst (DivVD src1 src2));
3753 format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %}
3754 ins_encode %{
3755 bool vector256 = true;
3756 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3757 %}
3758 ins_pipe( pipe_slow );
3759 %}
3761 instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
3762 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3763 match(Set dst (DivVD src (LoadVector mem)));
3764 format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %}
3765 ins_encode %{
3766 bool vector256 = true;
3767 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3768 %}
3769 ins_pipe( pipe_slow );
3770 %}
3772 // ------------------------------ Shift ---------------------------------------
3774 // Left and right shift count vectors are the same on x86
3775 // (only lowest bits of xmm reg are used for count).
// Loads a scalar shift count from a general-purpose register into an XMM
// register via movd. Both the left- and right-shift-count ideal nodes match
// here, since the hardware form of the count is identical for either
// direction.
3776 instruct vshiftcnt(vecS dst, rRegI cnt) %{
3777 match(Set dst (LShiftCntV cnt));
3778 match(Set dst (RShiftCntV cnt));
3779 format %{ "movd $dst,$cnt\t! load shift count" %}
3780 ins_encode %{
3781 __ movdl($dst$$XMMRegister, $cnt$$Register);
3782 %}
3783 ins_pipe( pipe_slow );
3784 %}
3786 // ------------------------------ LeftShift -----------------------------------
3788 // Shorts/Chars vector left shift
// Each lane count (2S, 4S, 8S, 16S) gets four variants:
//   vsllN S        - SSE destructive, count in an XMM register (psllw)
//   vsllN S_imm    - SSE destructive, 8-bit immediate count
//   vsllN S_reg    - AVX three-operand, count in an XMM register (vpsllw)
//   vsllN S_reg_imm- AVX three-operand, immediate count
// 16S needs 256-bit integer ops, hence predicate UseAVX > 1 (AVX2) and
// vector256 = true; there is no SSE 16S form.
3789 instruct vsll2S(vecS dst, vecS shift) %{
3790 predicate(n->as_Vector()->length() == 2);
3791 match(Set dst (LShiftVS dst shift));
3792 format %{ "psllw $dst,$shift\t! left shift packed2S" %}
3793 ins_encode %{
3794 __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
3795 %}
3796 ins_pipe( pipe_slow );
3797 %}
3799 instruct vsll2S_imm(vecS dst, immI8 shift) %{
3800 predicate(n->as_Vector()->length() == 2);
3801 match(Set dst (LShiftVS dst shift));
3802 format %{ "psllw $dst,$shift\t! left shift packed2S" %}
3803 ins_encode %{
3804 __ psllw($dst$$XMMRegister, (int)$shift$$constant);
3805 %}
3806 ins_pipe( pipe_slow );
3807 %}
3809 instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{
3810 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3811 match(Set dst (LShiftVS src shift));
3812 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
3813 ins_encode %{
3814 bool vector256 = false;
3815 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
3816 %}
3817 ins_pipe( pipe_slow );
3818 %}
3820 instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
3821 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3822 match(Set dst (LShiftVS src shift));
3823 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
3824 ins_encode %{
3825 bool vector256 = false;
3826 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
3827 %}
3828 ins_pipe( pipe_slow );
3829 %}
3831 instruct vsll4S(vecD dst, vecS shift) %{
3832 predicate(n->as_Vector()->length() == 4);
3833 match(Set dst (LShiftVS dst shift));
3834 format %{ "psllw $dst,$shift\t! left shift packed4S" %}
3835 ins_encode %{
3836 __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
3837 %}
3838 ins_pipe( pipe_slow );
3839 %}
3841 instruct vsll4S_imm(vecD dst, immI8 shift) %{
3842 predicate(n->as_Vector()->length() == 4);
3843 match(Set dst (LShiftVS dst shift));
3844 format %{ "psllw $dst,$shift\t! left shift packed4S" %}
3845 ins_encode %{
3846 __ psllw($dst$$XMMRegister, (int)$shift$$constant);
3847 %}
3848 ins_pipe( pipe_slow );
3849 %}
3851 instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{
3852 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3853 match(Set dst (LShiftVS src shift));
3854 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
3855 ins_encode %{
3856 bool vector256 = false;
3857 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
3858 %}
3859 ins_pipe( pipe_slow );
3860 %}
3862 instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
3863 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3864 match(Set dst (LShiftVS src shift));
3865 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
3866 ins_encode %{
3867 bool vector256 = false;
3868 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
3869 %}
3870 ins_pipe( pipe_slow );
3871 %}
3873 instruct vsll8S(vecX dst, vecS shift) %{
3874 predicate(n->as_Vector()->length() == 8);
3875 match(Set dst (LShiftVS dst shift));
3876 format %{ "psllw $dst,$shift\t! left shift packed8S" %}
3877 ins_encode %{
3878 __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
3879 %}
3880 ins_pipe( pipe_slow );
3881 %}
3883 instruct vsll8S_imm(vecX dst, immI8 shift) %{
3884 predicate(n->as_Vector()->length() == 8);
3885 match(Set dst (LShiftVS dst shift));
3886 format %{ "psllw $dst,$shift\t! left shift packed8S" %}
3887 ins_encode %{
3888 __ psllw($dst$$XMMRegister, (int)$shift$$constant);
3889 %}
3890 ins_pipe( pipe_slow );
3891 %}
3893 instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{
3894 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3895 match(Set dst (LShiftVS src shift));
3896 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
3897 ins_encode %{
3898 bool vector256 = false;
3899 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
3900 %}
3901 ins_pipe( pipe_slow );
3902 %}
3904 instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
3905 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3906 match(Set dst (LShiftVS src shift));
3907 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
3908 ins_encode %{
3909 bool vector256 = false;
3910 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
3911 %}
3912 ins_pipe( pipe_slow );
3913 %}
// 16 shorts = 256 bits: AVX2 (UseAVX > 1) required for 256-bit integer shifts.
3915 instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{
3916 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
3917 match(Set dst (LShiftVS src shift));
3918 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
3919 ins_encode %{
3920 bool vector256 = true;
3921 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
3922 %}
3923 ins_pipe( pipe_slow );
3924 %}
3926 instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
3927 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
3928 match(Set dst (LShiftVS src shift));
3929 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
3930 ins_encode %{
3931 bool vector256 = true;
3932 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
3933 %}
3934 ins_pipe( pipe_slow );
3935 %}
3937 // Integers vector left shift
// Same four-variant pattern as the short section, using pslld/vpslld for
// 32-bit lanes. 8I (256-bit) forms require AVX2 (UseAVX > 1).
3938 instruct vsll2I(vecD dst, vecS shift) %{
3939 predicate(n->as_Vector()->length() == 2);
3940 match(Set dst (LShiftVI dst shift));
3941 format %{ "pslld $dst,$shift\t! left shift packed2I" %}
3942 ins_encode %{
3943 __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
3944 %}
3945 ins_pipe( pipe_slow );
3946 %}
3948 instruct vsll2I_imm(vecD dst, immI8 shift) %{
3949 predicate(n->as_Vector()->length() == 2);
3950 match(Set dst (LShiftVI dst shift));
3951 format %{ "pslld $dst,$shift\t! left shift packed2I" %}
3952 ins_encode %{
3953 __ pslld($dst$$XMMRegister, (int)$shift$$constant);
3954 %}
3955 ins_pipe( pipe_slow );
3956 %}
3958 instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
3959 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3960 match(Set dst (LShiftVI src shift));
3961 format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
3962 ins_encode %{
3963 bool vector256 = false;
3964 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
3965 %}
3966 ins_pipe( pipe_slow );
3967 %}
3969 instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
3970 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3971 match(Set dst (LShiftVI src shift));
3972 format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
3973 ins_encode %{
3974 bool vector256 = false;
3975 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
3976 %}
3977 ins_pipe( pipe_slow );
3978 %}
3980 instruct vsll4I(vecX dst, vecS shift) %{
3981 predicate(n->as_Vector()->length() == 4);
3982 match(Set dst (LShiftVI dst shift));
3983 format %{ "pslld $dst,$shift\t! left shift packed4I" %}
3984 ins_encode %{
3985 __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
3986 %}
3987 ins_pipe( pipe_slow );
3988 %}
3990 instruct vsll4I_imm(vecX dst, immI8 shift) %{
3991 predicate(n->as_Vector()->length() == 4);
3992 match(Set dst (LShiftVI dst shift));
3993 format %{ "pslld $dst,$shift\t! left shift packed4I" %}
3994 ins_encode %{
3995 __ pslld($dst$$XMMRegister, (int)$shift$$constant);
3996 %}
3997 ins_pipe( pipe_slow );
3998 %}
4000 instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
4001 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4002 match(Set dst (LShiftVI src shift));
4003 format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
4004 ins_encode %{
4005 bool vector256 = false;
4006 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4007 %}
4008 ins_pipe( pipe_slow );
4009 %}
4011 instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
4012 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4013 match(Set dst (LShiftVI src shift));
4014 format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
4015 ins_encode %{
4016 bool vector256 = false;
4017 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4018 %}
4019 ins_pipe( pipe_slow );
4020 %}
4022 instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
4023 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4024 match(Set dst (LShiftVI src shift));
4025 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
4026 ins_encode %{
4027 bool vector256 = true;
4028 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4029 %}
4030 ins_pipe( pipe_slow );
4031 %}
4033 instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
4034 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4035 match(Set dst (LShiftVI src shift));
4036 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
4037 ins_encode %{
4038 bool vector256 = true;
4039 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4040 %}
4041 ins_pipe( pipe_slow );
4042 %}
4044 // Longs vector left shift
// 64-bit lanes via psllq/vpsllq; 4L (256-bit) forms require AVX2.
4045 instruct vsll2L(vecX dst, vecS shift) %{
4046 predicate(n->as_Vector()->length() == 2);
4047 match(Set dst (LShiftVL dst shift));
4048 format %{ "psllq $dst,$shift\t! left shift packed2L" %}
4049 ins_encode %{
4050 __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
4051 %}
4052 ins_pipe( pipe_slow );
4053 %}
4055 instruct vsll2L_imm(vecX dst, immI8 shift) %{
4056 predicate(n->as_Vector()->length() == 2);
4057 match(Set dst (LShiftVL dst shift));
4058 format %{ "psllq $dst,$shift\t! left shift packed2L" %}
4059 ins_encode %{
4060 __ psllq($dst$$XMMRegister, (int)$shift$$constant);
4061 %}
4062 ins_pipe( pipe_slow );
4063 %}
4065 instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
4066 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4067 match(Set dst (LShiftVL src shift));
4068 format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
4069 ins_encode %{
4070 bool vector256 = false;
4071 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4072 %}
4073 ins_pipe( pipe_slow );
4074 %}
4076 instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
4077 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4078 match(Set dst (LShiftVL src shift));
4079 format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
4080 ins_encode %{
4081 bool vector256 = false;
4082 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4083 %}
4084 ins_pipe( pipe_slow );
4085 %}
4087 instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
4088 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
4089 match(Set dst (LShiftVL src shift));
4090 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
4091 ins_encode %{
4092 bool vector256 = true;
4093 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4094 %}
4095 ins_pipe( pipe_slow );
4096 %}
4098 instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
4099 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
4100 match(Set dst (LShiftVL src shift));
4101 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
4102 ins_encode %{
4103 bool vector256 = true;
4104 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4105 %}
4106 ins_pipe( pipe_slow );
4107 %}
4109 // ----------------------- LogicalRightShift -----------------------------------
4111 // Shorts vector logical right shift produces incorrect Java result
4112 // for negative data because java code convert short value into int with
4113 // sign extension before a shift. But char vectors are fine since chars are
4114 // unsigned values.
// URShiftVS is therefore only generated for char-like (unsigned) data; the
// instruction forms below mirror the left-shift section using psrlw/vpsrlw.
4116 instruct vsrl2S(vecS dst, vecS shift) %{
4117 predicate(n->as_Vector()->length() == 2);
4118 match(Set dst (URShiftVS dst shift));
4119 format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
4120 ins_encode %{
4121 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
4122 %}
4123 ins_pipe( pipe_slow );
4124 %}
4126 instruct vsrl2S_imm(vecS dst, immI8 shift) %{
4127 predicate(n->as_Vector()->length() == 2);
4128 match(Set dst (URShiftVS dst shift));
4129 format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
4130 ins_encode %{
4131 __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
4132 %}
4133 ins_pipe( pipe_slow );
4134 %}
4136 instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
4137 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4138 match(Set dst (URShiftVS src shift));
4139 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
4140 ins_encode %{
4141 bool vector256 = false;
4142 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4143 %}
4144 ins_pipe( pipe_slow );
4145 %}
4147 instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
4148 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4149 match(Set dst (URShiftVS src shift));
4150 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
4151 ins_encode %{
4152 bool vector256 = false;
4153 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4154 %}
4155 ins_pipe( pipe_slow );
4156 %}
4158 instruct vsrl4S(vecD dst, vecS shift) %{
4159 predicate(n->as_Vector()->length() == 4);
4160 match(Set dst (URShiftVS dst shift));
4161 format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
4162 ins_encode %{
4163 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
4164 %}
4165 ins_pipe( pipe_slow );
4166 %}
4168 instruct vsrl4S_imm(vecD dst, immI8 shift) %{
4169 predicate(n->as_Vector()->length() == 4);
4170 match(Set dst (URShiftVS dst shift));
4171 format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
4172 ins_encode %{
4173 __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
4174 %}
4175 ins_pipe( pipe_slow );
4176 %}
4178 instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
4179 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4180 match(Set dst (URShiftVS src shift));
4181 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
4182 ins_encode %{
4183 bool vector256 = false;
4184 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4185 %}
4186 ins_pipe( pipe_slow );
4187 %}
4189 instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
4190 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4191 match(Set dst (URShiftVS src shift));
4192 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
4193 ins_encode %{
4194 bool vector256 = false;
4195 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4196 %}
4197 ins_pipe( pipe_slow );
4198 %}
4200 instruct vsrl8S(vecX dst, vecS shift) %{
4201 predicate(n->as_Vector()->length() == 8);
4202 match(Set dst (URShiftVS dst shift));
4203 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
4204 ins_encode %{
4205 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
4206 %}
4207 ins_pipe( pipe_slow );
4208 %}
4210 instruct vsrl8S_imm(vecX dst, immI8 shift) %{
4211 predicate(n->as_Vector()->length() == 8);
4212 match(Set dst (URShiftVS dst shift));
4213 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
4214 ins_encode %{
4215 __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
4216 %}
4217 ins_pipe( pipe_slow );
4218 %}
4220 instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
4221 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
4222 match(Set dst (URShiftVS src shift));
4223 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
4224 ins_encode %{
4225 bool vector256 = false;
4226 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4227 %}
4228 ins_pipe( pipe_slow );
4229 %}
4231 instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
4232 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
4233 match(Set dst (URShiftVS src shift));
4234 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
4235 ins_encode %{
4236 bool vector256 = false;
4237 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4238 %}
4239 ins_pipe( pipe_slow );
4240 %}
// 256-bit forms require AVX2.
4242 instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
4243 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
4244 match(Set dst (URShiftVS src shift));
4245 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
4246 ins_encode %{
4247 bool vector256 = true;
4248 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4249 %}
4250 ins_pipe( pipe_slow );
4251 %}
4253 instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
4254 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
4255 match(Set dst (URShiftVS src shift));
4256 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
4257 ins_encode %{
4258 bool vector256 = true;
4259 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4260 %}
4261 ins_pipe( pipe_slow );
4262 %}
4264 // Integers vector logical right shift
// 32-bit lanes via psrld/vpsrld; same four-variant pattern, AVX2 for 8I.
4265 instruct vsrl2I(vecD dst, vecS shift) %{
4266 predicate(n->as_Vector()->length() == 2);
4267 match(Set dst (URShiftVI dst shift));
4268 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
4269 ins_encode %{
4270 __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
4271 %}
4272 ins_pipe( pipe_slow );
4273 %}
4275 instruct vsrl2I_imm(vecD dst, immI8 shift) %{
4276 predicate(n->as_Vector()->length() == 2);
4277 match(Set dst (URShiftVI dst shift));
4278 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
4279 ins_encode %{
4280 __ psrld($dst$$XMMRegister, (int)$shift$$constant);
4281 %}
4282 ins_pipe( pipe_slow );
4283 %}
4285 instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{
4286 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4287 match(Set dst (URShiftVI src shift));
4288 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
4289 ins_encode %{
4290 bool vector256 = false;
4291 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4292 %}
4293 ins_pipe( pipe_slow );
4294 %}
4296 instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
4297 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4298 match(Set dst (URShiftVI src shift));
4299 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
4300 ins_encode %{
4301 bool vector256 = false;
4302 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4303 %}
4304 ins_pipe( pipe_slow );
4305 %}
4307 instruct vsrl4I(vecX dst, vecS shift) %{
4308 predicate(n->as_Vector()->length() == 4);
4309 match(Set dst (URShiftVI dst shift));
4310 format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
4311 ins_encode %{
4312 __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
4313 %}
4314 ins_pipe( pipe_slow );
4315 %}
4317 instruct vsrl4I_imm(vecX dst, immI8 shift) %{
4318 predicate(n->as_Vector()->length() == 4);
4319 match(Set dst (URShiftVI dst shift));
4320 format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
4321 ins_encode %{
4322 __ psrld($dst$$XMMRegister, (int)$shift$$constant);
4323 %}
4324 ins_pipe( pipe_slow );
4325 %}
4327 instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
4328 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4329 match(Set dst (URShiftVI src shift));
4330 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
4331 ins_encode %{
4332 bool vector256 = false;
4333 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4334 %}
4335 ins_pipe( pipe_slow );
4336 %}
4338 instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
4339 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4340 match(Set dst (URShiftVI src shift));
4341 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
4342 ins_encode %{
4343 bool vector256 = false;
4344 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4345 %}
4346 ins_pipe( pipe_slow );
4347 %}
4349 instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
4350 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4351 match(Set dst (URShiftVI src shift));
4352 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
4353 ins_encode %{
4354 bool vector256 = true;
4355 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4356 %}
4357 ins_pipe( pipe_slow );
4358 %}
4360 instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
4361 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4362 match(Set dst (URShiftVI src shift));
4363 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
4364 ins_encode %{
4365 bool vector256 = true;
4366 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4367 %}
4368 ins_pipe( pipe_slow );
4369 %}
4371 // Longs vector logical right shift
// 64-bit lanes via psrlq/vpsrlq; 4L (256-bit) forms require AVX2.
4372 instruct vsrl2L(vecX dst, vecS shift) %{
4373 predicate(n->as_Vector()->length() == 2);
4374 match(Set dst (URShiftVL dst shift));
4375 format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
4376 ins_encode %{
4377 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
4378 %}
4379 ins_pipe( pipe_slow );
4380 %}
4382 instruct vsrl2L_imm(vecX dst, immI8 shift) %{
4383 predicate(n->as_Vector()->length() == 2);
4384 match(Set dst (URShiftVL dst shift));
4385 format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
4386 ins_encode %{
4387 __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
4388 %}
4389 ins_pipe( pipe_slow );
4390 %}
4392 instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
4393 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4394 match(Set dst (URShiftVL src shift));
4395 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
4396 ins_encode %{
4397 bool vector256 = false;
4398 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4399 %}
4400 ins_pipe( pipe_slow );
4401 %}
4403 instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
4404 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4405 match(Set dst (URShiftVL src shift));
4406 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
4407 ins_encode %{
4408 bool vector256 = false;
4409 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4410 %}
4411 ins_pipe( pipe_slow );
4412 %}
4414 instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
4415 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
4416 match(Set dst (URShiftVL src shift));
4417 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
4418 ins_encode %{
4419 bool vector256 = true;
4420 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4421 %}
4422 ins_pipe( pipe_slow );
4423 %}
4425 instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
4426 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
4427 match(Set dst (URShiftVL src shift));
4428 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
4429 ins_encode %{
4430 bool vector256 = true;
4431 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4432 %}
4433 ins_pipe( pipe_slow );
4434 %}
4436 // ------------------- ArithmeticRightShift -----------------------------------
4438 // Shorts/Chars vector arithmetic right shift
// Sign-propagating right shift for 16-bit lanes via psraw/vpsraw; same
// four-variant pattern as the other shift sections, AVX2 for 16S.
4439 instruct vsra2S(vecS dst, vecS shift) %{
4440 predicate(n->as_Vector()->length() == 2);
4441 match(Set dst (RShiftVS dst shift));
4442 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
4443 ins_encode %{
4444 __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
4445 %}
4446 ins_pipe( pipe_slow );
4447 %}
4449 instruct vsra2S_imm(vecS dst, immI8 shift) %{
4450 predicate(n->as_Vector()->length() == 2);
4451 match(Set dst (RShiftVS dst shift));
4452 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
4453 ins_encode %{
4454 __ psraw($dst$$XMMRegister, (int)$shift$$constant);
4455 %}
4456 ins_pipe( pipe_slow );
4457 %}
4459 instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{
4460 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4461 match(Set dst (RShiftVS src shift));
4462 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
4463 ins_encode %{
4464 bool vector256 = false;
4465 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4466 %}
4467 ins_pipe( pipe_slow );
4468 %}
4470 instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
4471 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4472 match(Set dst (RShiftVS src shift));
4473 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
4474 ins_encode %{
4475 bool vector256 = false;
4476 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4477 %}
4478 ins_pipe( pipe_slow );
4479 %}
4481 instruct vsra4S(vecD dst, vecS shift) %{
4482 predicate(n->as_Vector()->length() == 4);
4483 match(Set dst (RShiftVS dst shift));
4484 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
4485 ins_encode %{
4486 __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
4487 %}
4488 ins_pipe( pipe_slow );
4489 %}
4491 instruct vsra4S_imm(vecD dst, immI8 shift) %{
4492 predicate(n->as_Vector()->length() == 4);
4493 match(Set dst (RShiftVS dst shift));
4494 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
4495 ins_encode %{
4496 __ psraw($dst$$XMMRegister, (int)$shift$$constant);
4497 %}
4498 ins_pipe( pipe_slow );
4499 %}
4501 instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{
4502 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4503 match(Set dst (RShiftVS src shift));
4504 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
4505 ins_encode %{
4506 bool vector256 = false;
4507 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4508 %}
4509 ins_pipe( pipe_slow );
4510 %}
4512 instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
4513 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4514 match(Set dst (RShiftVS src shift));
4515 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
4516 ins_encode %{
4517 bool vector256 = false;
4518 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4519 %}
4520 ins_pipe( pipe_slow );
4521 %}
4523 instruct vsra8S(vecX dst, vecS shift) %{
4524 predicate(n->as_Vector()->length() == 8);
4525 match(Set dst (RShiftVS dst shift));
4526 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
4527 ins_encode %{
4528 __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
4529 %}
4530 ins_pipe( pipe_slow );
4531 %}
4533 instruct vsra8S_imm(vecX dst, immI8 shift) %{
4534 predicate(n->as_Vector()->length() == 8);
4535 match(Set dst (RShiftVS dst shift));
4536 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
4537 ins_encode %{
4538 __ psraw($dst$$XMMRegister, (int)$shift$$constant);
4539 %}
4540 ins_pipe( pipe_slow );
4541 %}
4543 instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{
4544 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
4545 match(Set dst (RShiftVS src shift));
4546 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
4547 ins_encode %{
4548 bool vector256 = false;
4549 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4550 %}
4551 ins_pipe( pipe_slow );
4552 %}
4554 instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
4555 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
4556 match(Set dst (RShiftVS src shift));
4557 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
4558 ins_encode %{
4559 bool vector256 = false;
4560 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4561 %}
4562 ins_pipe( pipe_slow );
4563 %}
4565 instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{
4566 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
4567 match(Set dst (RShiftVS src shift));
4568 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
4569 ins_encode %{
4570 bool vector256 = true;
4571 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4572 %}
4573 ins_pipe( pipe_slow );
4574 %}
4576 instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
4577 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
4578 match(Set dst (RShiftVS src shift));
4579 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
4580 ins_encode %{
4581 bool vector256 = true;
4582 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4583 %}
4584 ins_pipe( pipe_slow );
4585 %}
4587 // Integers vector arithmetic right shift
// 32-bit lanes via psrad/vpsrad.
// NOTE(review): no long-lane (RShiftVL) forms appear in this section —
// presumably because x86 has no packed 64-bit arithmetic right shift
// (psraq) at this ISA level; confirm against the rest of the file.
4588 instruct vsra2I(vecD dst, vecS shift) %{
4589 predicate(n->as_Vector()->length() == 2);
4590 match(Set dst (RShiftVI dst shift));
4591 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
4592 ins_encode %{
4593 __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
4594 %}
4595 ins_pipe( pipe_slow );
4596 %}
4598 instruct vsra2I_imm(vecD dst, immI8 shift) %{
4599 predicate(n->as_Vector()->length() == 2);
4600 match(Set dst (RShiftVI dst shift));
4601 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
4602 ins_encode %{
4603 __ psrad($dst$$XMMRegister, (int)$shift$$constant);
4604 %}
4605 ins_pipe( pipe_slow );
4606 %}
4608 instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
4609 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4610 match(Set dst (RShiftVI src shift));
4611 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
4612 ins_encode %{
4613 bool vector256 = false;
4614 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4615 %}
4616 ins_pipe( pipe_slow );
4617 %}
4619 instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
4620 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4621 match(Set dst (RShiftVI src shift));
4622 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
4623 ins_encode %{
4624 bool vector256 = false;
4625 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4626 %}
4627 ins_pipe( pipe_slow );
4628 %}
4630 instruct vsra4I(vecX dst, vecS shift) %{
4631 predicate(n->as_Vector()->length() == 4);
4632 match(Set dst (RShiftVI dst shift));
4633 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
4634 ins_encode %{
4635 __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
4636 %}
4637 ins_pipe( pipe_slow );
4638 %}
4640 instruct vsra4I_imm(vecX dst, immI8 shift) %{
4641 predicate(n->as_Vector()->length() == 4);
4642 match(Set dst (RShiftVI dst shift));
4643 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
4644 ins_encode %{
4645 __ psrad($dst$$XMMRegister, (int)$shift$$constant);
4646 %}
4647 ins_pipe( pipe_slow );
4648 %}
4650 instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
4651 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4652 match(Set dst (RShiftVI src shift));
4653 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
4654 ins_encode %{
4655 bool vector256 = false;
4656 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4657 %}
4658 ins_pipe( pipe_slow );
4659 %}
4661 instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
4662 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4663 match(Set dst (RShiftVI src shift));
4664 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
4665 ins_encode %{
4666 bool vector256 = false;
4667 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4668 %}
4669 ins_pipe( pipe_slow );
4670 %}
4672 instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
4673 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4674 match(Set dst (RShiftVI src shift));
4675 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
4676 ins_encode %{
4677 bool vector256 = true;
4678 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4679 %}
4680 ins_pipe( pipe_slow );
4681 %}
4683 instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
4684 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4685 match(Set dst (RShiftVI src shift));
4686 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
4687 ins_encode %{
4688 bool vector256 = true;
4689 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4690 %}
4691 ins_pipe( pipe_slow );
4692 %}
4694 // There are no vector arithmetic right shift instructions for longs.
4697 // --------------------------------- AND --------------------------------------
// Bitwise AND of whole vectors. Rules are keyed on length_in_bytes rather than
// element count because the operation is element-type agnostic. Pattern per size:
//  - plain: destructive SSE2 pand (dst &= src);
//  - _reg:  non-destructive AVX vpand (UseAVX > 0, or > 1 for 32-byte vecY);
//  - _mem:  AVX vpand with the second operand folded from a LoadVector.
// No SSE in-place rule exists for 32 bytes — vecY requires AVX.

// SSE2: dst &= src, 4-byte vectors.
4699 instruct vand4B(vecS dst, vecS src) %{
4700 predicate(n->as_Vector()->length_in_bytes() == 4);
4701 match(Set dst (AndV dst src));
4702 format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
4703 ins_encode %{
4704 __ pand($dst$$XMMRegister, $src$$XMMRegister);
4705 %}
4706 ins_pipe( pipe_slow );
4707 %}
// AVX-128: dst = src1 & src2, 4-byte vectors.
4709 instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
4710 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
4711 match(Set dst (AndV src1 src2));
4712 format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
4713 ins_encode %{
4714 bool vector256 = false;
4715 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4716 %}
4717 ins_pipe( pipe_slow );
4718 %}
// SSE2: dst &= src, 8-byte vectors.
4720 instruct vand8B(vecD dst, vecD src) %{
4721 predicate(n->as_Vector()->length_in_bytes() == 8);
4722 match(Set dst (AndV dst src));
4723 format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
4724 ins_encode %{
4725 __ pand($dst$$XMMRegister, $src$$XMMRegister);
4726 %}
4727 ins_pipe( pipe_slow );
4728 %}
// AVX-128: dst = src1 & src2, 8-byte vectors.
4730 instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
4731 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
4732 match(Set dst (AndV src1 src2));
4733 format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %}
4734 ins_encode %{
4735 bool vector256 = false;
4736 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4737 %}
4738 ins_pipe( pipe_slow );
4739 %}
// SSE2: dst &= src, 16-byte vectors.
4741 instruct vand16B(vecX dst, vecX src) %{
4742 predicate(n->as_Vector()->length_in_bytes() == 16);
4743 match(Set dst (AndV dst src));
4744 format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
4745 ins_encode %{
4746 __ pand($dst$$XMMRegister, $src$$XMMRegister);
4747 %}
4748 ins_pipe( pipe_slow );
4749 %}
// AVX-128: dst = src1 & src2, 16-byte vectors.
4751 instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
4752 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
4753 match(Set dst (AndV src1 src2));
4754 format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
4755 ins_encode %{
4756 bool vector256 = false;
4757 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4758 %}
4759 ins_pipe( pipe_slow );
4760 %}
// AVX-128 with memory operand: dst = src & [mem] (LoadVector folded into vpand).
4762 instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
4763 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
4764 match(Set dst (AndV src (LoadVector mem)));
4765 format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
4766 ins_encode %{
4767 bool vector256 = false;
4768 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4769 %}
4770 ins_pipe( pipe_slow );
4771 %}
// AVX2-256: dst = src1 & src2, 32-byte vectors; requires UseAVX > 1.
4773 instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
4774 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
4775 match(Set dst (AndV src1 src2));
4776 format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %}
4777 ins_encode %{
4778 bool vector256 = true;
4779 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4780 %}
4781 ins_pipe( pipe_slow );
4782 %}
// AVX2-256 with memory operand: dst = src & [mem], 32-byte vectors.
4784 instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
4785 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
4786 match(Set dst (AndV src (LoadVector mem)));
4787 format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
4788 ins_encode %{
4789 bool vector256 = true;
4790 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4791 %}
4792 ins_pipe( pipe_slow );
4793 %}
4795 // --------------------------------- OR ---------------------------------------
// Bitwise OR of whole vectors; structure is identical to the AND section:
// destructive SSE2 por, non-destructive AVX vpor (_reg), and AVX vpor with a
// folded LoadVector operand (_mem). Keyed on length_in_bytes (type agnostic).

// SSE2: dst |= src, 4-byte vectors.
4797 instruct vor4B(vecS dst, vecS src) %{
4798 predicate(n->as_Vector()->length_in_bytes() == 4);
4799 match(Set dst (OrV dst src));
4800 format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
4801 ins_encode %{
4802 __ por($dst$$XMMRegister, $src$$XMMRegister);
4803 %}
4804 ins_pipe( pipe_slow );
4805 %}
// AVX-128: dst = src1 | src2, 4-byte vectors.
4807 instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
4808 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
4809 match(Set dst (OrV src1 src2));
4810 format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
4811 ins_encode %{
4812 bool vector256 = false;
4813 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4814 %}
4815 ins_pipe( pipe_slow );
4816 %}
// SSE2: dst |= src, 8-byte vectors.
4818 instruct vor8B(vecD dst, vecD src) %{
4819 predicate(n->as_Vector()->length_in_bytes() == 8);
4820 match(Set dst (OrV dst src));
4821 format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
4822 ins_encode %{
4823 __ por($dst$$XMMRegister, $src$$XMMRegister);
4824 %}
4825 ins_pipe( pipe_slow );
4826 %}
// AVX-128: dst = src1 | src2, 8-byte vectors.
4828 instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
4829 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
4830 match(Set dst (OrV src1 src2));
4831 format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
4832 ins_encode %{
4833 bool vector256 = false;
4834 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4835 %}
4836 ins_pipe( pipe_slow );
4837 %}
// SSE2: dst |= src, 16-byte vectors.
4839 instruct vor16B(vecX dst, vecX src) %{
4840 predicate(n->as_Vector()->length_in_bytes() == 16);
4841 match(Set dst (OrV dst src));
4842 format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
4843 ins_encode %{
4844 __ por($dst$$XMMRegister, $src$$XMMRegister);
4845 %}
4846 ins_pipe( pipe_slow );
4847 %}
// AVX-128: dst = src1 | src2, 16-byte vectors.
4849 instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
4850 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
4851 match(Set dst (OrV src1 src2));
4852 format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
4853 ins_encode %{
4854 bool vector256 = false;
4855 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4856 %}
4857 ins_pipe( pipe_slow );
4858 %}
// AVX-128 with memory operand: dst = src | [mem] (LoadVector folded into vpor).
4860 instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
4861 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
4862 match(Set dst (OrV src (LoadVector mem)));
4863 format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
4864 ins_encode %{
4865 bool vector256 = false;
4866 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4867 %}
4868 ins_pipe( pipe_slow );
4869 %}
// AVX2-256: dst = src1 | src2, 32-byte vectors; requires UseAVX > 1.
4871 instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
4872 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
4873 match(Set dst (OrV src1 src2));
4874 format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
4875 ins_encode %{
4876 bool vector256 = true;
4877 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4878 %}
4879 ins_pipe( pipe_slow );
4880 %}
// AVX2-256 with memory operand: dst = src | [mem], 32-byte vectors.
4882 instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
4883 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
4884 match(Set dst (OrV src (LoadVector mem)));
4885 format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
4886 ins_encode %{
4887 bool vector256 = true;
4888 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4889 %}
4890 ins_pipe( pipe_slow );
4891 %}
4893 // --------------------------------- XOR --------------------------------------
// Bitwise XOR of whole vectors; same structure as the AND/OR sections:
// destructive SSE2 pxor, non-destructive AVX vpxor (_reg), and AVX vpxor with
// a folded LoadVector operand (_mem). Keyed on length_in_bytes (type agnostic).

// SSE2: dst ^= src, 4-byte vectors.
4895 instruct vxor4B(vecS dst, vecS src) %{
4896 predicate(n->as_Vector()->length_in_bytes() == 4);
4897 match(Set dst (XorV dst src));
4898 format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
4899 ins_encode %{
4900 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
4901 %}
4902 ins_pipe( pipe_slow );
4903 %}
// AVX-128: dst = src1 ^ src2, 4-byte vectors.
4905 instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
4906 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
4907 match(Set dst (XorV src1 src2));
4908 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
4909 ins_encode %{
4910 bool vector256 = false;
4911 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4912 %}
4913 ins_pipe( pipe_slow );
4914 %}
// SSE2: dst ^= src, 8-byte vectors.
4916 instruct vxor8B(vecD dst, vecD src) %{
4917 predicate(n->as_Vector()->length_in_bytes() == 8);
4918 match(Set dst (XorV dst src));
4919 format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
4920 ins_encode %{
4921 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
4922 %}
4923 ins_pipe( pipe_slow );
4924 %}
// AVX-128: dst = src1 ^ src2, 8-byte vectors.
4926 instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
4927 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
4928 match(Set dst (XorV src1 src2));
4929 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
4930 ins_encode %{
4931 bool vector256 = false;
4932 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4933 %}
4934 ins_pipe( pipe_slow );
4935 %}
// SSE2: dst ^= src, 16-byte vectors.
4937 instruct vxor16B(vecX dst, vecX src) %{
4938 predicate(n->as_Vector()->length_in_bytes() == 16);
4939 match(Set dst (XorV dst src));
4940 format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
4941 ins_encode %{
4942 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
4943 %}
4944 ins_pipe( pipe_slow );
4945 %}
// AVX-128: dst = src1 ^ src2, 16-byte vectors.
4947 instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
4948 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
4949 match(Set dst (XorV src1 src2));
4950 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
4951 ins_encode %{
4952 bool vector256 = false;
4953 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4954 %}
4955 ins_pipe( pipe_slow );
4956 %}
// AVX-128 with memory operand: dst = src ^ [mem] (LoadVector folded into vpxor).
4958 instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
4959 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
4960 match(Set dst (XorV src (LoadVector mem)));
4961 format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
4962 ins_encode %{
4963 bool vector256 = false;
4964 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4965 %}
4966 ins_pipe( pipe_slow );
4967 %}
// AVX2-256: dst = src1 ^ src2, 32-byte vectors; requires UseAVX > 1.
4969 instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
4970 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
4971 match(Set dst (XorV src1 src2));
4972 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
4973 ins_encode %{
4974 bool vector256 = true;
4975 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4976 %}
4977 ins_pipe( pipe_slow );
4978 %}
// AVX2-256 with memory operand: dst = src ^ [mem], 32-byte vectors.
4980 instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
4981 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
4982 match(Set dst (XorV src (LoadVector mem)));
4983 format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
4984 ins_encode %{
4985 bool vector256 = true;
4986 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4987 %}
4988 ins_pipe( pipe_slow );
4989 %}