Thu, 15 Aug 2013 20:04:10 -0400
8003424: Enable Class Data Sharing for CompressedOops
8016729: ObjectAlignmentInBytes=16 now forces the use of heap based compressed oops
8005933: The -Xshare:auto option is ignored for -server
Summary: Move klass metaspace above the heap and support CDS with compressed klass ptrs.
Reviewed-by: coleenp, kvn, mgerdin, tschatzl, stefank
1 //
2 // Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 //
5 // This code is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU General Public License version 2 only, as
7 // published by the Free Software Foundation.
8 //
9 // This code is distributed in the hope that it will be useful, but WITHOUT
10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 // version 2 for more details (a copy is included in the LICENSE file that
13 // accompanied this code).
14 //
15 // You should have received a copy of the GNU General Public License version
16 // 2 along with this work; if not, write to the Free Software Foundation,
17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 //
19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 // or visit www.oracle.com if you need additional information or have any
21 // questions.
22 //
23 //
25 // X86 Common Architecture Description File
27 //----------REGISTER DEFINITION BLOCK------------------------------------------
28 // This information is used by the matcher and the register allocator to
29 // describe individual registers and classes of registers within the target
30 // architecture.
32 register %{
33 //----------Architecture Description Register Definitions----------------------
34 // General Registers
35 // "reg_def" name ( register save type, C convention save type,
36 // ideal register type, encoding );
37 // Register Save Types:
38 //
39 // NS = No-Save: The register allocator assumes that these registers
40 // can be used without saving upon entry to the method, &
41 // that they do not need to be saved at call sites.
42 //
43 // SOC = Save-On-Call: The register allocator assumes that these registers
44 // can be used without saving upon entry to the method,
45 // but that they must be saved at call sites.
46 //
47 // SOE = Save-On-Entry: The register allocator assumes that these registers
48 // must be saved before using them upon entry to the
49 // method, but they do not need to be saved at call
50 // sites.
51 //
52 // AS = Always-Save: The register allocator assumes that these registers
53 // must be saved before using them upon entry to the
54 // method, & that they must be saved at call sites.
55 //
56 // Ideal Register Type is used to determine how to save & restore a
57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
59 //
60 // The encoding number is the actual bit-pattern placed into the opcodes.
62 // XMM registers. 256-bit registers or 8 words each, labeled (a)-h.
63 // Word a in each register holds a Float, words ab hold a Double.
64 // The whole registers are used in SSE4.2 version intrinsics,
65 // array copy stubs and superword operations (see UseSSE42Intrinsics,
66 // UseXMMForArrayCopy and UseSuperword flags).
67 // XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
68 // Linux ABI: No register preserved across function calls
69 // XMM0-XMM7 might hold parameters
70 // Windows ABI: XMM6-XMM15 preserved across function calls
71 // XMM0-XMM3 might hold parameters
// Each YMM register is modeled as eight 32-bit slots (no suffix, then b-h);
// slot encodings use as_VMReg()->next(k) for k = 1..7.
73 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
74 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
75 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
76 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
77 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
78 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
79 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
80 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
82 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
83 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
84 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
85 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
86 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
87 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
88 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
89 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
91 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
92 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
93 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
94 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
95 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
96 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
97 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
98 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
100 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
101 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
102 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
103 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
104 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
105 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
106 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
107 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
109 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
110 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
111 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
112 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
113 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
114 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
115 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
116 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
118 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
119 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
120 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
121 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
122 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
123 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
124 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
125 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
// Win64 ABI: XMM6-XMM15 are callee-saved, hence SOE below; on other ABIs
// (the #else branch) every XMM register is caller-saved (SOC).
127 #ifdef _WIN64
129 reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
130 reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
131 reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
132 reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
133 reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
134 reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
135 reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
136 reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));
138 reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
139 reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
140 reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
141 reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
142 reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
143 reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
144 reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
145 reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));
147 reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
148 reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
149 reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
150 reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
151 reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
152 reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
153 reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
154 reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));
156 reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
157 reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
158 reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
159 reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
160 reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
161 reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
162 reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
163 reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));
165 reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
166 reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
167 reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
168 reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
169 reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
170 reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
171 reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
172 reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));
174 reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
175 reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
176 reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
177 reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
178 reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
179 reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
180 reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
181 reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));
183 reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
184 reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
185 reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
186 reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
187 reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
188 reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
189 reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
190 reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));
192 reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
193 reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
194 reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
195 reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
196 reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
197 reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
198 reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
199 reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));
201 reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
202 reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
203 reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
204 reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
205 reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
206 reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
207 reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
208 reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));
210 reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
211 reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
212 reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
213 reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
214 reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
215 reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
216 reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
217 reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));
219 #else // _WIN64
221 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
222 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
223 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
224 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
225 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
226 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
227 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
228 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
230 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
231 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
232 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
233 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
234 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
235 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
236 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
237 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
239 #ifdef _LP64
241 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
242 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
243 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
244 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
245 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
246 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
247 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
248 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
250 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
251 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
252 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
253 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
254 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
255 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
256 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
257 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
259 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
260 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
261 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
262 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
263 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
264 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
265 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
266 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
268 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
269 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
270 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
271 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
272 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
273 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
274 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
275 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
277 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
278 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
279 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
280 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
281 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
282 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
283 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
284 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
286 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
287 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
288 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
289 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
290 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
291 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
292 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
293 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
295 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
296 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
297 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
298 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
299 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
300 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
301 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
302 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
304 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
305 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
306 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
307 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
308 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
309 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
310 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
311 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
313 #endif // _LP64
315 #endif // _WIN64
// Flags register; its encoding (16 on LP64, 8 on 32-bit) follows the number
// of XMM registers defined above.
317 #ifdef _LP64
318 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
319 #else
320 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
321 #endif // _LP64
// alloc_class defines the allocation order presented to the register allocator.
323 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
324 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
325 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
326 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
327 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
328 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
329 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
330 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
331 #ifdef _LP64
332 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
333 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
334 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
335 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
336 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
337 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
338 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
339 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
340 #endif
341 );
343 // flags allocation class should be last.
344 alloc_class chunk2(RFLAGS);
346 // Singleton class for condition codes
347 reg_class int_flags(RFLAGS);
349 // Class for all float registers
350 reg_class float_reg(XMM0,
351 XMM1,
352 XMM2,
353 XMM3,
354 XMM4,
355 XMM5,
356 XMM6,
357 XMM7
358 #ifdef _LP64
359 ,XMM8,
360 XMM9,
361 XMM10,
362 XMM11,
363 XMM12,
364 XMM13,
365 XMM14,
366 XMM15
367 #endif
368 );
370 // Class for all double registers
371 reg_class double_reg(XMM0, XMM0b,
372 XMM1, XMM1b,
373 XMM2, XMM2b,
374 XMM3, XMM3b,
375 XMM4, XMM4b,
376 XMM5, XMM5b,
377 XMM6, XMM6b,
378 XMM7, XMM7b
379 #ifdef _LP64
380 ,XMM8, XMM8b,
381 XMM9, XMM9b,
382 XMM10, XMM10b,
383 XMM11, XMM11b,
384 XMM12, XMM12b,
385 XMM13, XMM13b,
386 XMM14, XMM14b,
387 XMM15, XMM15b
388 #endif
389 );
391 // Class for all 32bit vector registers
392 reg_class vectors_reg(XMM0,
393 XMM1,
394 XMM2,
395 XMM3,
396 XMM4,
397 XMM5,
398 XMM6,
399 XMM7
400 #ifdef _LP64
401 ,XMM8,
402 XMM9,
403 XMM10,
404 XMM11,
405 XMM12,
406 XMM13,
407 XMM14,
408 XMM15
409 #endif
410 );
412 // Class for all 64bit vector registers
413 reg_class vectord_reg(XMM0, XMM0b,
414 XMM1, XMM1b,
415 XMM2, XMM2b,
416 XMM3, XMM3b,
417 XMM4, XMM4b,
418 XMM5, XMM5b,
419 XMM6, XMM6b,
420 XMM7, XMM7b
421 #ifdef _LP64
422 ,XMM8, XMM8b,
423 XMM9, XMM9b,
424 XMM10, XMM10b,
425 XMM11, XMM11b,
426 XMM12, XMM12b,
427 XMM13, XMM13b,
428 XMM14, XMM14b,
429 XMM15, XMM15b
430 #endif
431 );
433 // Class for all 128bit vector registers
434 reg_class vectorx_reg(XMM0, XMM0b, XMM0c, XMM0d,
435 XMM1, XMM1b, XMM1c, XMM1d,
436 XMM2, XMM2b, XMM2c, XMM2d,
437 XMM3, XMM3b, XMM3c, XMM3d,
438 XMM4, XMM4b, XMM4c, XMM4d,
439 XMM5, XMM5b, XMM5c, XMM5d,
440 XMM6, XMM6b, XMM6c, XMM6d,
441 XMM7, XMM7b, XMM7c, XMM7d
442 #ifdef _LP64
443 ,XMM8, XMM8b, XMM8c, XMM8d,
444 XMM9, XMM9b, XMM9c, XMM9d,
445 XMM10, XMM10b, XMM10c, XMM10d,
446 XMM11, XMM11b, XMM11c, XMM11d,
447 XMM12, XMM12b, XMM12c, XMM12d,
448 XMM13, XMM13b, XMM13c, XMM13d,
449 XMM14, XMM14b, XMM14c, XMM14d,
450 XMM15, XMM15b, XMM15c, XMM15d
451 #endif
452 );
454 // Class for all 256bit vector registers
455 reg_class vectory_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
456 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
457 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
458 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
459 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
460 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
461 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
462 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
463 #ifdef _LP64
464 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
465 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
466 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
467 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
468 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
469 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
470 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
471 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
472 #endif
473 );
475 %}
477 source %{
478 // Float masks come from different places depending on platform.
// 64-bit: masks are materialized in the StubRoutines area; 32-bit: in the
// *_pool literal tables (defined elsewhere in this file, not visible here).
479 #ifdef _LP64
480 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
481 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
482 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
483 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
484 #else
485 static address float_signmask() { return (address)float_signmask_pool; }
486 static address float_signflip() { return (address)float_signflip_pool; }
487 static address double_signmask() { return (address)double_signmask_pool; }
488 static address double_signflip() { return (address)double_signflip_pool; }
489 #endif
// Returns false for match rules that exist in the AD file but are not usable
// with the current CPU features / VM flags; true otherwise.
492 const bool Matcher::match_rule_supported(int opcode) {
493 if (!has_match_rule(opcode))
494 return false; // no rule in the AD file at all
496 switch (opcode) {
497 case Op_PopCountI:
498 case Op_PopCountL:
499 if (!UsePopCountInstruction) // popcnt gated by flag
500 return false;
501 break;
502 case Op_MulVI:
503 if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
504 return false;
505 break;
506 case Op_CompareAndSwapL:
507 #ifdef _LP64
508 case Op_CompareAndSwapP:
509 #endif
510 if (!VM_Version::supports_cx8()) // needs cmpxchg8b/cmpxchg16b support
511 return false;
512 break;
513 }
515 return true; // Per default match rules are supported.
516 }
518 // Max vector size in bytes. 0 if not supported.
519 const int Matcher::vector_width_in_bytes(BasicType bt) {
520 assert(is_java_primitive(bt), "only primitive type vectors");
521 if (UseSSE < 2) return 0;
522 // SSE2 supports 128bit vectors for all types.
523 // AVX2 supports 256bit vectors for all types.
524 int size = (UseAVX > 1) ? 32 : 16;
525 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
526 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
527 size = 32;
528 // Use flag to limit vector size.
529 size = MIN2(size,(int)MaxVectorSize);
530 // Minimum 2 values in vector (or 4 for bytes).
// The cases below deliberately cascade: each group checks its own minimum
// width and falls through to the checks for the narrower element types.
531 switch (bt) {
532 case T_DOUBLE:
533 case T_LONG:
534 if (size < 16) return 0; // fall through
535 case T_FLOAT:
536 case T_INT:
537 if (size < 8) return 0; // fall through
538 case T_BOOLEAN:
539 case T_BYTE:
540 case T_CHAR:
541 case T_SHORT:
542 if (size < 4) return 0;
543 break;
544 default:
545 ShouldNotReachHere();
546 }
547 return size;
548 }
550 // Limits on vector size (number of elements) loaded into vector.
// Max element count = supported byte width / bytes per element; 0 if vectors
// of this type are unsupported.
551 const int Matcher::max_vector_size(const BasicType bt) {
552 return vector_width_in_bytes(bt)/type2aelembytes(bt);
553 }
// Minimum element count: at least 4 bytes of payload, i.e. 4 elements for
// byte-sized types and 2 for everything else, capped by the maximum.
554 const int Matcher::min_vector_size(const BasicType bt) {
555 int max_size = max_vector_size(bt);
556 // Min size which can be loaded into vector is 4 bytes.
557 int size = (type2aelembytes(bt) == 1) ? 4 : 2;
558 return MIN2(size,max_size);
559 }
561 // Vector ideal reg corresponding to specified size in bytes
562 const int Matcher::vector_ideal_reg(int size) {
563 assert(MaxVectorSize >= size, "");
564 switch(size) {
565 case 4: return Op_VecS; // 32-bit vector
566 case 8: return Op_VecD; // 64-bit vector
567 case 16: return Op_VecX; // 128-bit vector
568 case 32: return Op_VecY; // 256-bit vector
569 }
570 ShouldNotReachHere();
571 return 0; // unreachable; keeps compilers without noreturn analysis happy
572 }
574 // Only lowest bits of xmm reg are used for vector shift count.
// Hence a 32-bit slot (Op_VecS) is always sufficient, regardless of size.
575 const int Matcher::vector_shift_count_ideal_reg(int size) {
576 return Op_VecS;
577 }
579 // x86 supports misaligned vectors store/load.
// Returns true unless the user forced alignment with -XX:+AlignVector.
580 const bool Matcher::misaligned_vectors_ok() {
581 return !AlignVector; // can be changed by flag
582 }
584 // Helper methods for MachSpillCopyNode::implementation().
// Emits (or formats, or sizes) a vector register-to-register spill copy.
//   cbuf    - emit into this buffer when non-NULL; otherwise format/size only
//   do_size - 32-bit VM only: return the instruction size without emitting
//   src_lo/src_hi, dst_lo/dst_hi - register-mask slot pair for the move
//   ireg    - ideal vector reg kind (Op_VecS/D/X/Y) selecting the move width
//   st      - destination for the human-readable listing (non-PRODUCT)
// Returns the encoded instruction size in bytes.
585 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
586 int src_hi, int dst_hi, uint ireg, outputStream* st) {
587 // In 64-bit VM size calculation is very complex. Emitting instructions
588 // into scratch buffer is used to get size in 64-bit VM.
589 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
590 assert(ireg == Op_VecS || // 32bit vector
591 (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
592 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
593 "no non-adjacent vector moves" );
594 if (cbuf) {
595 MacroAssembler _masm(cbuf);
596 int offset = __ offset();
597 switch (ireg) {
598 case Op_VecS: // copy whole register
599 case Op_VecD:
600 case Op_VecX:
601 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
602 break;
603 case Op_VecY:
604 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
605 break;
606 default:
607 ShouldNotReachHere();
608 }
609 int size = __ offset() - offset;
610 #ifdef ASSERT
611 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
612 assert(!do_size || size == 4, "incorrect size calculation");
613 #endif
614 return size;
615 #ifndef PRODUCT
616 } else if (!do_size) {
617 switch (ireg) {
618 case Op_VecS:
619 case Op_VecD:
620 case Op_VecX:
621 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
622 break;
623 case Op_VecY:
624 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
625 break;
626 default:
627 ShouldNotReachHere();
628 }
629 #endif
630 }
631 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
632 return 4;
633 }
// Emits (or formats, or sizes) a vector spill between a register and a stack
// slot at [rsp + stack_offset]. Mirrors vec_mov_helper above:
//   cbuf NULL -> format only (non-PRODUCT); is_load selects load vs. store;
//   ireg (Op_VecS/D/X/Y) selects movdl/movq/movdqu/vmovdqu respectively.
// Returns the encoded instruction size in bytes (32-bit VM sizing only).
635 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
636 int stack_offset, int reg, uint ireg, outputStream* st) {
637 // In 64-bit VM size calculation is very complex. Emitting instructions
638 // into scratch buffer is used to get size in 64-bit VM.
639 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
640 if (cbuf) {
641 MacroAssembler _masm(cbuf);
642 int offset = __ offset();
643 if (is_load) {
644 switch (ireg) {
645 case Op_VecS:
646 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
647 break;
648 case Op_VecD:
649 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
650 break;
651 case Op_VecX:
652 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
653 break;
654 case Op_VecY:
655 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
656 break;
657 default:
658 ShouldNotReachHere();
659 }
660 } else { // store
661 switch (ireg) {
662 case Op_VecS:
663 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
664 break;
665 case Op_VecD:
666 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
667 break;
668 case Op_VecX:
669 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
670 break;
671 case Op_VecY:
672 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
673 break;
674 default:
675 ShouldNotReachHere();
676 }
677 }
678 int size = __ offset() - offset;
679 #ifdef ASSERT
// Encoding: opcode (5 bytes incl. prefix) + 0/1/4 byte displacement.
680 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
681 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
682 assert(!do_size || size == (5+offset_size), "incorrect size calculation");
683 #endif
684 return size;
685 #ifndef PRODUCT
686 } else if (!do_size) {
687 if (is_load) {
688 switch (ireg) {
689 case Op_VecS:
690 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
691 break;
692 case Op_VecD:
693 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
694 break;
695 case Op_VecX:
696 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
697 break;
698 case Op_VecY:
699 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
700 break;
701 default:
702 ShouldNotReachHere();
703 }
704 } else { // store
705 switch (ireg) {
706 case Op_VecS:
707 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
708 break;
709 case Op_VecD:
710 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
711 break;
712 case Op_VecX:
713 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
714 break;
715 case Op_VecY:
716 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
717 break;
718 default:
719 ShouldNotReachHere();
720 }
721 }
722 #endif
723 }
724 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
725 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
726 return 5+offset_size;
727 }
// Replicate a 1- or 2-byte immediate across 32 bits and return it reinterpreted
// as a jfloat bit pattern (used to materialize vector replicate constants).
729 static inline jfloat replicate4_imm(int con, int width) {
730 // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
731 assert(width == 1 || width == 2, "only byte or short types here");
732 int bit_width = width * 8;
733 jint val = con;
734 val &= (1 << bit_width) - 1; // mask off sign bits
735 while(bit_width < 32) {
736 val |= (val << bit_width);
737 bit_width <<= 1;
738 }
// Reinterpret the bits through a union rather than *((jfloat*)&val): the
// pointer cast violates strict aliasing (undefined behavior); a union read
// of the same-sized member is the supported type-punning idiom.
union { jint i; jfloat f; } bits;
bits.i = val;
return bits.f;
741 }
// Replicate a 1-, 2- or 4-byte immediate across 64 bits and return it
// reinterpreted as a jdouble bit pattern (vector replicate constants).
743 static inline jdouble replicate8_imm(int con, int width) {
744 // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
745 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
746 int bit_width = width * 8;
747 jlong val = con;
748 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits
749 while(bit_width < 64) {
750 val |= (val << bit_width);
751 bit_width <<= 1;
752 }
// Reinterpret the bits through a union rather than *((jdouble*)&val): the
// pointer cast violates strict aliasing (undefined behavior); a union read
// of the same-sized member is the supported type-punning idiom.
union { jlong l; jdouble d; } bits;
bits.l = val;
return bits.d;
755 }
#ifndef PRODUCT
// Debug-only disassembly text for a nop-padding node.
void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("nop \t# %d bytes pad for loops and calls", _count);
}
#endif

// Emit "_count" single-byte nops; used to pad loop heads and call sites.
void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
  MacroAssembler _masm(&cbuf);
  __ nop(_count);
}

// Code size in bytes: one byte per emitted nop.
uint MachNopNode::size(PhaseRegAlloc*) const {
  return _count;
}
#ifndef PRODUCT
// Debug-only disassembly text for a breakpoint node.
void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("# breakpoint");
}
#endif

// Emit an int3 software-breakpoint instruction.
void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
  MacroAssembler _masm(&cbuf);
  __ int3();
}

// Size is computed generically from the emitted code.
uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
  return MachNode::size(ra_);
}
787 %}
encode %{

  // Save RSP in RBP before a call whose callee may modify the stack
  // pointer (method-handle invokes).
  enc_class preserve_SP %{
    debug_only(int off0 = cbuf.insts_size());
    MacroAssembler _masm(&cbuf);
    // RBP is preserved across all calls, even compiled calls.
    // Use it to preserve RSP in places where the callee might change the SP.
    __ movptr(rbp_mh_SP_save, rsp);
    debug_only(int off1 = cbuf.insts_size());
    assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
  %}

  // Restore RSP from the value saved by preserve_SP.
  enc_class restore_SP %{
    MacroAssembler _masm(&cbuf);
    __ movptr(rsp, rbp_mh_SP_save);
  %}

  // With -XX:+VerifyStackAtCalls, check after each call that the stack
  // depth is unchanged by looking for the magic cookie in the caller frame.
  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      MacroAssembler _masm(&cbuf);
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}

%}
823 //----------OPERANDS-----------------------------------------------------------
824 // Operand definitions must precede instruction definitions for correct parsing
825 // in the ADLC because operands constitute user defined types which are used in
826 // instruction definitions.
// Vectors

// 4-byte (32-bit) vector value in an XMM register.
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

// 8-byte (64-bit) vector value in an XMM register.
operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

// 16-byte (128-bit) vector value in an XMM register.
operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

// 32-byte (256-bit) vector value in a YMM register (AVX).
operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}
862 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)
864 // ============================================================================
// Halt node: emit int3 so execution traps if control ever reaches here.
instruct ShouldNotReachHere() %{
  match(Halt);
  format %{ "int3\t# ShouldNotReachHere" %}
  ins_encode %{
    __ int3();
  %}
  ins_pipe(pipe_slow);
%}
// ============================================================================
// Scalar floating-point add.  The SSE forms (UseAVX == 0) are destructive
// two-operand patterns (dst += src); the AVX forms are non-destructive
// three-operand patterns (dst = src1 + src2) and so match the generic
// (Set dst (AddF src1 src2)) shape without tying dst to an input.

// Float add: dst += src (SSE).
instruct addF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst src));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Float add with the load folded into the instruction: dst += [mem].
instruct addF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst (LoadF src)));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// Float add of a constant, materialized in the constant table.
instruct addF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst con));
  format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// AVX float add: dst = src1 + src2.
instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 src2));

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX float add with folded load: dst = src1 + [src2].
instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// AVX float add of a constant-table constant.
instruct addF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src con));

  format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Double add: dst += src (SSE2).
instruct addD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst src));

  format %{ "addsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Double add with folded load: dst += [mem].
instruct addD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst (LoadD src)));

  format %{ "addsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// Double add of a constant-table constant.
instruct addD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst con));
  format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// AVX double add: dst = src1 + src2.
instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 src2));

  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX double add with folded load: dst = src1 + [src2].
instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 (LoadD src2)));

  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// AVX double add of a constant-table constant.
instruct addD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src con));

  format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
// Scalar floating-point subtract.  Same structure as the add family above:
// SSE forms are destructive (dst -= src); AVX forms are three-operand.

// Float subtract: dst -= src (SSE).
instruct subF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst src));

  format %{ "subss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Float subtract with folded load: dst -= [mem].
instruct subF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst (LoadF src)));

  format %{ "subss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// Float subtract of a constant-table constant.
instruct subF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst con));
  format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// AVX float subtract: dst = src1 - src2.
instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 src2));

  format %{ "vsubss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX float subtract with folded load: dst = src1 - [src2].
instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 (LoadF src2)));

  format %{ "vsubss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// AVX float subtract of a constant-table constant.
instruct subF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src con));

  format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Double subtract: dst -= src (SSE2).
instruct subD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst src));

  format %{ "subsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Double subtract with folded load: dst -= [mem].
instruct subD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst (LoadD src)));

  format %{ "subsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// Double subtract of a constant-table constant.
instruct subD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst con));
  format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// AVX double subtract: dst = src1 - src2.
instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 src2));

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX double subtract with folded load: dst = src1 - [src2].
instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 (LoadD src2)));

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// AVX double subtract of a constant-table constant.
instruct subD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src con));

  format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
// Scalar floating-point multiply.  Same structure as the add family above:
// SSE forms are destructive (dst *= src); AVX forms are three-operand.

// Float multiply: dst *= src (SSE).
instruct mulF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst src));

  format %{ "mulss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Float multiply with folded load: dst *= [mem].
instruct mulF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst (LoadF src)));

  format %{ "mulss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// Float multiply of a constant-table constant.
instruct mulF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst con));
  format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// AVX float multiply: dst = src1 * src2.
instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 src2));

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX float multiply with folded load: dst = src1 * [src2].
instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// AVX float multiply of a constant-table constant.
instruct mulF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src con));

  format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Double multiply: dst *= src (SSE2).
instruct mulD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst src));

  format %{ "mulsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Double multiply with folded load: dst *= [mem].
instruct mulD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst (LoadD src)));

  format %{ "mulsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// Double multiply of a constant-table constant.
instruct mulD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst con));
  format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// AVX double multiply: dst = src1 * src2.
instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 src2));

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX double multiply with folded load: dst = src1 * [src2].
instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 (LoadD src2)));

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// AVX double multiply of a constant-table constant.
instruct mulD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src con));

  format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
// Scalar floating-point divide.  Same structure as the add family above:
// SSE forms are destructive (dst /= src); AVX forms are three-operand.

// Float divide: dst /= src (SSE).
instruct divF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst src));

  format %{ "divss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Float divide with folded load: dst /= [mem].
instruct divF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst (LoadF src)));

  format %{ "divss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// Float divide by a constant-table constant.
instruct divF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst con));
  format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// AVX float divide: dst = src1 / src2.
instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 src2));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX float divide with folded load: dst = src1 / [src2].
instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 (LoadF src2)));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// AVX float divide by a constant-table constant.
instruct divF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src con));

  format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Double divide: dst /= src (SSE2).
instruct divD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst src));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Double divide with folded load: dst /= [mem].
instruct divD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst (LoadD src)));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// Double divide by a constant-table constant.
instruct divD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst con));
  format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// AVX double divide: dst = src1 / src2.
instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 src2));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX double divide with folded load: dst = src1 / [src2].
instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 (LoadD src2)));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// AVX double divide by a constant-table constant.
instruct divD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src con));

  format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
// Floating-point absolute value, implemented by masking off the sign bit
// with a bit pattern loaded from a pre-built mask (float_signmask /
// double_signmask).

// Float abs in place (SSE): clear the sign bit.
instruct absF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AbsF dst));
  ins_cost(150);
  format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

// AVX float abs, non-destructive: dst = src with sign bit cleared.
instruct absF_reg_reg(regF dst, regF src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    // 128-bit operation; no need for the 256-bit encoding.
    bool vector256 = false;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector256);
  %}
  ins_pipe(pipe_slow);
%}

// Double abs in place (SSE2): clear the sign bit.
instruct absD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AbsD dst));
  ins_cost(150);
  format %{ "andpd $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

// AVX double abs, non-destructive: dst = src with sign bit cleared.
instruct absD_reg_reg(regD dst, regD src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    // 128-bit operation; no need for the 256-bit encoding.
    bool vector256 = false;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector256);
  %}
  ins_pipe(pipe_slow);
%}
// Floating-point negation, implemented by flipping the sign bit with a
// bit pattern loaded from a pre-built mask (float_signflip /
// double_signflip).

// Float negate in place (SSE): flip the sign bit.
instruct negF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (NegF dst));
  ins_cost(150);
  format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

// AVX float negate, non-destructive: dst = src with sign bit flipped.
instruct negF_reg_reg(regF dst, regF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  ins_cost(150);
  format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    // 128-bit operation; no need for the 256-bit encoding.
    bool vector256 = false;
    __ vxorps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signflip()), vector256);
  %}
  ins_pipe(pipe_slow);
%}

// Double negate in place (SSE2): flip the sign bit.
instruct negD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (NegD dst));
  ins_cost(150);
  format %{ "xorpd $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

// AVX double negate, non-destructive: dst = src with sign bit flipped.
instruct negD_reg_reg(regD dst, regD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));
  ins_cost(150);
  format %{ "vxorpd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    // 128-bit operation; no need for the 256-bit encoding.
    bool vector256 = false;
    __ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signflip()), vector256);
  %}
  ins_pipe(pipe_slow);
%}
// Square root.  The ideal graph only has a double SqrtD node, so a float
// sqrt appears as ConvD2F(SqrtD(ConvF2D x)); that whole pattern is matched
// here and collapsed to a single sqrtss (the result is identical because
// double sqrt of a float is exactly rounded back to float).

// Float sqrt: match the ConvD2F(SqrtD(ConvF2D ...)) pattern to sqrtss.
instruct sqrtF_reg(regF dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Float sqrt with folded load.
instruct sqrtF_mem(regF dst, memory src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// Float sqrt of a constant-table constant.
instruct sqrtF_imm(regF dst, immF con) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
  format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Double sqrt.
instruct sqrtD_reg(regD dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD src));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Double sqrt with folded load.
instruct sqrtD_mem(regD dst, memory src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD (LoadD src)));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// Double sqrt of a constant-table constant.
instruct sqrtD_imm(regD dst, immD con) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD con));
  format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
1616 // ====================VECTOR INSTRUCTIONS=====================================
// Load vectors (4 bytes long)
instruct loadV4(vecS dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movd $dst,$mem\t! load vector (4 bytes)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (8 bytes long)
instruct loadV8(vecD dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movq $dst,$mem\t! load vector (8 bytes)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (16 bytes long); movdqu tolerates unaligned addresses.
instruct loadV16(vecX dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (32 bytes long, AVX); vmovdqu tolerates unaligned addresses.
instruct loadV32(vecY dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 32);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}
// Store vectors, selected by the vector's memory size (4/8/16/32 bytes).
instruct storeV4(memory mem, vecS src) %{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movd $mem,$src\t! store vector (4 bytes)" %}
  ins_encode %{
    __ movdl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV8(memory mem, vecD src) %{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movq $mem,$src\t! store vector (8 bytes)" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 16-byte store; movdqu tolerates unaligned addresses.
instruct storeV16(memory mem, vecX src) %{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 32-byte store (AVX); vmovdqu tolerates unaligned addresses.
instruct storeV32(memory mem, vecY src) %{
  predicate(n->as_StoreVector()->memory_size() == 32);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
// Replicate byte scalar to be vector.  The byte is moved into the low XMM
// lane, duplicated to 16 bits (punpcklbw), shuffled across the low 64 bits
// (pshuflw), then widened with punpcklqdq / vinserti128h as the vector
// length grows.
instruct Repl4B(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate8B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// 16 bytes: additionally duplicate the low quadword into the high one.
instruct Repl16B(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate16B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 32 bytes: also copy the low 128 bits into the high YMM half.
instruct Repl32B(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
// Replicate byte scalar immediate to be vector by loading from const table.
// replicate4_imm/replicate8_imm (defined in the source block above) build
// the pre-replicated bit pattern that is placed in the constant table.
instruct Repl4B_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

// 16 bytes: load the 8-byte pattern and duplicate it into the high quadword.
instruct Repl16B_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate16B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
// 32 bytes: load the 8-byte pattern, duplicate it across the XMM register,
// then copy the low 128 bits into the high YMM half.
instruct Repl32B_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB con));
  // Fixed debug-output typo: "lreplicate32B" -> "replicate32B" to match the
  // naming used by every other replicate instruct.
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate32B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
// Replicate byte scalar zero to be vector: xor the register with itself.
instruct Repl4B_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate4B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8B_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate8B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16B_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate16B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl32B_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %}
  ins_encode %{
    // 256-bit vpxor is an AVX2 instruction (plain AVX only offers
    // vxorps/vxorpd for 256 bits); 32-byte byte vectors are only
    // generated when AVX2 is available, so vpxor is safe here.
    bool vector256 = true;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}
1864 // Replicate char/short (2 byte) scalar to be vector
// movdl moves the low 32 bits of the GPR into the xmm register; pshuflw with
// imm 0x00 broadcasts word 0 across the low four words. Wider forms then
// duplicate the low quadword (punpcklqdq) and, for 256 bits, mirror the low
// 128 bits into the high lane (vinserti128h).
1865 instruct Repl2S(vecS dst, rRegI src) %{
1866 predicate(n->as_Vector()->length() == 2);
1867 match(Set dst (ReplicateS src));
1868 format %{ "movd $dst,$src\n\t"
1869 "pshuflw $dst,$dst,0x00\t! replicate2S" %}
1870 ins_encode %{
1871 __ movdl($dst$$XMMRegister, $src$$Register);
1872 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1873 %}
1874 ins_pipe( fpu_reg_reg );
1875 %}
1877 instruct Repl4S(vecD dst, rRegI src) %{
1878 predicate(n->as_Vector()->length() == 4);
1879 match(Set dst (ReplicateS src));
1880 format %{ "movd $dst,$src\n\t"
1881 "pshuflw $dst,$dst,0x00\t! replicate4S" %}
1882 ins_encode %{
1883 __ movdl($dst$$XMMRegister, $src$$Register);
1884 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1885 %}
1886 ins_pipe( fpu_reg_reg );
1887 %}
1889 instruct Repl8S(vecX dst, rRegI src) %{
1890 predicate(n->as_Vector()->length() == 8);
1891 match(Set dst (ReplicateS src));
1892 format %{ "movd $dst,$src\n\t"
1893 "pshuflw $dst,$dst,0x00\n\t"
1894 "punpcklqdq $dst,$dst\t! replicate8S" %}
1895 ins_encode %{
1896 __ movdl($dst$$XMMRegister, $src$$Register);
1897 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1898 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
1899 %}
1900 ins_pipe( pipe_slow );
1901 %}
1903 instruct Repl16S(vecY dst, rRegI src) %{
1904 predicate(n->as_Vector()->length() == 16);
1905 match(Set dst (ReplicateS src));
1906 format %{ "movd $dst,$src\n\t"
1907 "pshuflw $dst,$dst,0x00\n\t"
1908 "punpcklqdq $dst,$dst\n\t"
1909 "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
1910 ins_encode %{
1911 __ movdl($dst$$XMMRegister, $src$$Register);
1912 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1913 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
1914 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
1915 %}
1916 ins_pipe( pipe_slow );
1917 %}
1919 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
// replicate4_imm / replicate8_imm pre-replicate the 2-byte immediate into a
// 4- or 8-byte constant-table entry, so a single movdl/movq load already
// contains 2 or 4 copies; wider vectors extend by punpcklqdq / vinserti128h.
1920 instruct Repl2S_imm(vecS dst, immI con) %{
1921 predicate(n->as_Vector()->length() == 2);
1922 match(Set dst (ReplicateS con));
1923 format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %}
1924 ins_encode %{
1925 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
1926 %}
1927 ins_pipe( fpu_reg_reg );
1928 %}
1930 instruct Repl4S_imm(vecD dst, immI con) %{
1931 predicate(n->as_Vector()->length() == 4);
1932 match(Set dst (ReplicateS con));
1933 format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %}
1934 ins_encode %{
1935 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
1936 %}
1937 ins_pipe( fpu_reg_reg );
1938 %}
1940 instruct Repl8S_imm(vecX dst, immI con) %{
1941 predicate(n->as_Vector()->length() == 8);
1942 match(Set dst (ReplicateS con));
1943 format %{ "movq $dst,[$constantaddress]\n\t"
1944 "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
1945 ins_encode %{
1946 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
1947 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
1948 %}
1949 ins_pipe( pipe_slow );
1950 %}
1952 instruct Repl16S_imm(vecY dst, immI con) %{
1953 predicate(n->as_Vector()->length() == 16);
1954 match(Set dst (ReplicateS con));
1955 format %{ "movq $dst,[$constantaddress]\n\t"
1956 "punpcklqdq $dst,$dst\n\t"
1957 "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %}
1958 ins_encode %{
1959 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
1960 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
1961 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
1962 %}
1963 ins_pipe( pipe_slow );
1964 %}
1966 // Replicate char/short (2 byte) scalar zero to be vector
// xor-with-self zeroes the register for any element width; the $zero operand
// only constrains matching (immI0), it is not read by the encoding.
1967 instruct Repl2S_zero(vecS dst, immI0 zero) %{
1968 predicate(n->as_Vector()->length() == 2);
1969 match(Set dst (ReplicateS zero));
1970 format %{ "pxor $dst,$dst\t! replicate2S zero" %}
1971 ins_encode %{
1972 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
1973 %}
1974 ins_pipe( fpu_reg_reg );
1975 %}
1977 instruct Repl4S_zero(vecD dst, immI0 zero) %{
1978 predicate(n->as_Vector()->length() == 4);
1979 match(Set dst (ReplicateS zero));
1980 format %{ "pxor $dst,$dst\t! replicate4S zero" %}
1981 ins_encode %{
1982 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
1983 %}
1984 ins_pipe( fpu_reg_reg );
1985 %}
1987 instruct Repl8S_zero(vecX dst, immI0 zero) %{
1988 predicate(n->as_Vector()->length() == 8);
1989 match(Set dst (ReplicateS zero));
1990 format %{ "pxor $dst,$dst\t! replicate8S zero" %}
1991 ins_encode %{
1992 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
1993 %}
1994 ins_pipe( fpu_reg_reg );
1995 %}
1997 instruct Repl16S_zero(vecY dst, immI0 zero) %{
1998 predicate(n->as_Vector()->length() == 16);
1999 match(Set dst (ReplicateS zero));
2000 format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %}
2001 ins_encode %{
2002 // vpxor of a register with itself zeroes all 256 bits; the 256-bit integer form requires AVX2.
2003 bool vector256 = true;
2004 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
2005 %}
2006 ins_pipe( fpu_reg_reg );
2007 %}
2009 // Replicate integer (4 byte) scalar to be vector
// movdl moves the 32-bit GPR into xmm; pshufd imm 0x00 broadcasts dword 0
// across all four dword lanes. The 256-bit form mirrors the low 128 bits
// into the high lane with vinserti128h.
2010 instruct Repl2I(vecD dst, rRegI src) %{
2011 predicate(n->as_Vector()->length() == 2);
2012 match(Set dst (ReplicateI src));
2013 format %{ "movd $dst,$src\n\t"
2014 "pshufd $dst,$dst,0x00\t! replicate2I" %}
2015 ins_encode %{
2016 __ movdl($dst$$XMMRegister, $src$$Register);
2017 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2018 %}
2019 ins_pipe( fpu_reg_reg );
2020 %}
2022 instruct Repl4I(vecX dst, rRegI src) %{
2023 predicate(n->as_Vector()->length() == 4);
2024 match(Set dst (ReplicateI src));
2025 format %{ "movd $dst,$src\n\t"
2026 "pshufd $dst,$dst,0x00\t! replicate4I" %}
2027 ins_encode %{
2028 __ movdl($dst$$XMMRegister, $src$$Register);
2029 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2030 %}
2031 ins_pipe( pipe_slow );
2032 %}
2034 instruct Repl8I(vecY dst, rRegI src) %{
2035 predicate(n->as_Vector()->length() == 8);
2036 match(Set dst (ReplicateI src));
2037 format %{ "movd $dst,$src\n\t"
2038 "pshufd $dst,$dst,0x00\n\t"
2039 "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
2040 ins_encode %{
2041 __ movdl($dst$$XMMRegister, $src$$Register);
2042 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2043 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2044 %}
2045 ins_pipe( pipe_slow );
2046 %}
2048 // Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
// replicate8_imm with 4-byte elements puts two copies of the immediate into
// one 8-byte constant-table entry; movq loads both, then punpcklqdq and
// vinserti128h widen to 128/256 bits as needed.
2049 instruct Repl2I_imm(vecD dst, immI con) %{
2050 predicate(n->as_Vector()->length() == 2);
2051 match(Set dst (ReplicateI con));
2052 format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %}
2053 ins_encode %{
2054 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
2055 %}
2056 ins_pipe( fpu_reg_reg );
2057 %}
2059 instruct Repl4I_imm(vecX dst, immI con) %{
2060 predicate(n->as_Vector()->length() == 4);
2061 match(Set dst (ReplicateI con));
2062 format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
2063 "punpcklqdq $dst,$dst" %}
2064 ins_encode %{
2065 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
2066 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2067 %}
2068 ins_pipe( pipe_slow );
2069 %}
2071 instruct Repl8I_imm(vecY dst, immI con) %{
2072 predicate(n->as_Vector()->length() == 8);
2073 match(Set dst (ReplicateI con));
2074 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
2075 "punpcklqdq $dst,$dst\n\t"
2076 "vinserti128h $dst,$dst,$dst" %}
2077 ins_encode %{
2078 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
2079 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2080 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2081 %}
2082 ins_pipe( pipe_slow );
2083 %}
2085 // Integer could be loaded into xmm register directly from memory.
// Same broadcast sequence as the register forms, but the scalar comes from a
// LoadI folded into the pattern, saving the GPR->xmm transfer.
2086 instruct Repl2I_mem(vecD dst, memory mem) %{
2087 predicate(n->as_Vector()->length() == 2);
2088 match(Set dst (ReplicateI (LoadI mem)));
2089 format %{ "movd $dst,$mem\n\t"
2090 "pshufd $dst,$dst,0x00\t! replicate2I" %}
2091 ins_encode %{
2092 __ movdl($dst$$XMMRegister, $mem$$Address);
2093 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2094 %}
2095 ins_pipe( fpu_reg_reg );
2096 %}
2098 instruct Repl4I_mem(vecX dst, memory mem) %{
2099 predicate(n->as_Vector()->length() == 4);
2100 match(Set dst (ReplicateI (LoadI mem)));
2101 format %{ "movd $dst,$mem\n\t"
2102 "pshufd $dst,$dst,0x00\t! replicate4I" %}
2103 ins_encode %{
2104 __ movdl($dst$$XMMRegister, $mem$$Address);
2105 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2106 %}
2107 ins_pipe( pipe_slow );
2108 %}
2110 instruct Repl8I_mem(vecY dst, memory mem) %{
2111 predicate(n->as_Vector()->length() == 8);
2112 match(Set dst (ReplicateI (LoadI mem)));
2113 format %{ "movd $dst,$mem\n\t"
2114 "pshufd $dst,$dst,0x00\n\t"
2115 "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
2116 ins_encode %{
2117 __ movdl($dst$$XMMRegister, $mem$$Address);
2118 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2119 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2120 %}
2121 ins_pipe( pipe_slow );
2122 %}
2124 // Replicate integer (4 byte) scalar zero to be vector
// Zero a 2-element int vector: pxor with self; the $zero operand is match-only.
2125 instruct Repl2I_zero(vecD dst, immI0 zero) %{
2126 predicate(n->as_Vector()->length() == 2);
2127 match(Set dst (ReplicateI zero));
// Disassembly tag now says "zero" like every other Repl*_zero pattern.
2128 format %{ "pxor $dst,$dst\t! replicate2I zero" %}
2129 ins_encode %{
2130 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
2131 %}
2132 ins_pipe( fpu_reg_reg );
2133 %}
// Zero a 4-element int vector: pxor with self; the $zero operand is match-only.
2135 instruct Repl4I_zero(vecX dst, immI0 zero) %{
2136 predicate(n->as_Vector()->length() == 4);
2137 match(Set dst (ReplicateI zero));
// Fixed disassembly tag: removed stray ')' ("replicate4I zero)").
2138 format %{ "pxor $dst,$dst\t! replicate4I zero" %}
2139 ins_encode %{
2140 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
2141 %}
2142 ins_pipe( fpu_reg_reg );
2143 %}
// Zero an 8-element int vector (256 bits) with the 3-operand AVX xor form.
2145 instruct Repl8I_zero(vecY dst, immI0 zero) %{
2146 predicate(n->as_Vector()->length() == 8);
2147 match(Set dst (ReplicateI zero));
2148 format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %}
2149 ins_encode %{
2150 // vpxor of a register with itself zeroes all 256 bits; the 256-bit integer form requires AVX2.
2151 bool vector256 = true;
2152 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
2153 %}
2154 ins_pipe( fpu_reg_reg );
2155 %}
2157 // Replicate long (8 byte) scalar to be vector
// LP64: the 64-bit GPR moves to xmm in one movdq, then punpcklqdq duplicates
// the low quadword. 32-bit: the long lives in a register pair, so the lo and
// hi halves are moved separately (HIGH_FROM_LOW picks the pair's high reg)
// and stitched with punpckldq before the quadword duplicate; a TEMP xmm is
// needed for the high half.
2158 #ifdef _LP64
2159 instruct Repl2L(vecX dst, rRegL src) %{
2160 predicate(n->as_Vector()->length() == 2);
2161 match(Set dst (ReplicateL src));
2162 format %{ "movdq $dst,$src\n\t"
2163 "punpcklqdq $dst,$dst\t! replicate2L" %}
2164 ins_encode %{
2165 __ movdq($dst$$XMMRegister, $src$$Register);
2166 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2167 %}
2168 ins_pipe( pipe_slow );
2169 %}
2171 instruct Repl4L(vecY dst, rRegL src) %{
2172 predicate(n->as_Vector()->length() == 4);
2173 match(Set dst (ReplicateL src));
2174 format %{ "movdq $dst,$src\n\t"
2175 "punpcklqdq $dst,$dst\n\t"
2176 "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
2177 ins_encode %{
2178 __ movdq($dst$$XMMRegister, $src$$Register);
2179 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2180 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2181 %}
2182 ins_pipe( pipe_slow );
2183 %}
2184 #else // _LP64
2185 instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
2186 predicate(n->as_Vector()->length() == 2);
2187 match(Set dst (ReplicateL src));
2188 effect(TEMP dst, USE src, TEMP tmp);
2189 format %{ "movdl $dst,$src.lo\n\t"
2190 "movdl $tmp,$src.hi\n\t"
2191 "punpckldq $dst,$tmp\n\t"
2192 "punpcklqdq $dst,$dst\t! replicate2L"%}
2193 ins_encode %{
2194 __ movdl($dst$$XMMRegister, $src$$Register);
2195 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
2196 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
2197 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2198 %}
2199 ins_pipe( pipe_slow );
2200 %}
2202 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
2203 predicate(n->as_Vector()->length() == 4);
2204 match(Set dst (ReplicateL src));
2205 effect(TEMP dst, USE src, TEMP tmp);
2206 format %{ "movdl $dst,$src.lo\n\t"
2207 "movdl $tmp,$src.hi\n\t"
2208 "punpckldq $dst,$tmp\n\t"
2209 "punpcklqdq $dst,$dst\n\t"
2210 "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
2211 ins_encode %{
2212 __ movdl($dst$$XMMRegister, $src$$Register);
2213 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
2214 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
2215 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2216 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2217 %}
2218 ins_pipe( pipe_slow );
2219 %}
2220 #endif // _LP64
2222 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
// A long immediate fills a whole 8-byte constant entry (no replicate*_imm
// packing needed); movq loads one copy and punpcklqdq duplicates it.
2223 instruct Repl2L_imm(vecX dst, immL con) %{
2224 predicate(n->as_Vector()->length() == 2);
2225 match(Set dst (ReplicateL con));
2226 format %{ "movq $dst,[$constantaddress]\n\t"
2227 "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
2228 ins_encode %{
2229 __ movq($dst$$XMMRegister, $constantaddress($con));
2230 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2231 %}
2232 ins_pipe( pipe_slow );
2233 %}
2235 instruct Repl4L_imm(vecY dst, immL con) %{
2236 predicate(n->as_Vector()->length() == 4);
2237 match(Set dst (ReplicateL con));
2238 format %{ "movq $dst,[$constantaddress]\n\t"
2239 "punpcklqdq $dst,$dst\n\t"
2240 "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %}
2241 ins_encode %{
2242 __ movq($dst$$XMMRegister, $constantaddress($con));
2243 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2244 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2245 %}
2246 ins_pipe( pipe_slow );
2247 %}
2249 // Long could be loaded into xmm register directly from memory.
// Folds the LoadL into the pattern: movq reads the 8-byte scalar straight
// from memory, then the usual duplicate/mirror sequence follows.
2250 instruct Repl2L_mem(vecX dst, memory mem) %{
2251 predicate(n->as_Vector()->length() == 2);
2252 match(Set dst (ReplicateL (LoadL mem)));
2253 format %{ "movq $dst,$mem\n\t"
2254 "punpcklqdq $dst,$dst\t! replicate2L" %}
2255 ins_encode %{
2256 __ movq($dst$$XMMRegister, $mem$$Address);
2257 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2258 %}
2259 ins_pipe( pipe_slow );
2260 %}
2262 instruct Repl4L_mem(vecY dst, memory mem) %{
2263 predicate(n->as_Vector()->length() == 4);
2264 match(Set dst (ReplicateL (LoadL mem)));
2265 format %{ "movq $dst,$mem\n\t"
2266 "punpcklqdq $dst,$dst\n\t"
2267 "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
2268 ins_encode %{
2269 __ movq($dst$$XMMRegister, $mem$$Address);
2270 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2271 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2272 %}
2273 ins_pipe( pipe_slow );
2274 %}
2276 // Replicate long (8 byte) scalar zero to be vector
// xor-with-self zeroing; immL0 constrains matching only.
2277 instruct Repl2L_zero(vecX dst, immL0 zero) %{
2278 predicate(n->as_Vector()->length() == 2);
2279 match(Set dst (ReplicateL zero));
2280 format %{ "pxor $dst,$dst\t! replicate2L zero" %}
2281 ins_encode %{
2282 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
2283 %}
2284 ins_pipe( fpu_reg_reg );
2285 %}
2287 instruct Repl4L_zero(vecY dst, immL0 zero) %{
2288 predicate(n->as_Vector()->length() == 4);
2289 match(Set dst (ReplicateL zero));
2290 format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %}
2291 ins_encode %{
2292 // vpxor of a register with itself zeroes all 256 bits; the 256-bit integer form requires AVX2.
2293 bool vector256 = true;
2294 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
2295 %}
2296 ins_pipe( fpu_reg_reg );
2297 %}
2299 // Replicate float (4 byte) scalar to be vector
// Broadcast a float already in an xmm register: pshufd imm 0x00 copies
// dword 0 of $src into every dword lane of $dst.
2300 instruct Repl2F(vecD dst, regF src) %{
2301 predicate(n->as_Vector()->length() == 2);
2302 match(Set dst (ReplicateF src));
// Fixed disassembly: source operand is $src, not $dst (matches the encoding
// and the Repl8F format below).
2303 format %{ "pshufd $dst,$src,0x00\t! replicate2F" %}
2304 ins_encode %{
2305 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
2306 %}
2307 ins_pipe( fpu_reg_reg );
2308 %}
// Broadcast a float already in an xmm register across four lanes.
2310 instruct Repl4F(vecX dst, regF src) %{
2311 predicate(n->as_Vector()->length() == 4);
2312 match(Set dst (ReplicateF src));
// Fixed disassembly: source operand is $src, not $dst (matches the encoding
// and the Repl8F format below).
2313 format %{ "pshufd $dst,$src,0x00\t! replicate4F" %}
2314 ins_encode %{
2315 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
2316 %}
2317 ins_pipe( pipe_slow );
2318 %}
// 8-float broadcast: pshufd fills the low 128 bits, vinsertf128h mirrors
// them into the high 128 bits of the ymm register.
2320 instruct Repl8F(vecY dst, regF src) %{
2321 predicate(n->as_Vector()->length() == 8);
2322 match(Set dst (ReplicateF src));
2323 format %{ "pshufd $dst,$src,0x00\n\t"
2324 "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
2325 ins_encode %{
2326 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
2327 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2328 %}
2329 ins_pipe( pipe_slow );
2330 %}
2332 // Replicate float (4 byte) scalar zero to be vector
// Uses the FP-domain xor (xorps/vxorps) rather than the integer pxor so the
// zero stays in the floating-point execution domain.
2333 instruct Repl2F_zero(vecD dst, immF0 zero) %{
2334 predicate(n->as_Vector()->length() == 2);
2335 match(Set dst (ReplicateF zero));
2336 format %{ "xorps $dst,$dst\t! replicate2F zero" %}
2337 ins_encode %{
2338 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
2339 %}
2340 ins_pipe( fpu_reg_reg );
2341 %}
2343 instruct Repl4F_zero(vecX dst, immF0 zero) %{
2344 predicate(n->as_Vector()->length() == 4);
2345 match(Set dst (ReplicateF zero));
2346 format %{ "xorps $dst,$dst\t! replicate4F zero" %}
2347 ins_encode %{
2348 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
2349 %}
2350 ins_pipe( fpu_reg_reg );
2351 %}
2353 instruct Repl8F_zero(vecY dst, immF0 zero) %{
2354 predicate(n->as_Vector()->length() == 8);
2355 match(Set dst (ReplicateF zero));
2356 format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %}
2357 ins_encode %{
2358 bool vector256 = true;
2359 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
2360 %}
2361 ins_pipe( fpu_reg_reg );
2362 %}
2364 // Replicate double (8 bytes) scalar to be vector
// pshufd imm 0x44 selects dwords {0,1,0,1} of $src, i.e. duplicates the low
// 64-bit double into both qword lanes.
2365 instruct Repl2D(vecX dst, regD src) %{
2366 predicate(n->as_Vector()->length() == 2);
2367 match(Set dst (ReplicateD src));
2368 format %{ "pshufd $dst,$src,0x44\t! replicate2D" %}
2369 ins_encode %{
2370 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
2371 %}
2372 ins_pipe( pipe_slow );
2373 %}
2375 instruct Repl4D(vecY dst, regD src) %{
2376 predicate(n->as_Vector()->length() == 4);
2377 match(Set dst (ReplicateD src));
2378 format %{ "pshufd $dst,$src,0x44\n\t"
2379 "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
2380 ins_encode %{
2381 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
2382 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2383 %}
2384 ins_pipe( pipe_slow );
2385 %}
2387 // Replicate double (8 byte) scalar zero to be vector
// FP-domain xor (xorpd/vxorpd) keeps the zero in the double domain.
2388 instruct Repl2D_zero(vecX dst, immD0 zero) %{
2389 predicate(n->as_Vector()->length() == 2);
2390 match(Set dst (ReplicateD zero));
2391 format %{ "xorpd $dst,$dst\t! replicate2D zero" %}
2392 ins_encode %{
2393 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
2394 %}
2395 ins_pipe( fpu_reg_reg );
2396 %}
2398 instruct Repl4D_zero(vecY dst, immD0 zero) %{
2399 predicate(n->as_Vector()->length() == 4);
2400 match(Set dst (ReplicateD zero));
2401 format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %}
2402 ins_encode %{
2403 bool vector256 = true;
2404 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
2405 %}
2406 ins_pipe( fpu_reg_reg );
2407 %}
2409 // ====================VECTOR ARITHMETIC=======================================
2411 // --------------------------------- ADD --------------------------------------
2413 // Bytes vector add
// Byte vector add patterns. Three shapes recur throughout:
//  - SSE in-place form (dst op= src), matched when the two-address encoding
//    suffices;
//  - AVX 3-operand register form, gated on UseAVX > 0 (128-bit) or
//    UseAVX > 1, i.e. AVX2, for the 256-bit integer ops;
//  - AVX memory-operand form folding a LoadVector into the add.
2414 instruct vadd4B(vecS dst, vecS src) %{
2415 predicate(n->as_Vector()->length() == 4);
2416 match(Set dst (AddVB dst src));
2417 format %{ "paddb $dst,$src\t! add packed4B" %}
2418 ins_encode %{
2419 __ paddb($dst$$XMMRegister, $src$$XMMRegister);
2420 %}
2421 ins_pipe( pipe_slow );
2422 %}
2424 instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
2425 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2426 match(Set dst (AddVB src1 src2));
2427 format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
2428 ins_encode %{
2429 bool vector256 = false;
2430 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2431 %}
2432 ins_pipe( pipe_slow );
2433 %}
2435 instruct vadd8B(vecD dst, vecD src) %{
2436 predicate(n->as_Vector()->length() == 8);
2437 match(Set dst (AddVB dst src));
2438 format %{ "paddb $dst,$src\t! add packed8B" %}
2439 ins_encode %{
2440 __ paddb($dst$$XMMRegister, $src$$XMMRegister);
2441 %}
2442 ins_pipe( pipe_slow );
2443 %}
2445 instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
2446 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
2447 match(Set dst (AddVB src1 src2));
2448 format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
2449 ins_encode %{
2450 bool vector256 = false;
2451 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2452 %}
2453 ins_pipe( pipe_slow );
2454 %}
2456 instruct vadd16B(vecX dst, vecX src) %{
2457 predicate(n->as_Vector()->length() == 16);
2458 match(Set dst (AddVB dst src));
2459 format %{ "paddb $dst,$src\t! add packed16B" %}
2460 ins_encode %{
2461 __ paddb($dst$$XMMRegister, $src$$XMMRegister);
2462 %}
2463 ins_pipe( pipe_slow );
2464 %}
2466 instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
2467 predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
2468 match(Set dst (AddVB src1 src2));
2469 format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
2470 ins_encode %{
2471 bool vector256 = false;
2472 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2473 %}
2474 ins_pipe( pipe_slow );
2475 %}
2477 instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
2478 predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
2479 match(Set dst (AddVB src (LoadVector mem)));
2480 format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
2481 ins_encode %{
2482 bool vector256 = false;
2483 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2484 %}
2485 ins_pipe( pipe_slow );
2486 %}
// 256-bit integer adds require AVX2 (UseAVX > 1).
2488 instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
2489 predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
2490 match(Set dst (AddVB src1 src2));
2491 format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
2492 ins_encode %{
2493 bool vector256 = true;
2494 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2495 %}
2496 ins_pipe( pipe_slow );
2497 %}
2499 instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
2500 predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
2501 match(Set dst (AddVB src (LoadVector mem)));
2502 format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
2503 ins_encode %{
2504 bool vector256 = true;
2505 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2506 %}
2507 ins_pipe( pipe_slow );
2508 %}
2510 // Shorts/Chars vector add
// Same three shapes as the byte adds (SSE in-place, AVX 3-operand,
// AVX memory-operand) using paddw/vpaddw.
2511 instruct vadd2S(vecS dst, vecS src) %{
2512 predicate(n->as_Vector()->length() == 2);
2513 match(Set dst (AddVS dst src));
2514 format %{ "paddw $dst,$src\t! add packed2S" %}
2515 ins_encode %{
2516 __ paddw($dst$$XMMRegister, $src$$XMMRegister);
2517 %}
2518 ins_pipe( pipe_slow );
2519 %}
2521 instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
2522 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2523 match(Set dst (AddVS src1 src2));
2524 format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
2525 ins_encode %{
2526 bool vector256 = false;
2527 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2528 %}
2529 ins_pipe( pipe_slow );
2530 %}
2532 instruct vadd4S(vecD dst, vecD src) %{
2533 predicate(n->as_Vector()->length() == 4);
2534 match(Set dst (AddVS dst src));
2535 format %{ "paddw $dst,$src\t! add packed4S" %}
2536 ins_encode %{
2537 __ paddw($dst$$XMMRegister, $src$$XMMRegister);
2538 %}
2539 ins_pipe( pipe_slow );
2540 %}
2542 instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
2543 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2544 match(Set dst (AddVS src1 src2));
2545 format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
2546 ins_encode %{
2547 bool vector256 = false;
2548 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2549 %}
2550 ins_pipe( pipe_slow );
2551 %}
2553 instruct vadd8S(vecX dst, vecX src) %{
2554 predicate(n->as_Vector()->length() == 8);
2555 match(Set dst (AddVS dst src));
2556 format %{ "paddw $dst,$src\t! add packed8S" %}
2557 ins_encode %{
2558 __ paddw($dst$$XMMRegister, $src$$XMMRegister);
2559 %}
2560 ins_pipe( pipe_slow );
2561 %}
2563 instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
2564 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
2565 match(Set dst (AddVS src1 src2));
2566 format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
2567 ins_encode %{
2568 bool vector256 = false;
2569 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2570 %}
2571 ins_pipe( pipe_slow );
2572 %}
2574 instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
2575 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
2576 match(Set dst (AddVS src (LoadVector mem)));
2577 format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
2578 ins_encode %{
2579 bool vector256 = false;
2580 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2581 %}
2582 ins_pipe( pipe_slow );
2583 %}
// 256-bit integer adds require AVX2 (UseAVX > 1).
2585 instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
2586 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
2587 match(Set dst (AddVS src1 src2));
2588 format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
2589 ins_encode %{
2590 bool vector256 = true;
2591 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2592 %}
2593 ins_pipe( pipe_slow );
2594 %}
2596 instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
2597 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
2598 match(Set dst (AddVS src (LoadVector mem)));
2599 format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
2600 ins_encode %{
2601 bool vector256 = true;
2602 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2603 %}
2604 ins_pipe( pipe_slow );
2605 %}
2607 // Integers vector add
// paddd/vpaddd variants, same SSE / AVX-reg / AVX-mem structure as above.
2608 instruct vadd2I(vecD dst, vecD src) %{
2609 predicate(n->as_Vector()->length() == 2);
2610 match(Set dst (AddVI dst src));
2611 format %{ "paddd $dst,$src\t! add packed2I" %}
2612 ins_encode %{
2613 __ paddd($dst$$XMMRegister, $src$$XMMRegister);
2614 %}
2615 ins_pipe( pipe_slow );
2616 %}
2618 instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
2619 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2620 match(Set dst (AddVI src1 src2));
2621 format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %}
2622 ins_encode %{
2623 bool vector256 = false;
2624 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2625 %}
2626 ins_pipe( pipe_slow );
2627 %}
2629 instruct vadd4I(vecX dst, vecX src) %{
2630 predicate(n->as_Vector()->length() == 4);
2631 match(Set dst (AddVI dst src));
2632 format %{ "paddd $dst,$src\t! add packed4I" %}
2633 ins_encode %{
2634 __ paddd($dst$$XMMRegister, $src$$XMMRegister);
2635 %}
2636 ins_pipe( pipe_slow );
2637 %}
2639 instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
2640 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2641 match(Set dst (AddVI src1 src2));
2642 format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
2643 ins_encode %{
2644 bool vector256 = false;
2645 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2646 %}
2647 ins_pipe( pipe_slow );
2648 %}
2650 instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
2651 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2652 match(Set dst (AddVI src (LoadVector mem)));
2653 format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %}
2654 ins_encode %{
2655 bool vector256 = false;
2656 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2657 %}
2658 ins_pipe( pipe_slow );
2659 %}
// 256-bit integer adds require AVX2 (UseAVX > 1).
2661 instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
2662 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
2663 match(Set dst (AddVI src1 src2));
2664 format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %}
2665 ins_encode %{
2666 bool vector256 = true;
2667 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2668 %}
2669 ins_pipe( pipe_slow );
2670 %}
2672 instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
2673 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
2674 match(Set dst (AddVI src (LoadVector mem)));
2675 format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %}
2676 ins_encode %{
2677 bool vector256 = true;
2678 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2679 %}
2680 ins_pipe( pipe_slow );
2681 %}
2683 // Longs vector add
// paddq/vpaddq variants; 256-bit forms need AVX2 (UseAVX > 1).
2684 instruct vadd2L(vecX dst, vecX src) %{
2685 predicate(n->as_Vector()->length() == 2);
2686 match(Set dst (AddVL dst src));
2687 format %{ "paddq $dst,$src\t! add packed2L" %}
2688 ins_encode %{
2689 __ paddq($dst$$XMMRegister, $src$$XMMRegister);
2690 %}
2691 ins_pipe( pipe_slow );
2692 %}
2694 instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
2695 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2696 match(Set dst (AddVL src1 src2));
2697 format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
2698 ins_encode %{
2699 bool vector256 = false;
2700 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2701 %}
2702 ins_pipe( pipe_slow );
2703 %}
2705 instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
2706 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2707 match(Set dst (AddVL src (LoadVector mem)));
2708 format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %}
2709 ins_encode %{
2710 bool vector256 = false;
2711 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2712 %}
2713 ins_pipe( pipe_slow );
2714 %}
2716 instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
2717 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
2718 match(Set dst (AddVL src1 src2));
2719 format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
2720 ins_encode %{
2721 bool vector256 = true;
2722 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2723 %}
2724 ins_pipe( pipe_slow );
2725 %}
2727 instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
2728 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
2729 match(Set dst (AddVL src (LoadVector mem)));
2730 format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %}
2731 ins_encode %{
2732 bool vector256 = true;
2733 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2734 %}
2735 ins_pipe( pipe_slow );
2736 %}
2738 // Floats vector add
// addps/vaddps variants. Note the 256-bit FP forms only need AVX
// (UseAVX > 0), unlike the integer adds which need AVX2 for 256 bits.
2739 instruct vadd2F(vecD dst, vecD src) %{
2740 predicate(n->as_Vector()->length() == 2);
2741 match(Set dst (AddVF dst src));
2742 format %{ "addps $dst,$src\t! add packed2F" %}
2743 ins_encode %{
2744 __ addps($dst$$XMMRegister, $src$$XMMRegister);
2745 %}
2746 ins_pipe( pipe_slow );
2747 %}
2749 instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
2750 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2751 match(Set dst (AddVF src1 src2));
2752 format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
2753 ins_encode %{
2754 bool vector256 = false;
2755 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2756 %}
2757 ins_pipe( pipe_slow );
2758 %}
2760 instruct vadd4F(vecX dst, vecX src) %{
2761 predicate(n->as_Vector()->length() == 4);
2762 match(Set dst (AddVF dst src));
2763 format %{ "addps $dst,$src\t! add packed4F" %}
2764 ins_encode %{
2765 __ addps($dst$$XMMRegister, $src$$XMMRegister);
2766 %}
2767 ins_pipe( pipe_slow );
2768 %}
2770 instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
2771 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2772 match(Set dst (AddVF src1 src2));
2773 format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
2774 ins_encode %{
2775 bool vector256 = false;
2776 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2777 %}
2778 ins_pipe( pipe_slow );
2779 %}
2781 instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
2782 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2783 match(Set dst (AddVF src (LoadVector mem)));
2784 format %{ "vaddps $dst,$src,$mem\t! add packed4F" %}
2785 ins_encode %{
2786 bool vector256 = false;
2787 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2788 %}
2789 ins_pipe( pipe_slow );
2790 %}
2792 instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
2793 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
2794 match(Set dst (AddVF src1 src2));
2795 format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
2796 ins_encode %{
2797 bool vector256 = true;
2798 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2799 %}
2800 ins_pipe( pipe_slow );
2801 %}
2803 instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
2804 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
2805 match(Set dst (AddVF src (LoadVector mem)));
2806 format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
2807 ins_encode %{
2808 bool vector256 = true;
2809 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2810 %}
2811 ins_pipe( pipe_slow );
2812 %}
2814 // Doubles vector add
// Same three-shape pattern as the float rules above, using addpd /
// vaddpd. 2 doubles fit an XMM (vecX); 4 doubles need a YMM (vecY,
// vector256 = true), which for FP requires only AVX1 (UseAVX > 0).
2815 instruct vadd2D(vecX dst, vecX src) %{
2816 predicate(n->as_Vector()->length() == 2);
2817 match(Set dst (AddVD dst src));
2818 format %{ "addpd $dst,$src\t! add packed2D" %}
2819 ins_encode %{
2820 __ addpd($dst$$XMMRegister, $src$$XMMRegister);
2821 %}
2822 ins_pipe( pipe_slow );
2823 %}
2825 instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
2826 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2827 match(Set dst (AddVD src1 src2));
2828 format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
2829 ins_encode %{
2830 bool vector256 = false;
2831 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2832 %}
2833 ins_pipe( pipe_slow );
2834 %}
2836 instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
2837 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2838 match(Set dst (AddVD src (LoadVector mem)));
2839 format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %}
2840 ins_encode %{
2841 bool vector256 = false;
2842 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2843 %}
2844 ins_pipe( pipe_slow );
2845 %}
2847 instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
2848 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2849 match(Set dst (AddVD src1 src2));
2850 format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
2851 ins_encode %{
2852 bool vector256 = true;
2853 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2854 %}
2855 ins_pipe( pipe_slow );
2856 %}
2858 instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
2859 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2860 match(Set dst (AddVD src (LoadVector mem)));
2861 format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
2862 ins_encode %{
2863 bool vector256 = true;
2864 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2865 %}
2866 ins_pipe( pipe_slow );
2867 %}
2869 // --------------------------------- SUB --------------------------------------
2871 // Bytes vector sub
// Byte subtraction via psubb/vpsubb. Widths: 4B in a vecS, 8B in a
// vecD, 16B in a vecX, 32B in a vecY. The 32B (256-bit) integer
// forms require AVX2 (UseAVX > 1); the 128-bit AVX forms need AVX1.
2872 instruct vsub4B(vecS dst, vecS src) %{
2873 predicate(n->as_Vector()->length() == 4);
2874 match(Set dst (SubVB dst src));
2875 format %{ "psubb $dst,$src\t! sub packed4B" %}
2876 ins_encode %{
2877 __ psubb($dst$$XMMRegister, $src$$XMMRegister);
2878 %}
2879 ins_pipe( pipe_slow );
2880 %}
2882 instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
2883 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2884 match(Set dst (SubVB src1 src2));
2885 format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
2886 ins_encode %{
2887 bool vector256 = false;
2888 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2889 %}
2890 ins_pipe( pipe_slow );
2891 %}
2893 instruct vsub8B(vecD dst, vecD src) %{
2894 predicate(n->as_Vector()->length() == 8);
2895 match(Set dst (SubVB dst src));
2896 format %{ "psubb $dst,$src\t! sub packed8B" %}
2897 ins_encode %{
2898 __ psubb($dst$$XMMRegister, $src$$XMMRegister);
2899 %}
2900 ins_pipe( pipe_slow );
2901 %}
2903 instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
2904 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
2905 match(Set dst (SubVB src1 src2));
2906 format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
2907 ins_encode %{
2908 bool vector256 = false;
2909 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2910 %}
2911 ins_pipe( pipe_slow );
2912 %}
2914 instruct vsub16B(vecX dst, vecX src) %{
2915 predicate(n->as_Vector()->length() == 16);
2916 match(Set dst (SubVB dst src));
2917 format %{ "psubb $dst,$src\t! sub packed16B" %}
2918 ins_encode %{
2919 __ psubb($dst$$XMMRegister, $src$$XMMRegister);
2920 %}
2921 ins_pipe( pipe_slow );
2922 %}
2924 instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
2925 predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
2926 match(Set dst (SubVB src1 src2));
2927 format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
2928 ins_encode %{
2929 bool vector256 = false;
2930 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2931 %}
2932 ins_pipe( pipe_slow );
2933 %}
2935 instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
2936 predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
2937 match(Set dst (SubVB src (LoadVector mem)));
2938 format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
2939 ins_encode %{
2940 bool vector256 = false;
2941 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2942 %}
2943 ins_pipe( pipe_slow );
2944 %}
2946 instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
2947 predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
2948 match(Set dst (SubVB src1 src2));
2949 format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
2950 ins_encode %{
2951 bool vector256 = true;
2952 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2953 %}
2954 ins_pipe( pipe_slow );
2955 %}
2957 instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
2958 predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
2959 match(Set dst (SubVB src (LoadVector mem)));
2960 format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
2961 ins_encode %{
2962 bool vector256 = true;
2963 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2964 %}
2965 ins_pipe( pipe_slow );
2966 %}
2968 // Shorts/Chars vector sub
// 16-bit element subtraction via psubw/vpsubw; shorts and chars share
// these rules since two's-complement subtraction is sign-agnostic.
// 2S in vecS, 4S in vecD, 8S in vecX, 16S in vecY (AVX2 required).
2969 instruct vsub2S(vecS dst, vecS src) %{
2970 predicate(n->as_Vector()->length() == 2);
2971 match(Set dst (SubVS dst src));
2972 format %{ "psubw $dst,$src\t! sub packed2S" %}
2973 ins_encode %{
2974 __ psubw($dst$$XMMRegister, $src$$XMMRegister);
2975 %}
2976 ins_pipe( pipe_slow );
2977 %}
2979 instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
2980 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2981 match(Set dst (SubVS src1 src2));
2982 format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
2983 ins_encode %{
2984 bool vector256 = false;
2985 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2986 %}
2987 ins_pipe( pipe_slow );
2988 %}
2990 instruct vsub4S(vecD dst, vecD src) %{
2991 predicate(n->as_Vector()->length() == 4);
2992 match(Set dst (SubVS dst src));
2993 format %{ "psubw $dst,$src\t! sub packed4S" %}
2994 ins_encode %{
2995 __ psubw($dst$$XMMRegister, $src$$XMMRegister);
2996 %}
2997 ins_pipe( pipe_slow );
2998 %}
3000 instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
3001 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3002 match(Set dst (SubVS src1 src2));
3003 format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
3004 ins_encode %{
3005 bool vector256 = false;
3006 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3007 %}
3008 ins_pipe( pipe_slow );
3009 %}
3011 instruct vsub8S(vecX dst, vecX src) %{
3012 predicate(n->as_Vector()->length() == 8);
3013 match(Set dst (SubVS dst src));
3014 format %{ "psubw $dst,$src\t! sub packed8S" %}
3015 ins_encode %{
3016 __ psubw($dst$$XMMRegister, $src$$XMMRegister);
3017 %}
3018 ins_pipe( pipe_slow );
3019 %}
3021 instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
3022 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3023 match(Set dst (SubVS src1 src2));
3024 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
3025 ins_encode %{
3026 bool vector256 = false;
3027 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3028 %}
3029 ins_pipe( pipe_slow );
3030 %}
3032 instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
3033 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3034 match(Set dst (SubVS src (LoadVector mem)));
3035 format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
3036 ins_encode %{
3037 bool vector256 = false;
3038 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3039 %}
3040 ins_pipe( pipe_slow );
3041 %}
3043 instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
3044 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
3045 match(Set dst (SubVS src1 src2));
3046 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
3047 ins_encode %{
3048 bool vector256 = true;
3049 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3050 %}
3051 ins_pipe( pipe_slow );
3052 %}
3054 instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
3055 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
3056 match(Set dst (SubVS src (LoadVector mem)));
3057 format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
3058 ins_encode %{
3059 bool vector256 = true;
3060 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3061 %}
3062 ins_pipe( pipe_slow );
3063 %}
3065 // Integers vector sub
// 32-bit element subtraction via psubd/vpsubd: 2I in vecD, 4I in
// vecX, 8I in vecY (256-bit integer op, so AVX2 / UseAVX > 1).
3066 instruct vsub2I(vecD dst, vecD src) %{
3067 predicate(n->as_Vector()->length() == 2);
3068 match(Set dst (SubVI dst src));
3069 format %{ "psubd $dst,$src\t! sub packed2I" %}
3070 ins_encode %{
3071 __ psubd($dst$$XMMRegister, $src$$XMMRegister);
3072 %}
3073 ins_pipe( pipe_slow );
3074 %}
3076 instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
3077 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3078 match(Set dst (SubVI src1 src2));
3079 format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %}
3080 ins_encode %{
3081 bool vector256 = false;
3082 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3083 %}
3084 ins_pipe( pipe_slow );
3085 %}
3087 instruct vsub4I(vecX dst, vecX src) %{
3088 predicate(n->as_Vector()->length() == 4);
3089 match(Set dst (SubVI dst src));
3090 format %{ "psubd $dst,$src\t! sub packed4I" %}
3091 ins_encode %{
3092 __ psubd($dst$$XMMRegister, $src$$XMMRegister);
3093 %}
3094 ins_pipe( pipe_slow );
3095 %}
3097 instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
3098 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3099 match(Set dst (SubVI src1 src2));
3100 format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %}
3101 ins_encode %{
3102 bool vector256 = false;
3103 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3104 %}
3105 ins_pipe( pipe_slow );
3106 %}
3108 instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
3109 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3110 match(Set dst (SubVI src (LoadVector mem)));
3111 format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %}
3112 ins_encode %{
3113 bool vector256 = false;
3114 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3115 %}
3116 ins_pipe( pipe_slow );
3117 %}
3119 instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
3120 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
3121 match(Set dst (SubVI src1 src2));
3122 format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %}
3123 ins_encode %{
3124 bool vector256 = true;
3125 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3126 %}
3127 ins_pipe( pipe_slow );
3128 %}
3130 instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
3131 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
3132 match(Set dst (SubVI src (LoadVector mem)));
3133 format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %}
3134 ins_encode %{
3135 bool vector256 = true;
3136 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3137 %}
3138 ins_pipe( pipe_slow );
3139 %}
3141 // Longs vector sub
// 64-bit element subtraction via psubq/vpsubq: 2L in vecX, 4L in
// vecY (256-bit integer op, so AVX2 / UseAVX > 1 is required).
3142 instruct vsub2L(vecX dst, vecX src) %{
3143 predicate(n->as_Vector()->length() == 2);
3144 match(Set dst (SubVL dst src));
3145 format %{ "psubq $dst,$src\t! sub packed2L" %}
3146 ins_encode %{
3147 __ psubq($dst$$XMMRegister, $src$$XMMRegister);
3148 %}
3149 ins_pipe( pipe_slow );
3150 %}
3152 instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
3153 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3154 match(Set dst (SubVL src1 src2));
3155 format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %}
3156 ins_encode %{
3157 bool vector256 = false;
3158 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3159 %}
3160 ins_pipe( pipe_slow );
3161 %}
3163 instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
3164 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3165 match(Set dst (SubVL src (LoadVector mem)));
3166 format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %}
3167 ins_encode %{
3168 bool vector256 = false;
3169 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3170 %}
3171 ins_pipe( pipe_slow );
3172 %}
3174 instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
3175 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
3176 match(Set dst (SubVL src1 src2));
3177 format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %}
3178 ins_encode %{
3179 bool vector256 = true;
3180 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3181 %}
3182 ins_pipe( pipe_slow );
3183 %}
3185 instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
3186 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
3187 match(Set dst (SubVL src (LoadVector mem)));
3188 format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %}
3189 ins_encode %{
3190 bool vector256 = true;
3191 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3192 %}
3193 ins_pipe( pipe_slow );
3194 %}
3196 // Floats vector sub
// Float subtraction via subps/vsubps; same SSE / AVX-reg / AVX-mem
// triple as the add rules. Note SubVF is not commutative, so the
// memory operand can only be folded as the second (right) source.
3197 instruct vsub2F(vecD dst, vecD src) %{
3198 predicate(n->as_Vector()->length() == 2);
3199 match(Set dst (SubVF dst src));
3200 format %{ "subps $dst,$src\t! sub packed2F" %}
3201 ins_encode %{
3202 __ subps($dst$$XMMRegister, $src$$XMMRegister);
3203 %}
3204 ins_pipe( pipe_slow );
3205 %}
3207 instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
3208 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3209 match(Set dst (SubVF src1 src2));
3210 format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %}
3211 ins_encode %{
3212 bool vector256 = false;
3213 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3214 %}
3215 ins_pipe( pipe_slow );
3216 %}
3218 instruct vsub4F(vecX dst, vecX src) %{
3219 predicate(n->as_Vector()->length() == 4);
3220 match(Set dst (SubVF dst src));
3221 format %{ "subps $dst,$src\t! sub packed4F" %}
3222 ins_encode %{
3223 __ subps($dst$$XMMRegister, $src$$XMMRegister);
3224 %}
3225 ins_pipe( pipe_slow );
3226 %}
3228 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
3229 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3230 match(Set dst (SubVF src1 src2));
3231 format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %}
3232 ins_encode %{
3233 bool vector256 = false;
3234 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3235 %}
3236 ins_pipe( pipe_slow );
3237 %}
3239 instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
3240 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3241 match(Set dst (SubVF src (LoadVector mem)));
3242 format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %}
3243 ins_encode %{
3244 bool vector256 = false;
3245 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3246 %}
3247 ins_pipe( pipe_slow );
3248 %}
3250 instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
3251 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3252 match(Set dst (SubVF src1 src2));
3253 format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %}
3254 ins_encode %{
3255 bool vector256 = true;
3256 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3257 %}
3258 ins_pipe( pipe_slow );
3259 %}
3261 instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
3262 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3263 match(Set dst (SubVF src (LoadVector mem)));
3264 format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %}
3265 ins_encode %{
3266 bool vector256 = true;
3267 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3268 %}
3269 ins_pipe( pipe_slow );
3270 %}
3272 // Doubles vector sub
// Double subtraction via subpd/vsubpd; 2D in vecX, 4D in vecY
// (FP 256-bit forms need only AVX1, UseAVX > 0).
3273 instruct vsub2D(vecX dst, vecX src) %{
3274 predicate(n->as_Vector()->length() == 2);
3275 match(Set dst (SubVD dst src));
3276 format %{ "subpd $dst,$src\t! sub packed2D" %}
3277 ins_encode %{
3278 __ subpd($dst$$XMMRegister, $src$$XMMRegister);
3279 %}
3280 ins_pipe( pipe_slow );
3281 %}
3283 instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
3284 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3285 match(Set dst (SubVD src1 src2));
3286 format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %}
3287 ins_encode %{
3288 bool vector256 = false;
3289 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3290 %}
3291 ins_pipe( pipe_slow );
3292 %}
3294 instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
3295 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3296 match(Set dst (SubVD src (LoadVector mem)));
3297 format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %}
3298 ins_encode %{
3299 bool vector256 = false;
3300 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3301 %}
3302 ins_pipe( pipe_slow );
3303 %}
3305 instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
3306 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3307 match(Set dst (SubVD src1 src2));
3308 format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %}
3309 ins_encode %{
3310 bool vector256 = true;
3311 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3312 %}
3313 ins_pipe( pipe_slow );
3314 %}
3316 instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
3317 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3318 match(Set dst (SubVD src (LoadVector mem)));
3319 format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %}
3320 ins_encode %{
3321 bool vector256 = true;
3322 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3323 %}
3324 ins_pipe( pipe_slow );
3325 %}
3327 // --------------------------------- MUL --------------------------------------
3329 // Shorts/Chars vector mul
// 16-bit element multiply via pmullw/vpmullw, which keeps the low 16
// bits of each product (matching Java's truncating semantics).
// 16S (256-bit integer) forms require AVX2 (UseAVX > 1).
3330 instruct vmul2S(vecS dst, vecS src) %{
3331 predicate(n->as_Vector()->length() == 2);
3332 match(Set dst (MulVS dst src));
3333 format %{ "pmullw $dst,$src\t! mul packed2S" %}
3334 ins_encode %{
3335 __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
3336 %}
3337 ins_pipe( pipe_slow );
3338 %}
3340 instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
3341 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3342 match(Set dst (MulVS src1 src2));
3343 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
3344 ins_encode %{
3345 bool vector256 = false;
3346 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3347 %}
3348 ins_pipe( pipe_slow );
3349 %}
3351 instruct vmul4S(vecD dst, vecD src) %{
3352 predicate(n->as_Vector()->length() == 4);
3353 match(Set dst (MulVS dst src));
3354 format %{ "pmullw $dst,$src\t! mul packed4S" %}
3355 ins_encode %{
3356 __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
3357 %}
3358 ins_pipe( pipe_slow );
3359 %}
3361 instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
3362 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3363 match(Set dst (MulVS src1 src2));
3364 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
3365 ins_encode %{
3366 bool vector256 = false;
3367 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3368 %}
3369 ins_pipe( pipe_slow );
3370 %}
3372 instruct vmul8S(vecX dst, vecX src) %{
3373 predicate(n->as_Vector()->length() == 8);
3374 match(Set dst (MulVS dst src));
3375 format %{ "pmullw $dst,$src\t! mul packed8S" %}
3376 ins_encode %{
3377 __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
3378 %}
3379 ins_pipe( pipe_slow );
3380 %}
3382 instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
3383 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3384 match(Set dst (MulVS src1 src2));
3385 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
3386 ins_encode %{
3387 bool vector256 = false;
3388 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3389 %}
3390 ins_pipe( pipe_slow );
3391 %}
3393 instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
3394 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3395 match(Set dst (MulVS src (LoadVector mem)));
3396 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
3397 ins_encode %{
3398 bool vector256 = false;
3399 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3400 %}
3401 ins_pipe( pipe_slow );
3402 %}
3404 instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
3405 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
3406 match(Set dst (MulVS src1 src2));
3407 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
3408 ins_encode %{
3409 bool vector256 = true;
3410 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3411 %}
3412 ins_pipe( pipe_slow );
3413 %}
3415 instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
3416 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
3417 match(Set dst (MulVS src (LoadVector mem)));
3418 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
3419 ins_encode %{
3420 bool vector256 = true;
3421 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3422 %}
3423 ins_pipe( pipe_slow );
3424 %}
3426 // Integers vector mul (sse4_1)
// 32-bit element multiply. The SSE forms use pmulld, which first
// appeared in SSE4.1, hence the stricter UseSSE > 3 predicate (other
// SSE rules in this file have no UseSSE guard). AVX forms use
// vpmulld; the 8I (256-bit) forms require AVX2 (UseAVX > 1).
3427 instruct vmul2I(vecD dst, vecD src) %{
3428 predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
3429 match(Set dst (MulVI dst src));
3430 format %{ "pmulld $dst,$src\t! mul packed2I" %}
3431 ins_encode %{
3432 __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
3433 %}
3434 ins_pipe( pipe_slow );
3435 %}
3437 instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
3438 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3439 match(Set dst (MulVI src1 src2));
3440 format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
3441 ins_encode %{
3442 bool vector256 = false;
3443 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3444 %}
3445 ins_pipe( pipe_slow );
3446 %}
3448 instruct vmul4I(vecX dst, vecX src) %{
3449 predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
3450 match(Set dst (MulVI dst src));
3451 format %{ "pmulld $dst,$src\t! mul packed4I" %}
3452 ins_encode %{
3453 __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
3454 %}
3455 ins_pipe( pipe_slow );
3456 %}
3458 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
3459 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3460 match(Set dst (MulVI src1 src2));
3461 format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
3462 ins_encode %{
3463 bool vector256 = false;
3464 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3465 %}
3466 ins_pipe( pipe_slow );
3467 %}
3469 instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
3470 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3471 match(Set dst (MulVI src (LoadVector mem)));
3472 format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
3473 ins_encode %{
3474 bool vector256 = false;
3475 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3476 %}
3477 ins_pipe( pipe_slow );
3478 %}
3480 instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
3481 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
3482 match(Set dst (MulVI src1 src2));
3483 format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
3484 ins_encode %{
3485 bool vector256 = true;
3486 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3487 %}
3488 ins_pipe( pipe_slow );
3489 %}
3491 instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
3492 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
3493 match(Set dst (MulVI src (LoadVector mem)));
3494 format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
3495 ins_encode %{
3496 bool vector256 = true;
3497 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3498 %}
3499 ins_pipe( pipe_slow );
3500 %}
3502 // Floats vector mul
// Float multiply via mulps/vmulps; same SSE / AVX-reg / AVX-mem
// triple as the add/sub rules, 8F YMM forms gated on AVX1 only.
3503 instruct vmul2F(vecD dst, vecD src) %{
3504 predicate(n->as_Vector()->length() == 2);
3505 match(Set dst (MulVF dst src));
3506 format %{ "mulps $dst,$src\t! mul packed2F" %}
3507 ins_encode %{
3508 __ mulps($dst$$XMMRegister, $src$$XMMRegister);
3509 %}
3510 ins_pipe( pipe_slow );
3511 %}
3513 instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
3514 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3515 match(Set dst (MulVF src1 src2));
3516 format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %}
3517 ins_encode %{
3518 bool vector256 = false;
3519 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3520 %}
3521 ins_pipe( pipe_slow );
3522 %}
3524 instruct vmul4F(vecX dst, vecX src) %{
3525 predicate(n->as_Vector()->length() == 4);
3526 match(Set dst (MulVF dst src));
3527 format %{ "mulps $dst,$src\t! mul packed4F" %}
3528 ins_encode %{
3529 __ mulps($dst$$XMMRegister, $src$$XMMRegister);
3530 %}
3531 ins_pipe( pipe_slow );
3532 %}
3534 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
3535 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3536 match(Set dst (MulVF src1 src2));
3537 format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %}
3538 ins_encode %{
3539 bool vector256 = false;
3540 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3541 %}
3542 ins_pipe( pipe_slow );
3543 %}
3545 instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
3546 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3547 match(Set dst (MulVF src (LoadVector mem)));
3548 format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %}
3549 ins_encode %{
3550 bool vector256 = false;
3551 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3552 %}
3553 ins_pipe( pipe_slow );
3554 %}
3556 instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
3557 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3558 match(Set dst (MulVF src1 src2));
3559 format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %}
3560 ins_encode %{
3561 bool vector256 = true;
3562 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3563 %}
3564 ins_pipe( pipe_slow );
3565 %}
3567 instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
3568 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3569 match(Set dst (MulVF src (LoadVector mem)));
3570 format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %}
3571 ins_encode %{
3572 bool vector256 = true;
3573 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3574 %}
3575 ins_pipe( pipe_slow );
3576 %}
3578 // Doubles vector mul
// Double multiply via mulpd/vmulpd; 2D in vecX, 4D in vecY (AVX1).
3579 instruct vmul2D(vecX dst, vecX src) %{
3580 predicate(n->as_Vector()->length() == 2);
3581 match(Set dst (MulVD dst src));
3582 format %{ "mulpd $dst,$src\t! mul packed2D" %}
3583 ins_encode %{
3584 __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
3585 %}
3586 ins_pipe( pipe_slow );
3587 %}
3589 instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
3590 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3591 match(Set dst (MulVD src1 src2));
3592 format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %}
3593 ins_encode %{
3594 bool vector256 = false;
3595 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3596 %}
3597 ins_pipe( pipe_slow );
3598 %}
3600 instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
3601 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3602 match(Set dst (MulVD src (LoadVector mem)));
3603 format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %}
3604 ins_encode %{
3605 bool vector256 = false;
3606 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3607 %}
3608 ins_pipe( pipe_slow );
3609 %}
3611 instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
3612 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3613 match(Set dst (MulVD src1 src2));
3614 format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %}
3615 ins_encode %{
3616 bool vector256 = true;
3617 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3618 %}
3619 ins_pipe( pipe_slow );
3620 %}
3622 instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
3623 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3624 match(Set dst (MulVD src (LoadVector mem)));
3625 format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %}
3626 ins_encode %{
3627 bool vector256 = true;
3628 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3629 %}
3630 ins_pipe( pipe_slow );
3631 %}
3633 // --------------------------------- DIV --------------------------------------
3635 // Floats vector div
// Float division via divps/vdivps. Only FP division is vectorized
// (x86 has no packed integer divide). Division is not commutative,
// so the memory operand is folded only as the divisor.
3636 instruct vdiv2F(vecD dst, vecD src) %{
3637 predicate(n->as_Vector()->length() == 2);
3638 match(Set dst (DivVF dst src));
3639 format %{ "divps $dst,$src\t! div packed2F" %}
3640 ins_encode %{
3641 __ divps($dst$$XMMRegister, $src$$XMMRegister);
3642 %}
3643 ins_pipe( pipe_slow );
3644 %}
3646 instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
3647 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3648 match(Set dst (DivVF src1 src2));
3649 format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %}
3650 ins_encode %{
3651 bool vector256 = false;
3652 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3653 %}
3654 ins_pipe( pipe_slow );
3655 %}
3657 instruct vdiv4F(vecX dst, vecX src) %{
3658 predicate(n->as_Vector()->length() == 4);
3659 match(Set dst (DivVF dst src));
3660 format %{ "divps $dst,$src\t! div packed4F" %}
3661 ins_encode %{
3662 __ divps($dst$$XMMRegister, $src$$XMMRegister);
3663 %}
3664 ins_pipe( pipe_slow );
3665 %}
3667 instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
3668 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3669 match(Set dst (DivVF src1 src2));
3670 format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %}
3671 ins_encode %{
3672 bool vector256 = false;
3673 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3674 %}
3675 ins_pipe( pipe_slow );
3676 %}
3678 instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
3679 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3680 match(Set dst (DivVF src (LoadVector mem)));
3681 format %{ "vdivps $dst,$src,$mem\t! div packed4F" %}
3682 ins_encode %{
3683 bool vector256 = false;
3684 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3685 %}
3686 ins_pipe( pipe_slow );
3687 %}
3689 instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
3690 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3691 match(Set dst (DivVF src1 src2));
3692 format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %}
3693 ins_encode %{
3694 bool vector256 = true;
3695 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3696 %}
3697 ins_pipe( pipe_slow );
3698 %}
3700 instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
3701 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3702 match(Set dst (DivVF src (LoadVector mem)));
3703 format %{ "vdivps $dst,$src,$mem\t! div packed8F" %}
3704 ins_encode %{
3705 bool vector256 = true;
3706 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3707 %}
3708 ins_pipe( pipe_slow );
3709 %}
// Doubles vector div

// SSE in-place form: 2 packed doubles (128-bit XMM); dst /= src via divpd.
instruct vdiv2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD dst src));
  format %{ "divpd $dst,$src\t! div packed2D" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form for 2 doubles (vector256 = false -> 128-bit encoding).
instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX form with the divisor loaded straight from memory.
instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX 256-bit form: 4 packed doubles in a YMM register.
instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX 256-bit form with a memory divisor.
instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
// ------------------------------ Shift ---------------------------------------

// Left and right shift count vectors are the same on x86
// (only lowest bits of xmm reg are used for count).
// Materializes a variable shift count into an XMM register (movd from a GPR);
// one rule matches both the left- and right-shift count ideal nodes.
instruct vshiftcnt(vecS dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movd $dst,$cnt\t! load shift count" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $cnt$$Register);
  %}
  ins_pipe( pipe_slow );
%}
// ------------------------------ LeftShift -----------------------------------

// Shorts/Chars vector left shift
// Naming scheme throughout: vsll<N><T>           = SSE in-place, variable count in XMM
//                           vsll<N><T>_imm       = SSE in-place, 8-bit immediate count
//                           vsll<N><T>_reg       = AVX non-destructive, variable count
//                           vsll<N><T>_reg_imm   = AVX non-destructive, immediate count

// SSE: 2 shorts, variable count (count already in an XMM reg, see vshiftcnt).
instruct vsll2S(vecS dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE: 2 shorts, immediate count.
instruct vsll2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX: 2 shorts, variable count (vector256 = false -> 128-bit encoding).
instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX: 2 shorts, immediate count.
instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// SSE: 4 shorts (64-bit vecD payload), variable count.
instruct vsll4S(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE: 4 shorts, immediate count.
instruct vsll4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX: 4 shorts, variable count.
instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX: 4 shorts, immediate count.
instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// SSE: 8 shorts (full 128-bit XMM), variable count.
instruct vsll8S(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE: 8 shorts, immediate count.
instruct vsll8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX: 8 shorts, variable count.
instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX: 8 shorts, immediate count.
instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 (UseAVX > 1): 16 shorts in a 256-bit YMM register, variable count.
// 256-bit integer shifts require AVX2, hence the stricter predicate.
instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2: 16 shorts, immediate count.
instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}
// Integers vector left shift
// Same SSE/AVX variant scheme as the short shifts above, using pslld/vpslld.

// SSE: 2 ints, variable count.
instruct vsll2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE: 2 ints, immediate count.
instruct vsll2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX: 2 ints, variable count (128-bit encoding).
instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX: 2 ints, immediate count.
instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// SSE: 4 ints (full 128-bit XMM), variable count.
instruct vsll4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE: 4 ints, immediate count.
instruct vsll4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX: 4 ints, variable count.
instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX: 4 ints, immediate count.
instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2: 8 ints in a 256-bit YMM register, variable count.
instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2: 8 ints, immediate count.
instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}
// Longs vector left shift
// Same variant scheme, using psllq/vpsllq on packed 64-bit lanes.

// SSE: 2 longs, variable count.
instruct vsll2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE: 2 longs, immediate count.
instruct vsll2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX: 2 longs, variable count (128-bit encoding).
instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX: 2 longs, immediate count.
instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2: 4 longs in a 256-bit YMM register, variable count.
instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2: 4 longs, immediate count.
instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}
// ----------------------- LogicalRightShift -----------------------------------

// Shorts vector logical right shift produces incorrect Java result
// for negative data because java code convert short value into int with
// sign extension before a shift. But char vectors are fine since chars are
// unsigned values.

// SSE: 2 chars, variable count.
instruct vsrl2S(vecS dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE: 2 chars, immediate count.
instruct vsrl2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX: 2 chars, variable count (128-bit encoding).
instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX: 2 chars, immediate count.
instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// SSE: 4 chars, variable count.
instruct vsrl4S(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE: 4 chars, immediate count.
instruct vsrl4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX: 4 chars, variable count.
instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX: 4 chars, immediate count.
instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// SSE: 8 chars, variable count.
instruct vsrl8S(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE: 8 chars, immediate count.
instruct vsrl8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX: 8 chars, variable count.
instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX: 8 chars, immediate count.
instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2: 16 chars in a 256-bit YMM register, variable count.
instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2: 16 chars, immediate count.
instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}
// Integers vector logical right shift
// Zero-filling right shift of packed 32-bit lanes via psrld/vpsrld.

// SSE: 2 ints, variable count.
instruct vsrl2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE: 2 ints, immediate count.
instruct vsrl2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX: 2 ints, variable count (128-bit encoding).
instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX: 2 ints, immediate count.
instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// SSE: 4 ints, variable count.
instruct vsrl4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE: 4 ints, immediate count.
instruct vsrl4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX: 4 ints, variable count.
instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX: 4 ints, immediate count.
instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2: 8 ints in a 256-bit YMM register, variable count.
instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2: 8 ints, immediate count.
instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}
// Longs vector logical right shift
// Zero-filling right shift of packed 64-bit lanes via psrlq/vpsrlq.

// SSE: 2 longs, variable count.
instruct vsrl2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE: 2 longs, immediate count.
instruct vsrl2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX: 2 longs, variable count (128-bit encoding).
instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX: 2 longs, immediate count.
instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2: 4 longs in a 256-bit YMM register, variable count.
instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2: 4 longs, immediate count.
instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}
// ------------------- ArithmeticRightShift -----------------------------------

// Shorts/Chars vector arithmetic right shift
// Sign-propagating right shift of packed 16-bit lanes via psraw/vpsraw.
// NOTE: there is no psraq, which is why no long arithmetic-right-shift
// rules appear in this file's shift sections.

// SSE: 2 shorts, variable count.
instruct vsra2S(vecS dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE: 2 shorts, immediate count.
instruct vsra2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX: 2 shorts, variable count (128-bit encoding).
instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX: 2 shorts, immediate count.
instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// SSE: 4 shorts, variable count.
instruct vsra4S(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE: 4 shorts, immediate count.
instruct vsra4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX: 4 shorts, variable count.
instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX: 4 shorts, immediate count.
instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// SSE: 8 shorts, variable count.
instruct vsra8S(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE: 8 shorts, immediate count.
instruct vsra8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX: 8 shorts, variable count.
instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX: 8 shorts, immediate count.
instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2: 16 shorts in a 256-bit YMM register, variable count.
instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2: 16 shorts, immediate count.
instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}
// Integers vector arithmetic right shift
// Sign-propagating right shift of packed 32-bit lanes via psrad/vpsrad.

// SSE: 2 ints, variable count.
instruct vsra2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE: 2 ints, immediate count.
instruct vsra2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX: 2 ints, variable count (128-bit encoding).
instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX: 2 ints, immediate count.
instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// SSE: 4 ints, variable count.
instruct vsra4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
4634 instruct vsra4I_imm(vecX dst, immI8 shift) %{
4635 predicate(n->as_Vector()->length() == 4);
4636 match(Set dst (RShiftVI dst shift));
4637 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
4638 ins_encode %{
4639 __ psrad($dst$$XMMRegister, (int)$shift$$constant);
4640 %}
4641 ins_pipe( pipe_slow );
4642 %}
4644 instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
4645 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4646 match(Set dst (RShiftVI src shift));
4647 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
4648 ins_encode %{
4649 bool vector256 = false;
4650 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4651 %}
4652 ins_pipe( pipe_slow );
4653 %}
4655 instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
4656 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4657 match(Set dst (RShiftVI src shift));
4658 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
4659 ins_encode %{
4660 bool vector256 = false;
4661 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4662 %}
4663 ins_pipe( pipe_slow );
4664 %}
4666 instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
4667 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4668 match(Set dst (RShiftVI src shift));
4669 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
4670 ins_encode %{
4671 bool vector256 = true;
4672 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4673 %}
4674 ins_pipe( pipe_slow );
4675 %}
4677 instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
4678 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4679 match(Set dst (RShiftVI src shift));
4680 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
4681 ins_encode %{
4682 bool vector256 = true;
4683 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4684 %}
4685 ins_pipe( pipe_slow );
4686 %}
4688 // There are no longs vector arithmetic right shift instructions.
// (SSE/AVX define no packed 64-bit arithmetic right shift instruction,
// so no vsra*L rules can be emitted here.)
4691 // --------------------------------- AND --------------------------------------
// Bitwise AND of vectors, selected by total width (length_in_bytes).
// Plain forms are in-place SSE (match uses dst as input and output);
// *_reg forms are three-operand AVX; *_mem forms fold a LoadVector
// directly into the instruction's memory operand.  vector256 == true
// selects the 256-bit encoding (32-byte rules, UseAVX > 1).
// 4-byte vectors, in-place SSE form.
4693 instruct vand4B(vecS dst, vecS src) %{
4694 predicate(n->as_Vector()->length_in_bytes() == 4);
4695 match(Set dst (AndV dst src));
4696 format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
4697 ins_encode %{
4698 __ pand($dst$$XMMRegister, $src$$XMMRegister);
4699 %}
4700 ins_pipe( pipe_slow );
4701 %}
// 4-byte vectors, AVX three-operand form.
4703 instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
4704 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
4705 match(Set dst (AndV src1 src2));
4706 format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
4707 ins_encode %{
4708 bool vector256 = false;
4709 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4710 %}
4711 ins_pipe( pipe_slow );
4712 %}
// 8-byte vectors, in-place SSE form.
4714 instruct vand8B(vecD dst, vecD src) %{
4715 predicate(n->as_Vector()->length_in_bytes() == 8);
4716 match(Set dst (AndV dst src));
4717 format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
4718 ins_encode %{
4719 __ pand($dst$$XMMRegister, $src$$XMMRegister);
4720 %}
4721 ins_pipe( pipe_slow );
4722 %}
// 8-byte vectors, AVX three-operand form.
4724 instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
4725 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
4726 match(Set dst (AndV src1 src2));
4727 format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %}
4728 ins_encode %{
4729 bool vector256 = false;
4730 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4731 %}
4732 ins_pipe( pipe_slow );
4733 %}
// 16-byte vectors, in-place SSE form.
4735 instruct vand16B(vecX dst, vecX src) %{
4736 predicate(n->as_Vector()->length_in_bytes() == 16);
4737 match(Set dst (AndV dst src));
4738 format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
4739 ins_encode %{
4740 __ pand($dst$$XMMRegister, $src$$XMMRegister);
4741 %}
4742 ins_pipe( pipe_slow );
4743 %}
// 16-byte vectors, AVX three-operand form.
4745 instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
4746 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
4747 match(Set dst (AndV src1 src2));
4748 format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
4749 ins_encode %{
4750 bool vector256 = false;
4751 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4752 %}
4753 ins_pipe( pipe_slow );
4754 %}
// 16-byte vectors, AVX form with the second operand folded from memory.
4756 instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
4757 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
4758 match(Set dst (AndV src (LoadVector mem)));
4759 format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
4760 ins_encode %{
4761 bool vector256 = false;
4762 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4763 %}
4764 ins_pipe( pipe_slow );
4765 %}
// 32-byte vectors, AVX three-operand form; requires UseAVX > 1.
4767 instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
4768 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
4769 match(Set dst (AndV src1 src2));
4770 format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %}
4771 ins_encode %{
4772 bool vector256 = true;
4773 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4774 %}
4775 ins_pipe( pipe_slow );
4776 %}
// 32-byte vectors, AVX form with folded memory operand; requires UseAVX > 1.
4778 instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
4779 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
4780 match(Set dst (AndV src (LoadVector mem)));
4781 format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
4782 ins_encode %{
4783 bool vector256 = true;
4784 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4785 %}
4786 ins_pipe( pipe_slow );
4787 %}
4789 // --------------------------------- OR ---------------------------------------
// Bitwise OR of vectors; same structure as the AND section above: in-place
// SSE forms, three-operand AVX *_reg forms, and *_mem forms that fold a
// LoadVector into the memory operand.  vector256 == true selects the
// 256-bit encoding (32-byte rules, UseAVX > 1).
// 4-byte vectors, in-place SSE form.
4791 instruct vor4B(vecS dst, vecS src) %{
4792 predicate(n->as_Vector()->length_in_bytes() == 4);
4793 match(Set dst (OrV dst src));
4794 format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
4795 ins_encode %{
4796 __ por($dst$$XMMRegister, $src$$XMMRegister);
4797 %}
4798 ins_pipe( pipe_slow );
4799 %}
// 4-byte vectors, AVX three-operand form.
4801 instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
4802 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
4803 match(Set dst (OrV src1 src2));
4804 format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
4805 ins_encode %{
4806 bool vector256 = false;
4807 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4808 %}
4809 ins_pipe( pipe_slow );
4810 %}
// 8-byte vectors, in-place SSE form.
4812 instruct vor8B(vecD dst, vecD src) %{
4813 predicate(n->as_Vector()->length_in_bytes() == 8);
4814 match(Set dst (OrV dst src));
4815 format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
4816 ins_encode %{
4817 __ por($dst$$XMMRegister, $src$$XMMRegister);
4818 %}
4819 ins_pipe( pipe_slow );
4820 %}
// 8-byte vectors, AVX three-operand form.
4822 instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
4823 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
4824 match(Set dst (OrV src1 src2));
4825 format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
4826 ins_encode %{
4827 bool vector256 = false;
4828 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4829 %}
4830 ins_pipe( pipe_slow );
4831 %}
// 16-byte vectors, in-place SSE form.
4833 instruct vor16B(vecX dst, vecX src) %{
4834 predicate(n->as_Vector()->length_in_bytes() == 16);
4835 match(Set dst (OrV dst src));
4836 format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
4837 ins_encode %{
4838 __ por($dst$$XMMRegister, $src$$XMMRegister);
4839 %}
4840 ins_pipe( pipe_slow );
4841 %}
// 16-byte vectors, AVX three-operand form.
4843 instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
4844 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
4845 match(Set dst (OrV src1 src2));
4846 format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
4847 ins_encode %{
4848 bool vector256 = false;
4849 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4850 %}
4851 ins_pipe( pipe_slow );
4852 %}
// 16-byte vectors, AVX form with the second operand folded from memory.
4854 instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
4855 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
4856 match(Set dst (OrV src (LoadVector mem)));
4857 format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
4858 ins_encode %{
4859 bool vector256 = false;
4860 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4861 %}
4862 ins_pipe( pipe_slow );
4863 %}
// 32-byte vectors, AVX three-operand form; requires UseAVX > 1.
4865 instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
4866 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
4867 match(Set dst (OrV src1 src2));
4868 format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
4869 ins_encode %{
4870 bool vector256 = true;
4871 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4872 %}
4873 ins_pipe( pipe_slow );
4874 %}
// 32-byte vectors, AVX form with folded memory operand; requires UseAVX > 1.
4876 instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
4877 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
4878 match(Set dst (OrV src (LoadVector mem)));
4879 format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
4880 ins_encode %{
4881 bool vector256 = true;
4882 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4883 %}
4884 ins_pipe( pipe_slow );
4885 %}
4887 // --------------------------------- XOR --------------------------------------
// Bitwise XOR of vectors; same structure as the AND/OR sections: in-place
// SSE forms, three-operand AVX *_reg forms, and *_mem forms that fold a
// LoadVector into the memory operand.  vector256 == true selects the
// 256-bit encoding (32-byte rules, UseAVX > 1).
// 4-byte vectors, in-place SSE form.
4889 instruct vxor4B(vecS dst, vecS src) %{
4890 predicate(n->as_Vector()->length_in_bytes() == 4);
4891 match(Set dst (XorV dst src));
4892 format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
4893 ins_encode %{
4894 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
4895 %}
4896 ins_pipe( pipe_slow );
4897 %}
// 4-byte vectors, AVX three-operand form.
4899 instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
4900 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
4901 match(Set dst (XorV src1 src2));
4902 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
4903 ins_encode %{
4904 bool vector256 = false;
4905 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4906 %}
4907 ins_pipe( pipe_slow );
4908 %}
// 8-byte vectors, in-place SSE form.
4910 instruct vxor8B(vecD dst, vecD src) %{
4911 predicate(n->as_Vector()->length_in_bytes() == 8);
4912 match(Set dst (XorV dst src));
4913 format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
4914 ins_encode %{
4915 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
4916 %}
4917 ins_pipe( pipe_slow );
4918 %}
// 8-byte vectors, AVX three-operand form.
4920 instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
4921 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
4922 match(Set dst (XorV src1 src2));
4923 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
4924 ins_encode %{
4925 bool vector256 = false;
4926 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4927 %}
4928 ins_pipe( pipe_slow );
4929 %}
// 16-byte vectors, in-place SSE form.
4931 instruct vxor16B(vecX dst, vecX src) %{
4932 predicate(n->as_Vector()->length_in_bytes() == 16);
4933 match(Set dst (XorV dst src));
4934 format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
4935 ins_encode %{
4936 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
4937 %}
4938 ins_pipe( pipe_slow );
4939 %}
// 16-byte vectors, AVX three-operand form.
4941 instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
4942 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
4943 match(Set dst (XorV src1 src2));
4944 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
4945 ins_encode %{
4946 bool vector256 = false;
4947 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4948 %}
4949 ins_pipe( pipe_slow );
4950 %}
// 16-byte vectors, AVX form with the second operand folded from memory.
4952 instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
4953 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
4954 match(Set dst (XorV src (LoadVector mem)));
4955 format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
4956 ins_encode %{
4957 bool vector256 = false;
4958 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4959 %}
4960 ins_pipe( pipe_slow );
4961 %}
// 32-byte vectors, AVX three-operand form; requires UseAVX > 1.
4963 instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
4964 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
4965 match(Set dst (XorV src1 src2));
4966 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
4967 ins_encode %{
4968 bool vector256 = true;
4969 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4970 %}
4971 ins_pipe( pipe_slow );
4972 %}
// 32-byte vectors, AVX form with folded memory operand; requires UseAVX > 1.
4974 instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
4975 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
4976 match(Set dst (XorV src (LoadVector mem)));
4977 format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
4978 ins_encode %{
4979 bool vector256 = true;
4980 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4981 %}
4982 ins_pipe( pipe_slow );
4983 %}