Tue, 02 Sep 2014 12:48:45 -0700
8055494: Add C2 x86 intrinsic for BigInteger::multiplyToLen() method
Summary: Add new C2 intrinsic for BigInteger::multiplyToLen() on x86 in 64-bit VM.
Reviewed-by: roland
1 //
2 // Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 //
5 // This code is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU General Public License version 2 only, as
7 // published by the Free Software Foundation.
8 //
9 // This code is distributed in the hope that it will be useful, but WITHOUT
10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 // version 2 for more details (a copy is included in the LICENSE file that
13 // accompanied this code).
14 //
15 // You should have received a copy of the GNU General Public License version
16 // 2 along with this work; if not, write to the Free Software Foundation,
17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 //
19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 // or visit www.oracle.com if you need additional information or have any
21 // questions.
22 //
23 //
25 // X86 Common Architecture Description File
27 //----------REGISTER DEFINITION BLOCK------------------------------------------
28 // This information is used by the matcher and the register allocator to
29 // describe individual registers and classes of registers within the target
30 // architecture.
32 register %{
33 //----------Architecture Description Register Definitions----------------------
34 // General Registers
35 // "reg_def" name ( register save type, C convention save type,
36 // ideal register type, encoding );
37 // Register Save Types:
38 //
39 // NS = No-Save: The register allocator assumes that these registers
40 // can be used without saving upon entry to the method, &
41 // that they do not need to be saved at call sites.
42 //
43 // SOC = Save-On-Call: The register allocator assumes that these registers
44 // can be used without saving upon entry to the method,
45 // but that they must be saved at call sites.
46 //
47 // SOE = Save-On-Entry: The register allocator assumes that these registers
48 // must be saved before using them upon entry to the
49 // method, but they do not need to be saved at call
50 // sites.
51 //
52 // AS = Always-Save: The register allocator assumes that these registers
53 // must be saved before using them upon entry to the
54 // method, & that they must be saved at call sites.
55 //
56 // Ideal Register Type is used to determine how to save & restore a
57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
59 //
60 // The encoding number is the actual bit-pattern placed into the opcodes.
62 // XMM registers. 256-bit registers or 8 words each, labeled (a)-h.
63 // Word a in each register holds a Float, words ab hold a Double.
64 // The whole registers are used in SSE4.2 version intrinsics,
65 // array copy stubs and superword operations (see UseSSE42Intrinsics,
66 // UseXMMForArrayCopy and UseSuperword flags).
67 // XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
68 // Linux ABI: No register preserved across function calls
69 // XMM0-XMM7 might hold parameters
70 // Windows ABI: XMM6-XMM15 preserved across function calls
71 // XMM0-XMM3 might hold parameters
// Each XMM register is described as 8 adjacent 32-bit words (base name plus
// suffixes b-h) so the allocator can track float/double/vector sub-uses.
73 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
74 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
75 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
76 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
77 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
78 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
79 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
80 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
82 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
83 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
84 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
85 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
86 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
87 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
88 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
89 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
91 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
92 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
93 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
94 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
95 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
96 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
97 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
98 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
100 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
101 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
102 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
103 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
104 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
105 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
106 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
107 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
109 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
110 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
111 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
112 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
113 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
114 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
115 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
116 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
118 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
119 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
120 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
121 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
122 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
123 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
124 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
125 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
// Windows x64 ABI: XMM6-XMM15 are callee-saved, hence SOE below.
127 #ifdef _WIN64
129 reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
130 reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
131 reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
132 reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
133 reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
134 reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
135 reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
136 reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));
138 reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
139 reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
140 reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
141 reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
142 reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
143 reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
144 reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
145 reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));
147 reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
148 reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
149 reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
150 reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
151 reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
152 reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
153 reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
154 reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));
156 reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
157 reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
158 reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
159 reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
160 reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
161 reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
162 reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
163 reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));
165 reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
166 reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
167 reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
168 reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
169 reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
170 reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
171 reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
172 reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));
174 reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
175 reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
176 reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
177 reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
178 reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
179 reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
180 reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
181 reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));
183 reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
184 reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
185 reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
186 reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
187 reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
188 reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
189 reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
190 reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));
192 reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
193 reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
194 reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
195 reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
196 reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
197 reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
198 reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
199 reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));
201 reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
202 reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
203 reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
204 reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
205 reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
206 reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
207 reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
208 reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));
210 reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
211 reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
212 reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
213 reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
214 reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
215 reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
216 reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
217 reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));
// Non-Windows ABIs: all XMM registers are caller-saved (SOC).
219 #else // _WIN64
221 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
222 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
223 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
224 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
225 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
226 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
227 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
228 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
230 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
231 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
232 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
233 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
234 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
235 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
236 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
237 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
239 #ifdef _LP64
241 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
242 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
243 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
244 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
245 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
246 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
247 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
248 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
250 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
251 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
252 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
253 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
254 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
255 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
256 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
257 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
259 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
260 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
261 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
262 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
263 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
264 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
265 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
266 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
268 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
269 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
270 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
271 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
272 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
273 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
274 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
275 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
277 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
278 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
279 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
280 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
281 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
282 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
283 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
284 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
286 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
287 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
288 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
289 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
290 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
291 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
292 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
293 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
295 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
296 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
297 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
298 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
299 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
300 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
301 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
302 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
304 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
305 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
306 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
307 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
308 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
309 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
310 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
311 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
313 #endif // _LP64
315 #endif // _WIN64
// Condition-code register. It has no VMReg of its own (VMRegImpl::Bad());
// its encoding slot differs between the 64-bit (16) and 32-bit (8) files.
317 #ifdef _LP64
318 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
319 #else
320 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
321 #endif // _LP64
// Allocation class chunk1 groups every XMM sub-word register so the
// allocator treats the FP/vector file as one contiguous chunk.
323 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
324 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
325 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
326 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
327 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
328 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
329 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
330 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
331 #ifdef _LP64
332 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
333 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
334 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
335 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
336 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
337 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
338 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
339 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
340 #endif
341 );
343 // flags allocation class should be last.
344 alloc_class chunk2(RFLAGS);
346 // Singleton class for condition codes
347 reg_class int_flags(RFLAGS);
349 // Class for all float registers
350 reg_class float_reg(XMM0,
351 XMM1,
352 XMM2,
353 XMM3,
354 XMM4,
355 XMM5,
356 XMM6,
357 XMM7
358 #ifdef _LP64
359 ,XMM8,
360 XMM9,
361 XMM10,
362 XMM11,
363 XMM12,
364 XMM13,
365 XMM14,
366 XMM15
367 #endif
368 );
370 // Class for all double registers
371 reg_class double_reg(XMM0, XMM0b,
372 XMM1, XMM1b,
373 XMM2, XMM2b,
374 XMM3, XMM3b,
375 XMM4, XMM4b,
376 XMM5, XMM5b,
377 XMM6, XMM6b,
378 XMM7, XMM7b
379 #ifdef _LP64
380 ,XMM8, XMM8b,
381 XMM9, XMM9b,
382 XMM10, XMM10b,
383 XMM11, XMM11b,
384 XMM12, XMM12b,
385 XMM13, XMM13b,
386 XMM14, XMM14b,
387 XMM15, XMM15b
388 #endif
389 );
391 // Class for all 32bit vector registers
392 reg_class vectors_reg(XMM0,
393 XMM1,
394 XMM2,
395 XMM3,
396 XMM4,
397 XMM5,
398 XMM6,
399 XMM7
400 #ifdef _LP64
401 ,XMM8,
402 XMM9,
403 XMM10,
404 XMM11,
405 XMM12,
406 XMM13,
407 XMM14,
408 XMM15
409 #endif
410 );
412 // Class for all 64bit vector registers
413 reg_class vectord_reg(XMM0, XMM0b,
414 XMM1, XMM1b,
415 XMM2, XMM2b,
416 XMM3, XMM3b,
417 XMM4, XMM4b,
418 XMM5, XMM5b,
419 XMM6, XMM6b,
420 XMM7, XMM7b
421 #ifdef _LP64
422 ,XMM8, XMM8b,
423 XMM9, XMM9b,
424 XMM10, XMM10b,
425 XMM11, XMM11b,
426 XMM12, XMM12b,
427 XMM13, XMM13b,
428 XMM14, XMM14b,
429 XMM15, XMM15b
430 #endif
431 );
433 // Class for all 128bit vector registers
434 reg_class vectorx_reg(XMM0, XMM0b, XMM0c, XMM0d,
435 XMM1, XMM1b, XMM1c, XMM1d,
436 XMM2, XMM2b, XMM2c, XMM2d,
437 XMM3, XMM3b, XMM3c, XMM3d,
438 XMM4, XMM4b, XMM4c, XMM4d,
439 XMM5, XMM5b, XMM5c, XMM5d,
440 XMM6, XMM6b, XMM6c, XMM6d,
441 XMM7, XMM7b, XMM7c, XMM7d
442 #ifdef _LP64
443 ,XMM8, XMM8b, XMM8c, XMM8d,
444 XMM9, XMM9b, XMM9c, XMM9d,
445 XMM10, XMM10b, XMM10c, XMM10d,
446 XMM11, XMM11b, XMM11c, XMM11d,
447 XMM12, XMM12b, XMM12c, XMM12d,
448 XMM13, XMM13b, XMM13c, XMM13d,
449 XMM14, XMM14b, XMM14c, XMM14d,
450 XMM15, XMM15b, XMM15c, XMM15d
451 #endif
452 );
454 // Class for all 256bit vector registers
455 reg_class vectory_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
456 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
457 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
458 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
459 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
460 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
461 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
462 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
463 #ifdef _LP64
464 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
465 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
466 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
467 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
468 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
469 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
470 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
471 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
472 #endif
473 );
475 %}
478 //----------SOURCE BLOCK-------------------------------------------------------
479 // This is a block of C++ code which provides values, functions, and
480 // definitions necessary in the rest of the architecture description
482 source_hpp %{
483 // Header information of the source block.
484 // Method declarations/definitions which are used outside
485 // the ad-scope can conveniently be defined here.
486 //
487 // To keep related declarations/definitions/uses close together,
488 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
490 class CallStubImpl {
492 //--------------------------------------------------------------
493 //---< Used for optimization in Compile::shorten_branches >---
494 //--------------------------------------------------------------
496 public:
497 // Size of call trampoline stub.
498 static uint size_call_trampoline() {
499 return 0; // no call trampolines on this platform
500 }
502 // number of relocations needed by a call trampoline stub
503 static uint reloc_call_trampoline() {
504 return 0; // no call trampolines on this platform
505 }
506 };
// Emits and sizes the per-nmethod exception and deoptimization handler stubs.
508 class HandlerImpl {
510  public:
512   static int emit_exception_handler(CodeBuffer &cbuf);
513   static int emit_deopt_handler(CodeBuffer& cbuf);
515   static uint size_exception_handler() {
516     // NativeCall instruction size is the same as NativeJump.
517     // exception handler starts out as jump and can be patched to
518     // a call by deoptimization. (4932387)
519     // Note that this value is also credited (in output.cpp) to
520     // the size of the code section.
521     return NativeJump::instruction_size;
522   }
524 #ifdef _LP64
525   static uint size_deopt_handler() {
526     // three 5 byte instructions
527     return 15;
528   }
529 #else
530   static uint size_deopt_handler() {
531     // NativeCall instruction size is the same as NativeJump.
532     // exception handler starts out as jump and can be patched to
533     // a call by deoptimization. (4932387)
534     // Note that this value is also credited (in output.cpp) to
535     // the size of the code section.
536     return 5 + NativeJump::instruction_size; // pushl(); jmp;
537   }
538 #endif
539 };
541 %} // end source_hpp
543 source %{
545 // Emit exception handler code.
546 // Stuff framesize into a register and call a VM stub routine.
547 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
549   // Note that the code buffer's insts_mark is always relative to insts.
550   // That's why we must use the macroassembler to generate a handler.
551   MacroAssembler _masm(&cbuf);
552   address base = __ start_a_stub(size_exception_handler());
553   if (base == NULL) return 0; // CodeBuffer::expand failed
554   int offset = __ offset();
// The handler is a single jump to the shared exception blob; the assert
// below checks it stayed within the size budgeted by size_exception_handler().
555   __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
556   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
557   __ end_a_stub();
558   return offset; // offset of the handler within the code buffer
559 }
561 // Emit deopt handler code.
// Pushes the address of the deopt site on the stack, then jumps to the
// shared deopt blob's unpack entry.
562 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
564   // Note that the code buffer's insts_mark is always relative to insts.
565   // That's why we must use the macroassembler to generate a handler.
566   MacroAssembler _masm(&cbuf);
567   address base = __ start_a_stub(size_deopt_handler());
568   if (base == NULL) return 0; // CodeBuffer::expand failed
569   int offset = __ offset();
571 #ifdef _LP64
572   address the_pc = (address) __ pc();
573   Label next;
574   // push a "the_pc" on the stack without destroying any registers
575   // as they all may be live.
577   // push address of "next"
578   __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
579   __ bind(next);
580   // adjust it so it matches "the_pc"
// (the call pushed the address of "next"; subtracting the bytes emitted
// since "offset" rewinds the pushed value back to "the_pc")
581   __ subptr(Address(rsp, 0), __ offset() - offset);
582 #else
583   InternalAddress here(__ pc());
584   __ pushptr(here.addr());
585 #endif
587   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
588   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
589   __ end_a_stub();
590   return offset; // offset of the handler within the code buffer
591 }
594 //=============================================================================
596 // Float masks come from different places depending on platform.
// In the 64-bit VM the sign-mask/sign-flip constants live in generated
// stub routines; the 32-bit VM uses locally defined constant pools.
597 #ifdef _LP64
598 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
599 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
600 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
601 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
602 #else
603 static address float_signmask() { return (address)float_signmask_pool; }
604 static address float_signflip() { return (address)float_signflip_pool; }
605 static address double_signmask() { return (address)double_signmask_pool; }
606 static address double_signflip() { return (address)double_signflip_pool; }
607 #endif
610 const bool Matcher::match_rule_supported(int opcode) {
611 if (!has_match_rule(opcode))
612 return false;
614 switch (opcode) {
615 case Op_PopCountI:
616 case Op_PopCountL:
617 if (!UsePopCountInstruction)
618 return false;
619 break;
620 case Op_MulVI:
621 if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
622 return false;
623 break;
624 case Op_CompareAndSwapL:
625 #ifdef _LP64
626 case Op_CompareAndSwapP:
627 #endif
628 if (!VM_Version::supports_cx8())
629 return false;
630 break;
631 }
633 return true; // Per default match rules are supported.
634 }
636 // Max vector size in bytes. 0 if not supported.
637 const int Matcher::vector_width_in_bytes(BasicType bt) {
638   assert(is_java_primitive(bt), "only primitive type vectors");
639   if (UseSSE < 2) return 0;
640   // SSE2 supports 128bit vectors for all types.
641   // AVX2 supports 256bit vectors for all types.
642   int size = (UseAVX > 1) ? 32 : 16;
643   // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
644   if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
645     size = 32;
646   // Use flag to limit vector size.
647   size = MIN2(size,(int)MaxVectorSize);
648   // Minimum 2 values in vector (or 4 for bytes).
// Deliberate case fallthrough below: each element width applies its own
// minimum and then also the minimums of all narrower widths.
649   switch (bt) {
650   case T_DOUBLE:
651   case T_LONG:
652     if (size < 16) return 0;
    // fall through
653   case T_FLOAT:
654   case T_INT:
655     if (size < 8) return 0;
    // fall through
656   case T_BOOLEAN:
657   case T_BYTE:
658   case T_CHAR:
659   case T_SHORT:
660     if (size < 4) return 0;
661     break;
662   default:
663     ShouldNotReachHere();
664   }
665   return size;
666 }
668 // Limits on vector size (number of elements) loaded into vector.
669 const int Matcher::max_vector_size(const BasicType bt) {
670 return vector_width_in_bytes(bt)/type2aelembytes(bt);
671 }
672 const int Matcher::min_vector_size(const BasicType bt) {
673 int max_size = max_vector_size(bt);
674 // Min size which can be loaded into vector is 4 bytes.
675 int size = (type2aelembytes(bt) == 1) ? 4 : 2;
676 return MIN2(size,max_size);
677 }
679 // Vector ideal reg corresponding to specidied size in bytes
680 const int Matcher::vector_ideal_reg(int size) {
681 assert(MaxVectorSize >= size, "");
682 switch(size) {
683 case 4: return Op_VecS;
684 case 8: return Op_VecD;
685 case 16: return Op_VecX;
686 case 32: return Op_VecY;
687 }
688 ShouldNotReachHere();
689 return 0;
690 }
692 // Only lowest bits of xmm reg are used for vector shift count.
693 const int Matcher::vector_shift_count_ideal_reg(int size) {
694 return Op_VecS;
695 }
697 // x86 supports misaligned vectors store/load.
698 const bool Matcher::misaligned_vectors_ok() {
699 return !AlignVector; // can be changed by flag
700 }
702 // x86 AES instructions are compatible with SunJCE expanded
703 // keys, hence we do not need to pass the original key to stubs
704 const bool Matcher::pass_original_key_for_aes() {
705 return false;
706 }
708 // Helper methods for MachSpillCopyNode::implementation().
709 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
710 int src_hi, int dst_hi, uint ireg, outputStream* st) {
711 // In 64-bit VM size calculation is very complex. Emitting instructions
712 // into scratch buffer is used to get size in 64-bit VM.
713 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
714 assert(ireg == Op_VecS || // 32bit vector
715 (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
716 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
717 "no non-adjacent vector moves" );
718 if (cbuf) {
719 MacroAssembler _masm(cbuf);
720 int offset = __ offset();
721 switch (ireg) {
722 case Op_VecS: // copy whole register
723 case Op_VecD:
724 case Op_VecX:
725 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
726 break;
727 case Op_VecY:
728 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
729 break;
730 default:
731 ShouldNotReachHere();
732 }
733 int size = __ offset() - offset;
734 #ifdef ASSERT
735 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
736 assert(!do_size || size == 4, "incorrect size calculattion");
737 #endif
738 return size;
739 #ifndef PRODUCT
740 } else if (!do_size) {
741 switch (ireg) {
742 case Op_VecS:
743 case Op_VecD:
744 case Op_VecX:
745 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
746 break;
747 case Op_VecY:
748 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
749 break;
750 default:
751 ShouldNotReachHere();
752 }
753 #endif
754 }
755 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
756 return 4;
757 }
759 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
760 int stack_offset, int reg, uint ireg, outputStream* st) {
761 // In 64-bit VM size calculation is very complex. Emitting instructions
762 // into scratch buffer is used to get size in 64-bit VM.
763 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
764 if (cbuf) {
765 MacroAssembler _masm(cbuf);
766 int offset = __ offset();
767 if (is_load) {
768 switch (ireg) {
769 case Op_VecS:
770 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
771 break;
772 case Op_VecD:
773 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
774 break;
775 case Op_VecX:
776 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
777 break;
778 case Op_VecY:
779 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
780 break;
781 default:
782 ShouldNotReachHere();
783 }
784 } else { // store
785 switch (ireg) {
786 case Op_VecS:
787 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
788 break;
789 case Op_VecD:
790 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
791 break;
792 case Op_VecX:
793 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
794 break;
795 case Op_VecY:
796 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
797 break;
798 default:
799 ShouldNotReachHere();
800 }
801 }
802 int size = __ offset() - offset;
803 #ifdef ASSERT
804 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
805 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
806 assert(!do_size || size == (5+offset_size), "incorrect size calculattion");
807 #endif
808 return size;
809 #ifndef PRODUCT
810 } else if (!do_size) {
811 if (is_load) {
812 switch (ireg) {
813 case Op_VecS:
814 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
815 break;
816 case Op_VecD:
817 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
818 break;
819 case Op_VecX:
820 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
821 break;
822 case Op_VecY:
823 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
824 break;
825 default:
826 ShouldNotReachHere();
827 }
828 } else { // store
829 switch (ireg) {
830 case Op_VecS:
831 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
832 break;
833 case Op_VecD:
834 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
835 break;
836 case Op_VecX:
837 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
838 break;
839 case Op_VecY:
840 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
841 break;
842 default:
843 ShouldNotReachHere();
844 }
845 }
846 #endif
847 }
848 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
849 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
850 return 5+offset_size;
851 }
853 static inline jfloat replicate4_imm(int con, int width) {
854 // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
855 assert(width == 1 || width == 2, "only byte or short types here");
856 int bit_width = width * 8;
857 jint val = con;
858 val &= (1 << bit_width) - 1; // mask off sign bits
859 while(bit_width < 32) {
860 val |= (val << bit_width);
861 bit_width <<= 1;
862 }
863 jfloat fval = *((jfloat*) &val); // coerce to float type
864 return fval;
865 }
867 static inline jdouble replicate8_imm(int con, int width) {
868 // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
869 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
870 int bit_width = width * 8;
871 jlong val = con;
872 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits
873 while(bit_width < 64) {
874 val |= (val << bit_width);
875 bit_width <<= 1;
876 }
877 jdouble dval = *((jdouble*) &val); // coerce to double type
878 return dval;
879 }
#ifndef PRODUCT
// Pretty-print a nop pad node (debug builds only).
void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("nop \t# %d bytes pad for loops and calls", _count);
}
#endif

// Emit _count bytes of nop padding.
void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
  MacroAssembler _masm(&cbuf);
  __ nop(_count);
}

// Size in bytes equals the pad count (each nop byte counts as one).
uint MachNopNode::size(PhaseRegAlloc*) const {
  return _count;
}

#ifndef PRODUCT
// Pretty-print a breakpoint node (debug builds only).
void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("# breakpoint");
}
#endif

// Emit an int3 trap for the breakpoint node.
void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
  MacroAssembler _masm(&cbuf);
  __ int3();
}

// Size is computed generically by emitting into a scratch buffer.
uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
  return MachNode::size(ra_);
}
911 %}
encode %{

  // Save RSP into RBP before a call that may modify the stack pointer
  // (used around method-handle invokes).
  enc_class preserve_SP %{
    debug_only(int off0 = cbuf.insts_size());
    MacroAssembler _masm(&cbuf);
    // RBP is preserved across all calls, even compiled calls.
    // Use it to preserve RSP in places where the callee might change the SP.
    __ movptr(rbp_mh_SP_save, rsp);
    debug_only(int off1 = cbuf.insts_size());
    assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
  %}

  // Restore RSP from the slot saved by preserve_SP.
  enc_class restore_SP %{
    MacroAssembler _masm(&cbuf);
    __ movptr(rsp, rbp_mh_SP_save);
  %}

  // After a call, optionally verify the stack depth is unchanged by
  // checking the magic cookie planted in the frame; trap on mismatch.
  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      MacroAssembler _masm(&cbuf);
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}

%}
//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

// Vectors

// 4-byte vector register operand.
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

// 8-byte vector register operand.
operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

// 16-byte vector register operand.
operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

// 32-byte (AVX) vector register operand.
operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}
// INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)

// ============================================================================

// A Halt node becomes an int3 trap: reaching it at runtime stops the VM.
instruct ShouldNotReachHere() %{
  match(Halt);
  format %{ "int3\t# ShouldNotReachHere" %}
  ins_encode %{
    __ int3();
  %}
  ins_pipe(pipe_slow);
%}
// ============================================================================

// Scalar float add.  The SSE forms (UseAVX == 0) are two-operand and
// destructive (dst op= src); the AVX forms (UseAVX > 0) are three-operand
// vaddss with an independent destination.  The *_imm forms load the
// constant operand from the constant table.

instruct addF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst src));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst (LoadF src)));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst con));
  format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 src2));

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src con));

  format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
// Scalar double add: same SSE/AVX split as the float forms above,
// using addsd/vaddsd (UseSSE>=2 required for scalar double).

instruct addD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst src));

  format %{ "addsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst (LoadD src)));

  format %{ "addsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst con));
  format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 src2));

  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 (LoadD src2)));

  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src con));

  format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
// Scalar float subtract (subss / vsubss), same SSE/AVX structure as add.

instruct subF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst src));

  format %{ "subss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst (LoadF src)));

  format %{ "subss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst con));
  format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 src2));

  format %{ "vsubss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 (LoadF src2)));

  format %{ "vsubss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src con));

  format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
// Scalar double subtract (subsd / vsubsd), same SSE/AVX structure as add.

instruct subD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst src));

  format %{ "subsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst (LoadD src)));

  format %{ "subsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst con));
  format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 src2));

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 (LoadD src2)));

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src con));

  format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
// Scalar float multiply (mulss / vmulss), same SSE/AVX structure as add.

instruct mulF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst src));

  format %{ "mulss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst (LoadF src)));

  format %{ "mulss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst con));
  format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 src2));

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src con));

  format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
// Scalar double multiply (mulsd / vmulsd), same SSE/AVX structure as add.

instruct mulD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst src));

  format %{ "mulsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst (LoadD src)));

  format %{ "mulsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst con));
  format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 src2));

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 (LoadD src2)));

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src con));

  format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
// Scalar float divide (divss / vdivss), same SSE/AVX structure as add.

instruct divF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst src));

  format %{ "divss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst (LoadF src)));

  format %{ "divss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst con));
  format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 src2));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 (LoadF src2)));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src con));

  format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
// Scalar double divide (divsd / vdivsd), same SSE/AVX structure as add.

instruct divD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst src));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst (LoadD src)));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst con));
  format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 src2));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 (LoadD src2)));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src con));

  format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
// Absolute value: AND with a sign-mask constant from memory to clear
// the sign bit (SSE destructive form, AVX three-operand form).

instruct absF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AbsF dst));
  ins_cost(150);
  format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg_reg(regF dst, regF src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    bool vector256 = false;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector256);
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AbsD dst));
  ins_cost(150);
  format %{ "andpd $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg_reg(regD dst, regD src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    bool vector256 = false;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector256);
  %}
  ins_pipe(pipe_slow);
%}
// Negation: XOR with a sign-flip constant from memory to toggle the
// sign bit (SSE destructive form, AVX three-operand form).

instruct negF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (NegF dst));
  ins_cost(150);
  format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg_reg(regF dst, regF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  ins_cost(150);
  format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    bool vector256 = false;
    __ vxorps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signflip()), vector256);
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (NegD dst));
  ins_cost(150);
  format %{ "xorpd $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg_reg(regD dst, regD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));
  ins_cost(150);
  format %{ "vxorpd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    bool vector256 = false;
    __ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signflip()), vector256);
  %}
  ins_pipe(pipe_slow);
%}
// Square root.  The float forms match the pattern
// ConvD2F(SqrtD(ConvF2D x)) so a single sqrtss can replace the
// double-precision round trip.

instruct sqrtF_reg(regF dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_mem(regF dst, memory src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_imm(regF dst, immF con) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
  format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_reg(regD dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD src));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_mem(regD dst, memory src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD (LoadD src)));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_imm(regD dst, immD con) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD con));
  format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
// ====================VECTOR INSTRUCTIONS=====================================

// Vector loads, selected by memory size: movd (4B), movq (8B),
// movdqu (16B), vmovdqu (32B, AVX).

// Load vectors (4 bytes long)
instruct loadV4(vecS dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movd $dst,$mem\t! load vector (4 bytes)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (8 bytes long)
instruct loadV8(vecD dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movq $dst,$mem\t! load vector (8 bytes)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (16 bytes long)
instruct loadV16(vecX dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (32 bytes long)
instruct loadV32(vecY dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 32);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}
// Store vectors, selected by memory size (mirrors the loads above).
instruct storeV4(memory mem, vecS src) %{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movd $mem,$src\t! store vector (4 bytes)" %}
  ins_encode %{
    __ movdl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV8(memory mem, vecD src) %{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movq $mem,$src\t! store vector (8 bytes)" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV16(memory mem, vecX src) %{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV32(memory mem, vecY src) %{
  predicate(n->as_StoreVector()->memory_size() == 32);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
// Replicate byte scalar to be vector.
// Sequence: movd brings the GPR byte into the low XMM lane, punpcklbw
// doubles it to 16 bits, pshuflw fills the low 64 bits; wider forms add
// punpcklqdq (fill 128 bits) and vinserti128h (fill 256 bits).
instruct Repl4B(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate8B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate16B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
1898 // Replicate byte scalar immediate to be vector by loading from const table.
// replicate4_imm/replicate8_imm build a 4- or 8-byte constant-pool entry with
// the immediate already replicated at the given element size (1 = byte), so a
// single load broadcasts it; wider vectors then duplicate the loaded quadword.
1899 instruct Repl4B_imm(vecS dst, immI con) %{
1900 predicate(n->as_Vector()->length() == 4);
1901 match(Set dst (ReplicateB con));
1902 format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %}
1903 ins_encode %{
1904 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
1905 %}
1906 ins_pipe( pipe_slow );
1907 %}
1909 instruct Repl8B_imm(vecD dst, immI con) %{
1910 predicate(n->as_Vector()->length() == 8);
1911 match(Set dst (ReplicateB con));
1912 format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %}
1913 ins_encode %{
1914 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
1915 %}
1916 ins_pipe( pipe_slow );
1917 %}
1919 instruct Repl16B_imm(vecX dst, immI con) %{
1920 predicate(n->as_Vector()->length() == 16);
1921 match(Set dst (ReplicateB con));
1922 format %{ "movq $dst,[$constantaddress]\n\t"
1923 "punpcklqdq $dst,$dst\t! replicate16B($con)" %}
1924 ins_encode %{
1925 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
1926 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
1927 %}
1928 ins_pipe( pipe_slow );
1929 %}
// Replicate a byte immediate into all 32 lanes of a ymm register: load the
// pre-replicated 8-byte constant, fill the low 128 bits (punpcklqdq), then
// copy them into the high half (vinserti128h).
// Fix: format comment said "lreplicate32B" — typo for "replicate32B".
1931 instruct Repl32B_imm(vecY dst, immI con) %{
1932 predicate(n->as_Vector()->length() == 32);
1933 match(Set dst (ReplicateB con));
1934 format %{ "movq $dst,[$constantaddress]\n\t"
1935 "punpcklqdq $dst,$dst\n\t"
1936 "vinserti128h $dst,$dst,$dst\t! replicate32B($con)" %}
1937 ins_encode %{
1938 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
1939 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
1940 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
1941 %}
1942 ins_pipe( pipe_slow );
1943 %}
1945 // Replicate byte scalar zero to be vector
// Zeroing needs no broadcast: xor of a register with itself clears all lanes.
1946 instruct Repl4B_zero(vecS dst, immI0 zero) %{
1947 predicate(n->as_Vector()->length() == 4);
1948 match(Set dst (ReplicateB zero));
1949 format %{ "pxor $dst,$dst\t! replicate4B zero" %}
1950 ins_encode %{
1951 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
1952 %}
1953 ins_pipe( fpu_reg_reg );
1954 %}
1956 instruct Repl8B_zero(vecD dst, immI0 zero) %{
1957 predicate(n->as_Vector()->length() == 8);
1958 match(Set dst (ReplicateB zero));
1959 format %{ "pxor $dst,$dst\t! replicate8B zero" %}
1960 ins_encode %{
1961 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
1962 %}
1963 ins_pipe( fpu_reg_reg );
1964 %}
1966 instruct Repl16B_zero(vecX dst, immI0 zero) %{
1967 predicate(n->as_Vector()->length() == 16);
1968 match(Set dst (ReplicateB zero));
1969 format %{ "pxor $dst,$dst\t! replicate16B zero" %}
1970 ins_encode %{
1971 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
1972 %}
1973 ins_pipe( fpu_reg_reg );
1974 %}
1976 instruct Repl32B_zero(vecY dst, immI0 zero) %{
1977 predicate(n->as_Vector()->length() == 32);
1978 match(Set dst (ReplicateB zero));
1979 format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %}
1980 ins_encode %{
1981 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
// NOTE(review): the call below is vpxor(); presumably the MacroAssembler-level
// vpxor falls back to vxorpd on AVX1 as the comment says — confirm in
// macroAssembler_x86.
1982 bool vector256 = true;
1983 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
1984 %}
1985 ins_pipe( fpu_reg_reg );
1986 %}
1988 // Replicate char/short (2 byte) scalar to be vector
// For 16-bit elements no punpcklbw widening is needed: pshuflw 0x00 alone
// broadcasts the low word across the low 64 bits; punpcklqdq and
// vinserti128h extend that to 128 and 256 bits respectively.
1989 instruct Repl2S(vecS dst, rRegI src) %{
1990 predicate(n->as_Vector()->length() == 2);
1991 match(Set dst (ReplicateS src));
1992 format %{ "movd $dst,$src\n\t"
1993 "pshuflw $dst,$dst,0x00\t! replicate2S" %}
1994 ins_encode %{
1995 __ movdl($dst$$XMMRegister, $src$$Register);
1996 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1997 %}
1998 ins_pipe( fpu_reg_reg );
1999 %}
2001 instruct Repl4S(vecD dst, rRegI src) %{
2002 predicate(n->as_Vector()->length() == 4);
2003 match(Set dst (ReplicateS src));
2004 format %{ "movd $dst,$src\n\t"
2005 "pshuflw $dst,$dst,0x00\t! replicate4S" %}
2006 ins_encode %{
2007 __ movdl($dst$$XMMRegister, $src$$Register);
2008 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2009 %}
2010 ins_pipe( fpu_reg_reg );
2011 %}
2013 instruct Repl8S(vecX dst, rRegI src) %{
2014 predicate(n->as_Vector()->length() == 8);
2015 match(Set dst (ReplicateS src));
2016 format %{ "movd $dst,$src\n\t"
2017 "pshuflw $dst,$dst,0x00\n\t"
2018 "punpcklqdq $dst,$dst\t! replicate8S" %}
2019 ins_encode %{
2020 __ movdl($dst$$XMMRegister, $src$$Register);
2021 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2022 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2023 %}
2024 ins_pipe( pipe_slow );
2025 %}
2027 instruct Repl16S(vecY dst, rRegI src) %{
2028 predicate(n->as_Vector()->length() == 16);
2029 match(Set dst (ReplicateS src));
2030 format %{ "movd $dst,$src\n\t"
2031 "pshuflw $dst,$dst,0x00\n\t"
2032 "punpcklqdq $dst,$dst\n\t"
2033 "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
2034 ins_encode %{
2035 __ movdl($dst$$XMMRegister, $src$$Register);
2036 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2037 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2038 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2039 %}
2040 ins_pipe( pipe_slow );
2041 %}
2043 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
// Element size 2 tells replicate*_imm to tile the immediate as 16-bit values
// in the constant-pool entry before it is loaded.
2044 instruct Repl2S_imm(vecS dst, immI con) %{
2045 predicate(n->as_Vector()->length() == 2);
2046 match(Set dst (ReplicateS con));
2047 format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %}
2048 ins_encode %{
2049 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
2050 %}
2051 ins_pipe( fpu_reg_reg );
2052 %}
2054 instruct Repl4S_imm(vecD dst, immI con) %{
2055 predicate(n->as_Vector()->length() == 4);
2056 match(Set dst (ReplicateS con));
2057 format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %}
2058 ins_encode %{
2059 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
2060 %}
2061 ins_pipe( fpu_reg_reg );
2062 %}
2064 instruct Repl8S_imm(vecX dst, immI con) %{
2065 predicate(n->as_Vector()->length() == 8);
2066 match(Set dst (ReplicateS con));
2067 format %{ "movq $dst,[$constantaddress]\n\t"
2068 "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
2069 ins_encode %{
2070 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
2071 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2072 %}
2073 ins_pipe( pipe_slow );
2074 %}
2076 instruct Repl16S_imm(vecY dst, immI con) %{
2077 predicate(n->as_Vector()->length() == 16);
2078 match(Set dst (ReplicateS con));
2079 format %{ "movq $dst,[$constantaddress]\n\t"
2080 "punpcklqdq $dst,$dst\n\t"
2081 "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %}
2082 ins_encode %{
2083 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
2084 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2085 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2086 %}
2087 ins_pipe( pipe_slow );
2088 %}
2090 // Replicate char/short (2 byte) scalar zero to be vector
// Same xor-with-self zeroing idiom as the byte variants above.
2091 instruct Repl2S_zero(vecS dst, immI0 zero) %{
2092 predicate(n->as_Vector()->length() == 2);
2093 match(Set dst (ReplicateS zero));
2094 format %{ "pxor $dst,$dst\t! replicate2S zero" %}
2095 ins_encode %{
2096 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
2097 %}
2098 ins_pipe( fpu_reg_reg );
2099 %}
2101 instruct Repl4S_zero(vecD dst, immI0 zero) %{
2102 predicate(n->as_Vector()->length() == 4);
2103 match(Set dst (ReplicateS zero));
2104 format %{ "pxor $dst,$dst\t! replicate4S zero" %}
2105 ins_encode %{
2106 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
2107 %}
2108 ins_pipe( fpu_reg_reg );
2109 %}
2111 instruct Repl8S_zero(vecX dst, immI0 zero) %{
2112 predicate(n->as_Vector()->length() == 8);
2113 match(Set dst (ReplicateS zero));
2114 format %{ "pxor $dst,$dst\t! replicate8S zero" %}
2115 ins_encode %{
2116 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
2117 %}
2118 ins_pipe( fpu_reg_reg );
2119 %}
2121 instruct Repl16S_zero(vecY dst, immI0 zero) %{
2122 predicate(n->as_Vector()->length() == 16);
2123 match(Set dst (ReplicateS zero));
2124 format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %}
2125 ins_encode %{
2126 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
// NOTE(review): code calls vpxor(); presumably the MacroAssembler wrapper
// emits vxorpd on AVX1 — confirm.
2127 bool vector256 = true;
2128 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
2129 %}
2130 ins_pipe( fpu_reg_reg );
2131 %}
2133 // Replicate integer (4 byte) scalar to be vector
// 32-bit elements broadcast in one shuffle: pshufd 0x00 copies dword 0 into
// all four dword lanes; the 8-element form adds vinserti128h for the high ymm half.
2134 instruct Repl2I(vecD dst, rRegI src) %{
2135 predicate(n->as_Vector()->length() == 2);
2136 match(Set dst (ReplicateI src));
2137 format %{ "movd $dst,$src\n\t"
2138 "pshufd $dst,$dst,0x00\t! replicate2I" %}
2139 ins_encode %{
2140 __ movdl($dst$$XMMRegister, $src$$Register);
2141 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2142 %}
2143 ins_pipe( fpu_reg_reg );
2144 %}
2146 instruct Repl4I(vecX dst, rRegI src) %{
2147 predicate(n->as_Vector()->length() == 4);
2148 match(Set dst (ReplicateI src));
2149 format %{ "movd $dst,$src\n\t"
2150 "pshufd $dst,$dst,0x00\t! replicate4I" %}
2151 ins_encode %{
2152 __ movdl($dst$$XMMRegister, $src$$Register);
2153 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2154 %}
2155 ins_pipe( pipe_slow );
2156 %}
2158 instruct Repl8I(vecY dst, rRegI src) %{
2159 predicate(n->as_Vector()->length() == 8);
2160 match(Set dst (ReplicateI src));
2161 format %{ "movd $dst,$src\n\t"
2162 "pshufd $dst,$dst,0x00\n\t"
2163 "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
2164 ins_encode %{
2165 __ movdl($dst$$XMMRegister, $src$$Register);
2166 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2167 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2168 %}
2169 ins_pipe( pipe_slow );
2170 %}
2172 // Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
// The constant-pool quadword already holds the immediate twice (element
// size 4), so one movq fills two lanes; wider forms duplicate from there.
2173 instruct Repl2I_imm(vecD dst, immI con) %{
2174 predicate(n->as_Vector()->length() == 2);
2175 match(Set dst (ReplicateI con));
2176 format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %}
2177 ins_encode %{
2178 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
2179 %}
2180 ins_pipe( fpu_reg_reg );
2181 %}
2183 instruct Repl4I_imm(vecX dst, immI con) %{
2184 predicate(n->as_Vector()->length() == 4);
2185 match(Set dst (ReplicateI con));
2186 format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
2187 "punpcklqdq $dst,$dst" %}
2188 ins_encode %{
2189 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
2190 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2191 %}
2192 ins_pipe( pipe_slow );
2193 %}
2195 instruct Repl8I_imm(vecY dst, immI con) %{
2196 predicate(n->as_Vector()->length() == 8);
2197 match(Set dst (ReplicateI con));
2198 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
2199 "punpcklqdq $dst,$dst\n\t"
2200 "vinserti128h $dst,$dst,$dst" %}
2201 ins_encode %{
2202 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
2203 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2204 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2205 %}
2206 ins_pipe( pipe_slow );
2207 %}
2209 // Integer could be loaded into xmm register directly from memory.
// Same broadcast as the register forms, but the dword comes straight from
// $mem (matching ReplicateI (LoadI mem)), saving a GPR round trip.
2210 instruct Repl2I_mem(vecD dst, memory mem) %{
2211 predicate(n->as_Vector()->length() == 2);
2212 match(Set dst (ReplicateI (LoadI mem)));
2213 format %{ "movd $dst,$mem\n\t"
2214 "pshufd $dst,$dst,0x00\t! replicate2I" %}
2215 ins_encode %{
2216 __ movdl($dst$$XMMRegister, $mem$$Address);
2217 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2218 %}
2219 ins_pipe( fpu_reg_reg );
2220 %}
2222 instruct Repl4I_mem(vecX dst, memory mem) %{
2223 predicate(n->as_Vector()->length() == 4);
2224 match(Set dst (ReplicateI (LoadI mem)));
2225 format %{ "movd $dst,$mem\n\t"
2226 "pshufd $dst,$dst,0x00\t! replicate4I" %}
2227 ins_encode %{
2228 __ movdl($dst$$XMMRegister, $mem$$Address);
2229 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2230 %}
2231 ins_pipe( pipe_slow );
2232 %}
2234 instruct Repl8I_mem(vecY dst, memory mem) %{
2235 predicate(n->as_Vector()->length() == 8);
2236 match(Set dst (ReplicateI (LoadI mem)));
2237 format %{ "movd $dst,$mem\n\t"
2238 "pshufd $dst,$dst,0x00\n\t"
2239 "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
2240 ins_encode %{
2241 __ movdl($dst$$XMMRegister, $mem$$Address);
2242 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2243 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2244 %}
2245 ins_pipe( pipe_slow );
2246 %}
2248 // Replicate integer (4 byte) scalar zero to be vector
// pxor reg,reg zeroes every lane in one instruction.
2249 instruct Repl2I_zero(vecD dst, immI0 zero) %{
2250 predicate(n->as_Vector()->length() == 2);
2251 match(Set dst (ReplicateI zero));
2252 format %{ "pxor $dst,$dst\t! replicate2I" %}
2253 ins_encode %{
2254 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
2255 %}
2256 ins_pipe( fpu_reg_reg );
2257 %}
// Zero all four int lanes of an xmm register via xor-with-self.
// Fix: format comment had an unmatched ")" — "replicate4I zero)".
2259 instruct Repl4I_zero(vecX dst, immI0 zero) %{
2260 predicate(n->as_Vector()->length() == 4);
2261 match(Set dst (ReplicateI zero));
2262 format %{ "pxor $dst,$dst\t! replicate4I zero" %}
2263 ins_encode %{
2264 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
2265 %}
2266 ins_pipe( fpu_reg_reg );
2267 %}
// Zero all eight int lanes of a ymm register with the 3-operand AVX xor.
2269 instruct Repl8I_zero(vecY dst, immI0 zero) %{
2270 predicate(n->as_Vector()->length() == 8);
2271 match(Set dst (ReplicateI zero));
2272 format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %}
2273 ins_encode %{
2274 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
// NOTE(review): code calls vpxor(); presumably the MacroAssembler wrapper
// emits vxorpd on AVX1 — confirm.
2275 bool vector256 = true;
2276 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
2277 %}
2278 ins_pipe( fpu_reg_reg );
2279 %}
2281 // Replicate long (8 byte) scalar to be vector
// LP64: a 64-bit GPR moves directly into an xmm lane (movdq) and is
// duplicated with punpcklqdq.  32-bit: the long lives in a GPR pair, so its
// low and high halves are moved separately and glued with punpckldq first;
// TEMP dst/tmp keeps the allocator from aliasing them with src.
2282 #ifdef _LP64
2283 instruct Repl2L(vecX dst, rRegL src) %{
2284 predicate(n->as_Vector()->length() == 2);
2285 match(Set dst (ReplicateL src));
2286 format %{ "movdq $dst,$src\n\t"
2287 "punpcklqdq $dst,$dst\t! replicate2L" %}
2288 ins_encode %{
2289 __ movdq($dst$$XMMRegister, $src$$Register);
2290 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2291 %}
2292 ins_pipe( pipe_slow );
2293 %}
2295 instruct Repl4L(vecY dst, rRegL src) %{
2296 predicate(n->as_Vector()->length() == 4);
2297 match(Set dst (ReplicateL src));
2298 format %{ "movdq $dst,$src\n\t"
2299 "punpcklqdq $dst,$dst\n\t"
2300 "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
2301 ins_encode %{
2302 __ movdq($dst$$XMMRegister, $src$$Register);
2303 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2304 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2305 %}
2306 ins_pipe( pipe_slow );
2307 %}
2308 #else // _LP64
2309 instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
2310 predicate(n->as_Vector()->length() == 2);
2311 match(Set dst (ReplicateL src));
2312 effect(TEMP dst, USE src, TEMP tmp);
2313 format %{ "movdl $dst,$src.lo\n\t"
2314 "movdl $tmp,$src.hi\n\t"
2315 "punpckldq $dst,$tmp\n\t"
2316 "punpcklqdq $dst,$dst\t! replicate2L"%}
2317 ins_encode %{
2318 __ movdl($dst$$XMMRegister, $src$$Register);
2319 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
2320 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
2321 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2322 %}
2323 ins_pipe( pipe_slow );
2324 %}
2326 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
2327 predicate(n->as_Vector()->length() == 4);
2328 match(Set dst (ReplicateL src));
2329 effect(TEMP dst, USE src, TEMP tmp);
2330 format %{ "movdl $dst,$src.lo\n\t"
2331 "movdl $tmp,$src.hi\n\t"
2332 "punpckldq $dst,$tmp\n\t"
2333 "punpcklqdq $dst,$dst\n\t"
2334 "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
2335 ins_encode %{
2336 __ movdl($dst$$XMMRegister, $src$$Register);
2337 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
2338 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
2339 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2340 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2341 %}
2342 ins_pipe( pipe_slow );
2343 %}
2344 #endif // _LP64
2346 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
// A 64-bit immediate needs no replicate*_imm tiling — the constant itself is
// one element; duplication is done in registers after the load.
2347 instruct Repl2L_imm(vecX dst, immL con) %{
2348 predicate(n->as_Vector()->length() == 2);
2349 match(Set dst (ReplicateL con));
2350 format %{ "movq $dst,[$constantaddress]\n\t"
2351 "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
2352 ins_encode %{
2353 __ movq($dst$$XMMRegister, $constantaddress($con));
2354 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2355 %}
2356 ins_pipe( pipe_slow );
2357 %}
2359 instruct Repl4L_imm(vecY dst, immL con) %{
2360 predicate(n->as_Vector()->length() == 4);
2361 match(Set dst (ReplicateL con));
2362 format %{ "movq $dst,[$constantaddress]\n\t"
2363 "punpcklqdq $dst,$dst\n\t"
2364 "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %}
2365 ins_encode %{
2366 __ movq($dst$$XMMRegister, $constantaddress($con));
2367 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2368 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2369 %}
2370 ins_pipe( pipe_slow );
2371 %}
2373 // Long could be loaded into xmm register directly from memory.
// Matches ReplicateL fed by a LoadL so the value never touches a GPR.
2374 instruct Repl2L_mem(vecX dst, memory mem) %{
2375 predicate(n->as_Vector()->length() == 2);
2376 match(Set dst (ReplicateL (LoadL mem)));
2377 format %{ "movq $dst,$mem\n\t"
2378 "punpcklqdq $dst,$dst\t! replicate2L" %}
2379 ins_encode %{
2380 __ movq($dst$$XMMRegister, $mem$$Address);
2381 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2382 %}
2383 ins_pipe( pipe_slow );
2384 %}
2386 instruct Repl4L_mem(vecY dst, memory mem) %{
2387 predicate(n->as_Vector()->length() == 4);
2388 match(Set dst (ReplicateL (LoadL mem)));
2389 format %{ "movq $dst,$mem\n\t"
2390 "punpcklqdq $dst,$dst\n\t"
2391 "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
2392 ins_encode %{
2393 __ movq($dst$$XMMRegister, $mem$$Address);
2394 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2395 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2396 %}
2397 ins_pipe( pipe_slow );
2398 %}
2400 // Replicate long (8 byte) scalar zero to be vector
2401 instruct Repl2L_zero(vecX dst, immL0 zero) %{
2402 predicate(n->as_Vector()->length() == 2);
2403 match(Set dst (ReplicateL zero));
2404 format %{ "pxor $dst,$dst\t! replicate2L zero" %}
2405 ins_encode %{
2406 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
2407 %}
2408 ins_pipe( fpu_reg_reg );
2409 %}
2411 instruct Repl4L_zero(vecY dst, immL0 zero) %{
2412 predicate(n->as_Vector()->length() == 4);
2413 match(Set dst (ReplicateL zero));
2414 format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %}
2415 ins_encode %{
2416 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
// NOTE(review): code calls vpxor(); presumably the MacroAssembler wrapper
// emits vxorpd on AVX1 — confirm.
2417 bool vector256 = true;
2418 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
2419 %}
2420 ins_pipe( fpu_reg_reg );
2421 %}
2423 // Replicate float (4 byte) scalar to be vector
// The float is already in an xmm register, so pshufd 0x00 broadcasts lane 0
// of $src into both lanes of $dst in a single instruction.
// Fix: format string said "pshufd $dst,$dst" but the encoding reads $src —
// the disassembly comment now matches the emitted instruction.
2424 instruct Repl2F(vecD dst, regF src) %{
2425 predicate(n->as_Vector()->length() == 2);
2426 match(Set dst (ReplicateF src));
2427 format %{ "pshufd $dst,$src,0x00\t! replicate2F" %}
2428 ins_encode %{
2429 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
2430 %}
2431 ins_pipe( fpu_reg_reg );
2432 %}
// Broadcast a float into all four xmm lanes with pshufd 0x00.
// Fix: format string said "pshufd $dst,$dst" but the encoding reads $src
// (the sibling Repl8F format already shows $src).
2434 instruct Repl4F(vecX dst, regF src) %{
2435 predicate(n->as_Vector()->length() == 4);
2436 match(Set dst (ReplicateF src));
2437 format %{ "pshufd $dst,$src,0x00\t! replicate4F" %}
2438 ins_encode %{
2439 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
2440 %}
2441 ins_pipe( pipe_slow );
2442 %}
// Broadcast a float into all eight ymm lanes: pshufd fills the low 128 bits,
// vinsertf128h copies them into the high half.
2444 instruct Repl8F(vecY dst, regF src) %{
2445 predicate(n->as_Vector()->length() == 8);
2446 match(Set dst (ReplicateF src));
2447 format %{ "pshufd $dst,$src,0x00\n\t"
2448 "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
2449 ins_encode %{
2450 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
2451 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2452 %}
2453 ins_pipe( pipe_slow );
2454 %}
2456 // Replicate float (4 byte) scalar zero to be vector
// xorps/vxorps are the FP-domain xor forms, avoiding a domain-crossing
// penalty on float data relative to pxor.
2457 instruct Repl2F_zero(vecD dst, immF0 zero) %{
2458 predicate(n->as_Vector()->length() == 2);
2459 match(Set dst (ReplicateF zero));
2460 format %{ "xorps $dst,$dst\t! replicate2F zero" %}
2461 ins_encode %{
2462 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
2463 %}
2464 ins_pipe( fpu_reg_reg );
2465 %}
2467 instruct Repl4F_zero(vecX dst, immF0 zero) %{
2468 predicate(n->as_Vector()->length() == 4);
2469 match(Set dst (ReplicateF zero));
2470 format %{ "xorps $dst,$dst\t! replicate4F zero" %}
2471 ins_encode %{
2472 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
2473 %}
2474 ins_pipe( fpu_reg_reg );
2475 %}
2477 instruct Repl8F_zero(vecY dst, immF0 zero) %{
2478 predicate(n->as_Vector()->length() == 8);
2479 match(Set dst (ReplicateF zero));
2480 format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %}
2481 ins_encode %{
2482 bool vector256 = true;
2483 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
2484 %}
2485 ins_pipe( fpu_reg_reg );
2486 %}
2488 // Replicate double (8 bytes) scalar to be vector
// pshufd 0x44 selects dwords {0,1,0,1}, i.e. it duplicates the low quadword
// (the double) into both 64-bit lanes.
2489 instruct Repl2D(vecX dst, regD src) %{
2490 predicate(n->as_Vector()->length() == 2);
2491 match(Set dst (ReplicateD src));
2492 format %{ "pshufd $dst,$src,0x44\t! replicate2D" %}
2493 ins_encode %{
2494 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
2495 %}
2496 ins_pipe( pipe_slow );
2497 %}
2499 instruct Repl4D(vecY dst, regD src) %{
2500 predicate(n->as_Vector()->length() == 4);
2501 match(Set dst (ReplicateD src));
2502 format %{ "pshufd $dst,$src,0x44\n\t"
2503 "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
2504 ins_encode %{
2505 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
2506 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2507 %}
2508 ins_pipe( pipe_slow );
2509 %}
2511 // Replicate double (8 byte) scalar zero to be vector
// xorpd is the double-domain xor-with-self zeroing idiom.
2512 instruct Repl2D_zero(vecX dst, immD0 zero) %{
2513 predicate(n->as_Vector()->length() == 2);
2514 match(Set dst (ReplicateD zero));
2515 format %{ "xorpd $dst,$dst\t! replicate2D zero" %}
2516 ins_encode %{
2517 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
2518 %}
2519 ins_pipe( fpu_reg_reg );
2520 %}
// Zero all four double lanes of a ymm register with the 256-bit vxorpd.
// Fix: format comment said "vxorpd $dst,$dst,$dst,vect256" — no other
// 256-bit zero-replicate format prints the width operand; normalized to
// match the siblings (e.g. Repl8F_zero).
2522 instruct Repl4D_zero(vecY dst, immD0 zero) %{
2523 predicate(n->as_Vector()->length() == 4);
2524 match(Set dst (ReplicateD zero));
2525 format %{ "vxorpd $dst,$dst,$dst\t! replicate4D zero" %}
2526 ins_encode %{
2527 bool vector256 = true;
2528 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
2529 %}
2530 ins_pipe( fpu_reg_reg );
2531 %}
2533 // ====================VECTOR ARITHMETIC=======================================
2535 // --------------------------------- ADD --------------------------------------
2537 // Bytes vector add
// Three shapes per width: two-operand SSE paddb (dst += src), three-operand
// AVX vpaddb (reg,reg,reg; predicate UseAVX > 0), and an AVX reg,mem form
// folding the LoadVector.  32-byte forms require AVX2 (UseAVX > 1) and have
// no SSE counterpart.
2538 instruct vadd4B(vecS dst, vecS src) %{
2539 predicate(n->as_Vector()->length() == 4);
2540 match(Set dst (AddVB dst src));
2541 format %{ "paddb $dst,$src\t! add packed4B" %}
2542 ins_encode %{
2543 __ paddb($dst$$XMMRegister, $src$$XMMRegister);
2544 %}
2545 ins_pipe( pipe_slow );
2546 %}
2548 instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
2549 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2550 match(Set dst (AddVB src1 src2));
2551 format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
2552 ins_encode %{
2553 bool vector256 = false;
2554 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2555 %}
2556 ins_pipe( pipe_slow );
2557 %}
2559 instruct vadd8B(vecD dst, vecD src) %{
2560 predicate(n->as_Vector()->length() == 8);
2561 match(Set dst (AddVB dst src));
2562 format %{ "paddb $dst,$src\t! add packed8B" %}
2563 ins_encode %{
2564 __ paddb($dst$$XMMRegister, $src$$XMMRegister);
2565 %}
2566 ins_pipe( pipe_slow );
2567 %}
2569 instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
2570 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
2571 match(Set dst (AddVB src1 src2));
2572 format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
2573 ins_encode %{
2574 bool vector256 = false;
2575 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2576 %}
2577 ins_pipe( pipe_slow );
2578 %}
2580 instruct vadd16B(vecX dst, vecX src) %{
2581 predicate(n->as_Vector()->length() == 16);
2582 match(Set dst (AddVB dst src));
2583 format %{ "paddb $dst,$src\t! add packed16B" %}
2584 ins_encode %{
2585 __ paddb($dst$$XMMRegister, $src$$XMMRegister);
2586 %}
2587 ins_pipe( pipe_slow );
2588 %}
2590 instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
2591 predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
2592 match(Set dst (AddVB src1 src2));
2593 format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
2594 ins_encode %{
2595 bool vector256 = false;
2596 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2597 %}
2598 ins_pipe( pipe_slow );
2599 %}
2601 instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
2602 predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
2603 match(Set dst (AddVB src (LoadVector mem)));
2604 format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
2605 ins_encode %{
2606 bool vector256 = false;
2607 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2608 %}
2609 ins_pipe( pipe_slow );
2610 %}
2612 instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
2613 predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
2614 match(Set dst (AddVB src1 src2));
2615 format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
2616 ins_encode %{
2617 bool vector256 = true;
2618 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2619 %}
2620 ins_pipe( pipe_slow );
2621 %}
2623 instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
2624 predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
2625 match(Set dst (AddVB src (LoadVector mem)));
2626 format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
2627 ins_encode %{
2628 bool vector256 = true;
2629 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2630 %}
2631 ins_pipe( pipe_slow );
2632 %}
2634 // Shorts/Chars vector add
// Same SSE/AVX/AVX-mem pattern as the byte adds, using paddw/vpaddw on
// 16-bit elements.
2635 instruct vadd2S(vecS dst, vecS src) %{
2636 predicate(n->as_Vector()->length() == 2);
2637 match(Set dst (AddVS dst src));
2638 format %{ "paddw $dst,$src\t! add packed2S" %}
2639 ins_encode %{
2640 __ paddw($dst$$XMMRegister, $src$$XMMRegister);
2641 %}
2642 ins_pipe( pipe_slow );
2643 %}
2645 instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
2646 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2647 match(Set dst (AddVS src1 src2));
2648 format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
2649 ins_encode %{
2650 bool vector256 = false;
2651 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2652 %}
2653 ins_pipe( pipe_slow );
2654 %}
2656 instruct vadd4S(vecD dst, vecD src) %{
2657 predicate(n->as_Vector()->length() == 4);
2658 match(Set dst (AddVS dst src));
2659 format %{ "paddw $dst,$src\t! add packed4S" %}
2660 ins_encode %{
2661 __ paddw($dst$$XMMRegister, $src$$XMMRegister);
2662 %}
2663 ins_pipe( pipe_slow );
2664 %}
2666 instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
2667 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2668 match(Set dst (AddVS src1 src2));
2669 format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
2670 ins_encode %{
2671 bool vector256 = false;
2672 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2673 %}
2674 ins_pipe( pipe_slow );
2675 %}
2677 instruct vadd8S(vecX dst, vecX src) %{
2678 predicate(n->as_Vector()->length() == 8);
2679 match(Set dst (AddVS dst src));
2680 format %{ "paddw $dst,$src\t! add packed8S" %}
2681 ins_encode %{
2682 __ paddw($dst$$XMMRegister, $src$$XMMRegister);
2683 %}
2684 ins_pipe( pipe_slow );
2685 %}
2687 instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
2688 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
2689 match(Set dst (AddVS src1 src2));
2690 format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
2691 ins_encode %{
2692 bool vector256 = false;
2693 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2694 %}
2695 ins_pipe( pipe_slow );
2696 %}
2698 instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
2699 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
2700 match(Set dst (AddVS src (LoadVector mem)));
2701 format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
2702 ins_encode %{
2703 bool vector256 = false;
2704 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2705 %}
2706 ins_pipe( pipe_slow );
2707 %}
2709 instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
2710 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
2711 match(Set dst (AddVS src1 src2));
2712 format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
2713 ins_encode %{
2714 bool vector256 = true;
2715 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2716 %}
2717 ins_pipe( pipe_slow );
2718 %}
2720 instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
2721 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
2722 match(Set dst (AddVS src (LoadVector mem)));
2723 format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
2724 ins_encode %{
2725 bool vector256 = true;
2726 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2727 %}
2728 ins_pipe( pipe_slow );
2729 %}
2731 // Integers vector add
// paddd/vpaddd on 32-bit elements; shapes mirror the byte and short adds.
2732 instruct vadd2I(vecD dst, vecD src) %{
2733 predicate(n->as_Vector()->length() == 2);
2734 match(Set dst (AddVI dst src));
2735 format %{ "paddd $dst,$src\t! add packed2I" %}
2736 ins_encode %{
2737 __ paddd($dst$$XMMRegister, $src$$XMMRegister);
2738 %}
2739 ins_pipe( pipe_slow );
2740 %}
2742 instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
2743 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2744 match(Set dst (AddVI src1 src2));
2745 format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %}
2746 ins_encode %{
2747 bool vector256 = false;
2748 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2749 %}
2750 ins_pipe( pipe_slow );
2751 %}
2753 instruct vadd4I(vecX dst, vecX src) %{
2754 predicate(n->as_Vector()->length() == 4);
2755 match(Set dst (AddVI dst src));
2756 format %{ "paddd $dst,$src\t! add packed4I" %}
2757 ins_encode %{
2758 __ paddd($dst$$XMMRegister, $src$$XMMRegister);
2759 %}
2760 ins_pipe( pipe_slow );
2761 %}
2763 instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
2764 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2765 match(Set dst (AddVI src1 src2));
2766 format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
2767 ins_encode %{
2768 bool vector256 = false;
2769 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2770 %}
2771 ins_pipe( pipe_slow );
2772 %}
2774 instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
2775 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2776 match(Set dst (AddVI src (LoadVector mem)));
2777 format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %}
2778 ins_encode %{
2779 bool vector256 = false;
2780 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2781 %}
2782 ins_pipe( pipe_slow );
2783 %}
2785 instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
2786 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
2787 match(Set dst (AddVI src1 src2));
2788 format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %}
2789 ins_encode %{
2790 bool vector256 = true;
2791 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2792 %}
2793 ins_pipe( pipe_slow );
2794 %}
2796 instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
2797 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
2798 match(Set dst (AddVI src (LoadVector mem)));
2799 format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %}
2800 ins_encode %{
2801 bool vector256 = true;
2802 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2803 %}
2804 ins_pipe( pipe_slow );
2805 %}
2807 // Longs vector add
// paddq/vpaddq on packed 64-bit longs.  SSE form (dst = dst + src),
// AVX three-operand register form, and AVX memory-operand form.
// 256-bit (4L, vecY) integer forms require UseAVX > 1 (AVX2);
// 'vector256' selects the YMM encoding.
2808 instruct vadd2L(vecX dst, vecX src) %{
2809 predicate(n->as_Vector()->length() == 2);
2810 match(Set dst (AddVL dst src));
2811 format %{ "paddq $dst,$src\t! add packed2L" %}
2812 ins_encode %{
2813 __ paddq($dst$$XMMRegister, $src$$XMMRegister);
2814 %}
2815 ins_pipe( pipe_slow );
2816 %}
2818 instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
2819 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2820 match(Set dst (AddVL src1 src2));
2821 format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
2822 ins_encode %{
2823 bool vector256 = false;
2824 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2825 %}
2826 ins_pipe( pipe_slow );
2827 %}
2829 instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
2830 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2831 match(Set dst (AddVL src (LoadVector mem)));
2832 format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %}
2833 ins_encode %{
2834 bool vector256 = false;
2835 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2836 %}
2837 ins_pipe( pipe_slow );
2838 %}
// 4L: four 64-bit longs in a 256-bit vector; AVX2 only.
2840 instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
2841 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
2842 match(Set dst (AddVL src1 src2));
2843 format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
2844 ins_encode %{
2845 bool vector256 = true;
2846 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2847 %}
2848 ins_pipe( pipe_slow );
2849 %}
2851 instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
2852 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
2853 match(Set dst (AddVL src (LoadVector mem)));
2854 format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %}
2855 ins_encode %{
2856 bool vector256 = true;
2857 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2858 %}
2859 ins_pipe( pipe_slow );
2860 %}
2862 // Floats vector add
// addps/vaddps on packed single-precision floats.  Unlike the integer
// forms, 256-bit (8F) FP adds only require UseAVX > 0, since AVX1
// already provides 256-bit floating-point arithmetic.
2863 instruct vadd2F(vecD dst, vecD src) %{
2864 predicate(n->as_Vector()->length() == 2);
2865 match(Set dst (AddVF dst src));
2866 format %{ "addps $dst,$src\t! add packed2F" %}
2867 ins_encode %{
2868 __ addps($dst$$XMMRegister, $src$$XMMRegister);
2869 %}
2870 ins_pipe( pipe_slow );
2871 %}
2873 instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
2874 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2875 match(Set dst (AddVF src1 src2));
2876 format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
2877 ins_encode %{
2878 bool vector256 = false;
2879 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2880 %}
2881 ins_pipe( pipe_slow );
2882 %}
2884 instruct vadd4F(vecX dst, vecX src) %{
2885 predicate(n->as_Vector()->length() == 4);
2886 match(Set dst (AddVF dst src));
2887 format %{ "addps $dst,$src\t! add packed4F" %}
2888 ins_encode %{
2889 __ addps($dst$$XMMRegister, $src$$XMMRegister);
2890 %}
2891 ins_pipe( pipe_slow );
2892 %}
2894 instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
2895 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2896 match(Set dst (AddVF src1 src2));
2897 format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
2898 ins_encode %{
2899 bool vector256 = false;
2900 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2901 %}
2902 ins_pipe( pipe_slow );
2903 %}
2905 instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
2906 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2907 match(Set dst (AddVF src (LoadVector mem)));
2908 format %{ "vaddps $dst,$src,$mem\t! add packed4F" %}
2909 ins_encode %{
2910 bool vector256 = false;
2911 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2912 %}
2913 ins_pipe( pipe_slow );
2914 %}
// 8F: eight floats in a 256-bit vector; AVX1 is sufficient.
2916 instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
2917 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
2918 match(Set dst (AddVF src1 src2));
2919 format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
2920 ins_encode %{
2921 bool vector256 = true;
2922 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2923 %}
2924 ins_pipe( pipe_slow );
2925 %}
2927 instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
2928 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
2929 match(Set dst (AddVF src (LoadVector mem)));
2930 format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
2931 ins_encode %{
2932 bool vector256 = true;
2933 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2934 %}
2935 ins_pipe( pipe_slow );
2936 %}
2938 // Doubles vector add
// addpd/vaddpd on packed double-precision floats.  As with packed
// floats, the 256-bit (4D) forms only need UseAVX > 0 (AVX1 FP).
2939 instruct vadd2D(vecX dst, vecX src) %{
2940 predicate(n->as_Vector()->length() == 2);
2941 match(Set dst (AddVD dst src));
2942 format %{ "addpd $dst,$src\t! add packed2D" %}
2943 ins_encode %{
2944 __ addpd($dst$$XMMRegister, $src$$XMMRegister);
2945 %}
2946 ins_pipe( pipe_slow );
2947 %}
2949 instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
2950 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2951 match(Set dst (AddVD src1 src2));
2952 format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
2953 ins_encode %{
2954 bool vector256 = false;
2955 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2956 %}
2957 ins_pipe( pipe_slow );
2958 %}
2960 instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
2961 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2962 match(Set dst (AddVD src (LoadVector mem)));
2963 format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %}
2964 ins_encode %{
2965 bool vector256 = false;
2966 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2967 %}
2968 ins_pipe( pipe_slow );
2969 %}
2971 instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
2972 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2973 match(Set dst (AddVD src1 src2));
2974 format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
2975 ins_encode %{
2976 bool vector256 = true;
2977 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2978 %}
2979 ins_pipe( pipe_slow );
2980 %}
2982 instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
2983 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2984 match(Set dst (AddVD src (LoadVector mem)));
2985 format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
2986 ins_encode %{
2987 bool vector256 = true;
2988 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2989 %}
2990 ins_pipe( pipe_slow );
2991 %}
2993 // --------------------------------- SUB --------------------------------------
2995 // Bytes vector sub
// psubb/vpsubb on packed 8-bit bytes.  Note the operand order in the
// match rule matters for subtraction: dst = src1 - src2 (or src - [mem]).
// Sub-128-bit widths (4B in vecS, 8B in vecD) have no memory form;
// 256-bit (32B) forms require UseAVX > 1 (AVX2).
2996 instruct vsub4B(vecS dst, vecS src) %{
2997 predicate(n->as_Vector()->length() == 4);
2998 match(Set dst (SubVB dst src));
2999 format %{ "psubb $dst,$src\t! sub packed4B" %}
3000 ins_encode %{
3001 __ psubb($dst$$XMMRegister, $src$$XMMRegister);
3002 %}
3003 ins_pipe( pipe_slow );
3004 %}
3006 instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
3007 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3008 match(Set dst (SubVB src1 src2));
3009 format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
3010 ins_encode %{
3011 bool vector256 = false;
3012 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3013 %}
3014 ins_pipe( pipe_slow );
3015 %}
3017 instruct vsub8B(vecD dst, vecD src) %{
3018 predicate(n->as_Vector()->length() == 8);
3019 match(Set dst (SubVB dst src));
3020 format %{ "psubb $dst,$src\t! sub packed8B" %}
3021 ins_encode %{
3022 __ psubb($dst$$XMMRegister, $src$$XMMRegister);
3023 %}
3024 ins_pipe( pipe_slow );
3025 %}
3027 instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
3028 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3029 match(Set dst (SubVB src1 src2));
3030 format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
3031 ins_encode %{
3032 bool vector256 = false;
3033 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3034 %}
3035 ins_pipe( pipe_slow );
3036 %}
3038 instruct vsub16B(vecX dst, vecX src) %{
3039 predicate(n->as_Vector()->length() == 16);
3040 match(Set dst (SubVB dst src));
3041 format %{ "psubb $dst,$src\t! sub packed16B" %}
3042 ins_encode %{
3043 __ psubb($dst$$XMMRegister, $src$$XMMRegister);
3044 %}
3045 ins_pipe( pipe_slow );
3046 %}
3048 instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
3049 predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
3050 match(Set dst (SubVB src1 src2));
3051 format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
3052 ins_encode %{
3053 bool vector256 = false;
3054 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3055 %}
3056 ins_pipe( pipe_slow );
3057 %}
3059 instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
3060 predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
3061 match(Set dst (SubVB src (LoadVector mem)));
3062 format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
3063 ins_encode %{
3064 bool vector256 = false;
3065 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3066 %}
3067 ins_pipe( pipe_slow );
3068 %}
3070 instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
3071 predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
3072 match(Set dst (SubVB src1 src2));
3073 format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
3074 ins_encode %{
3075 bool vector256 = true;
3076 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3077 %}
3078 ins_pipe( pipe_slow );
3079 %}
3081 instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
3082 predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
3083 match(Set dst (SubVB src (LoadVector mem)));
3084 format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
3085 ins_encode %{
3086 bool vector256 = true;
3087 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3088 %}
3089 ins_pipe( pipe_slow );
3090 %}
3092 // Shorts/Chars vector sub
// psubw/vpsubw on packed 16-bit elements (used for both short and char
// vectors).  dst = src1 - src2; 16S (256-bit) forms need UseAVX > 1 (AVX2).
3093 instruct vsub2S(vecS dst, vecS src) %{
3094 predicate(n->as_Vector()->length() == 2);
3095 match(Set dst (SubVS dst src));
3096 format %{ "psubw $dst,$src\t! sub packed2S" %}
3097 ins_encode %{
3098 __ psubw($dst$$XMMRegister, $src$$XMMRegister);
3099 %}
3100 ins_pipe( pipe_slow );
3101 %}
3103 instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
3104 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3105 match(Set dst (SubVS src1 src2));
3106 format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
3107 ins_encode %{
3108 bool vector256 = false;
3109 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3110 %}
3111 ins_pipe( pipe_slow );
3112 %}
3114 instruct vsub4S(vecD dst, vecD src) %{
3115 predicate(n->as_Vector()->length() == 4);
3116 match(Set dst (SubVS dst src));
3117 format %{ "psubw $dst,$src\t! sub packed4S" %}
3118 ins_encode %{
3119 __ psubw($dst$$XMMRegister, $src$$XMMRegister);
3120 %}
3121 ins_pipe( pipe_slow );
3122 %}
3124 instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
3125 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3126 match(Set dst (SubVS src1 src2));
3127 format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
3128 ins_encode %{
3129 bool vector256 = false;
3130 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3131 %}
3132 ins_pipe( pipe_slow );
3133 %}
3135 instruct vsub8S(vecX dst, vecX src) %{
3136 predicate(n->as_Vector()->length() == 8);
3137 match(Set dst (SubVS dst src));
3138 format %{ "psubw $dst,$src\t! sub packed8S" %}
3139 ins_encode %{
3140 __ psubw($dst$$XMMRegister, $src$$XMMRegister);
3141 %}
3142 ins_pipe( pipe_slow );
3143 %}
3145 instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
3146 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3147 match(Set dst (SubVS src1 src2));
3148 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
3149 ins_encode %{
3150 bool vector256 = false;
3151 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3152 %}
3153 ins_pipe( pipe_slow );
3154 %}
3156 instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
3157 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3158 match(Set dst (SubVS src (LoadVector mem)));
3159 format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
3160 ins_encode %{
3161 bool vector256 = false;
3162 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3163 %}
3164 ins_pipe( pipe_slow );
3165 %}
3167 instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
3168 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
3169 match(Set dst (SubVS src1 src2));
3170 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
3171 ins_encode %{
3172 bool vector256 = true;
3173 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3174 %}
3175 ins_pipe( pipe_slow );
3176 %}
3178 instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
3179 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
3180 match(Set dst (SubVS src (LoadVector mem)));
3181 format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
3182 ins_encode %{
3183 bool vector256 = true;
3184 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3185 %}
3186 ins_pipe( pipe_slow );
3187 %}
3189 // Integers vector sub
// psubd/vpsubd on packed 32-bit ints.  dst = src1 - src2;
// 8I (256-bit) forms need UseAVX > 1 (AVX2).
3190 instruct vsub2I(vecD dst, vecD src) %{
3191 predicate(n->as_Vector()->length() == 2);
3192 match(Set dst (SubVI dst src));
3193 format %{ "psubd $dst,$src\t! sub packed2I" %}
3194 ins_encode %{
3195 __ psubd($dst$$XMMRegister, $src$$XMMRegister);
3196 %}
3197 ins_pipe( pipe_slow );
3198 %}
3200 instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
3201 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3202 match(Set dst (SubVI src1 src2));
3203 format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %}
3204 ins_encode %{
3205 bool vector256 = false;
3206 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3207 %}
3208 ins_pipe( pipe_slow );
3209 %}
3211 instruct vsub4I(vecX dst, vecX src) %{
3212 predicate(n->as_Vector()->length() == 4);
3213 match(Set dst (SubVI dst src));
3214 format %{ "psubd $dst,$src\t! sub packed4I" %}
3215 ins_encode %{
3216 __ psubd($dst$$XMMRegister, $src$$XMMRegister);
3217 %}
3218 ins_pipe( pipe_slow );
3219 %}
3221 instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
3222 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3223 match(Set dst (SubVI src1 src2));
3224 format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %}
3225 ins_encode %{
3226 bool vector256 = false;
3227 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3228 %}
3229 ins_pipe( pipe_slow );
3230 %}
3232 instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
3233 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3234 match(Set dst (SubVI src (LoadVector mem)));
3235 format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %}
3236 ins_encode %{
3237 bool vector256 = false;
3238 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3239 %}
3240 ins_pipe( pipe_slow );
3241 %}
3243 instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
3244 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
3245 match(Set dst (SubVI src1 src2));
3246 format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %}
3247 ins_encode %{
3248 bool vector256 = true;
3249 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3250 %}
3251 ins_pipe( pipe_slow );
3252 %}
3254 instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
3255 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
3256 match(Set dst (SubVI src (LoadVector mem)));
3257 format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %}
3258 ins_encode %{
3259 bool vector256 = true;
3260 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3261 %}
3262 ins_pipe( pipe_slow );
3263 %}
3265 // Longs vector sub
// psubq/vpsubq on packed 64-bit longs.  dst = src1 - src2;
// 4L (256-bit) forms need UseAVX > 1 (AVX2).
3266 instruct vsub2L(vecX dst, vecX src) %{
3267 predicate(n->as_Vector()->length() == 2);
3268 match(Set dst (SubVL dst src));
3269 format %{ "psubq $dst,$src\t! sub packed2L" %}
3270 ins_encode %{
3271 __ psubq($dst$$XMMRegister, $src$$XMMRegister);
3272 %}
3273 ins_pipe( pipe_slow );
3274 %}
3276 instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
3277 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3278 match(Set dst (SubVL src1 src2));
3279 format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %}
3280 ins_encode %{
3281 bool vector256 = false;
3282 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3283 %}
3284 ins_pipe( pipe_slow );
3285 %}
3287 instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
3288 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3289 match(Set dst (SubVL src (LoadVector mem)));
3290 format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %}
3291 ins_encode %{
3292 bool vector256 = false;
3293 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3294 %}
3295 ins_pipe( pipe_slow );
3296 %}
3298 instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
3299 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
3300 match(Set dst (SubVL src1 src2));
3301 format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %}
3302 ins_encode %{
3303 bool vector256 = true;
3304 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3305 %}
3306 ins_pipe( pipe_slow );
3307 %}
3309 instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
3310 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
3311 match(Set dst (SubVL src (LoadVector mem)));
3312 format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %}
3313 ins_encode %{
3314 bool vector256 = true;
3315 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3316 %}
3317 ins_pipe( pipe_slow );
3318 %}
3320 // Floats vector sub
// subps/vsubps on packed single-precision floats.  dst = src1 - src2;
// 256-bit (8F) FP forms only need UseAVX > 0 (AVX1).
3321 instruct vsub2F(vecD dst, vecD src) %{
3322 predicate(n->as_Vector()->length() == 2);
3323 match(Set dst (SubVF dst src));
3324 format %{ "subps $dst,$src\t! sub packed2F" %}
3325 ins_encode %{
3326 __ subps($dst$$XMMRegister, $src$$XMMRegister);
3327 %}
3328 ins_pipe( pipe_slow );
3329 %}
3331 instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
3332 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3333 match(Set dst (SubVF src1 src2));
3334 format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %}
3335 ins_encode %{
3336 bool vector256 = false;
3337 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3338 %}
3339 ins_pipe( pipe_slow );
3340 %}
3342 instruct vsub4F(vecX dst, vecX src) %{
3343 predicate(n->as_Vector()->length() == 4);
3344 match(Set dst (SubVF dst src));
3345 format %{ "subps $dst,$src\t! sub packed4F" %}
3346 ins_encode %{
3347 __ subps($dst$$XMMRegister, $src$$XMMRegister);
3348 %}
3349 ins_pipe( pipe_slow );
3350 %}
3352 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
3353 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3354 match(Set dst (SubVF src1 src2));
3355 format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %}
3356 ins_encode %{
3357 bool vector256 = false;
3358 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3359 %}
3360 ins_pipe( pipe_slow );
3361 %}
3363 instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
3364 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3365 match(Set dst (SubVF src (LoadVector mem)));
3366 format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %}
3367 ins_encode %{
3368 bool vector256 = false;
3369 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3370 %}
3371 ins_pipe( pipe_slow );
3372 %}
3374 instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
3375 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3376 match(Set dst (SubVF src1 src2));
3377 format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %}
3378 ins_encode %{
3379 bool vector256 = true;
3380 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3381 %}
3382 ins_pipe( pipe_slow );
3383 %}
3385 instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
3386 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3387 match(Set dst (SubVF src (LoadVector mem)));
3388 format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %}
3389 ins_encode %{
3390 bool vector256 = true;
3391 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3392 %}
3393 ins_pipe( pipe_slow );
3394 %}
3396 // Doubles vector sub
// subpd/vsubpd on packed doubles.  dst = src1 - src2;
// 256-bit (4D) FP forms only need UseAVX > 0 (AVX1).
3397 instruct vsub2D(vecX dst, vecX src) %{
3398 predicate(n->as_Vector()->length() == 2);
3399 match(Set dst (SubVD dst src));
3400 format %{ "subpd $dst,$src\t! sub packed2D" %}
3401 ins_encode %{
3402 __ subpd($dst$$XMMRegister, $src$$XMMRegister);
3403 %}
3404 ins_pipe( pipe_slow );
3405 %}
3407 instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
3408 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3409 match(Set dst (SubVD src1 src2));
3410 format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %}
3411 ins_encode %{
3412 bool vector256 = false;
3413 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3414 %}
3415 ins_pipe( pipe_slow );
3416 %}
3418 instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
3419 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3420 match(Set dst (SubVD src (LoadVector mem)));
3421 format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %}
3422 ins_encode %{
3423 bool vector256 = false;
3424 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3425 %}
3426 ins_pipe( pipe_slow );
3427 %}
3429 instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
3430 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3431 match(Set dst (SubVD src1 src2));
3432 format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %}
3433 ins_encode %{
3434 bool vector256 = true;
3435 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3436 %}
3437 ins_pipe( pipe_slow );
3438 %}
3440 instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
3441 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3442 match(Set dst (SubVD src (LoadVector mem)));
3443 format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %}
3444 ins_encode %{
3445 bool vector256 = true;
3446 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3447 %}
3448 ins_pipe( pipe_slow );
3449 %}
3451 // --------------------------------- MUL --------------------------------------
3453 // Shorts/Chars vector mul
// pmullw/vpmullw: packed 16-bit multiply keeping the low 16 bits of
// each product.  pmullw is SSE2, so the two-operand forms have no
// extra predicate; 16S (256-bit) forms need UseAVX > 1 (AVX2).
3454 instruct vmul2S(vecS dst, vecS src) %{
3455 predicate(n->as_Vector()->length() == 2);
3456 match(Set dst (MulVS dst src));
3457 format %{ "pmullw $dst,$src\t! mul packed2S" %}
3458 ins_encode %{
3459 __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
3460 %}
3461 ins_pipe( pipe_slow );
3462 %}
3464 instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
3465 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3466 match(Set dst (MulVS src1 src2));
3467 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
3468 ins_encode %{
3469 bool vector256 = false;
3470 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3471 %}
3472 ins_pipe( pipe_slow );
3473 %}
3475 instruct vmul4S(vecD dst, vecD src) %{
3476 predicate(n->as_Vector()->length() == 4);
3477 match(Set dst (MulVS dst src));
3478 format %{ "pmullw $dst,$src\t! mul packed4S" %}
3479 ins_encode %{
3480 __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
3481 %}
3482 ins_pipe( pipe_slow );
3483 %}
3485 instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
3486 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3487 match(Set dst (MulVS src1 src2));
3488 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
3489 ins_encode %{
3490 bool vector256 = false;
3491 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3492 %}
3493 ins_pipe( pipe_slow );
3494 %}
3496 instruct vmul8S(vecX dst, vecX src) %{
3497 predicate(n->as_Vector()->length() == 8);
3498 match(Set dst (MulVS dst src));
3499 format %{ "pmullw $dst,$src\t! mul packed8S" %}
3500 ins_encode %{
3501 __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
3502 %}
3503 ins_pipe( pipe_slow );
3504 %}
3506 instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
3507 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3508 match(Set dst (MulVS src1 src2));
3509 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
3510 ins_encode %{
3511 bool vector256 = false;
3512 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3513 %}
3514 ins_pipe( pipe_slow );
3515 %}
3517 instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
3518 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3519 match(Set dst (MulVS src (LoadVector mem)));
3520 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
3521 ins_encode %{
3522 bool vector256 = false;
3523 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3524 %}
3525 ins_pipe( pipe_slow );
3526 %}
3528 instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
3529 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
3530 match(Set dst (MulVS src1 src2));
3531 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
3532 ins_encode %{
3533 bool vector256 = true;
3534 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3535 %}
3536 ins_pipe( pipe_slow );
3537 %}
3539 instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
3540 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
3541 match(Set dst (MulVS src (LoadVector mem)));
3542 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
3543 ins_encode %{
3544 bool vector256 = true;
3545 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3546 %}
3547 ins_pipe( pipe_slow );
3548 %}
3550 // Integers vector mul (sse4_1)
// pmulld/vpmulld: packed 32-bit multiply keeping the low 32 bits of
// each product.  pmulld is an SSE4.1 instruction, hence the
// UseSSE > 3 predicate on the two-operand forms (unlike pmullw above,
// it is not available in baseline SSE2).  8I (256-bit) forms need
// UseAVX > 1 (AVX2).
3551 instruct vmul2I(vecD dst, vecD src) %{
3552 predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
3553 match(Set dst (MulVI dst src));
3554 format %{ "pmulld $dst,$src\t! mul packed2I" %}
3555 ins_encode %{
3556 __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
3557 %}
3558 ins_pipe( pipe_slow );
3559 %}
3561 instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
3562 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3563 match(Set dst (MulVI src1 src2));
3564 format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
3565 ins_encode %{
3566 bool vector256 = false;
3567 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3568 %}
3569 ins_pipe( pipe_slow );
3570 %}
3572 instruct vmul4I(vecX dst, vecX src) %{
3573 predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
3574 match(Set dst (MulVI dst src));
3575 format %{ "pmulld $dst,$src\t! mul packed4I" %}
3576 ins_encode %{
3577 __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
3578 %}
3579 ins_pipe( pipe_slow );
3580 %}
3582 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
3583 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3584 match(Set dst (MulVI src1 src2));
3585 format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
3586 ins_encode %{
3587 bool vector256 = false;
3588 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3589 %}
3590 ins_pipe( pipe_slow );
3591 %}
3593 instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
3594 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3595 match(Set dst (MulVI src (LoadVector mem)));
3596 format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
3597 ins_encode %{
3598 bool vector256 = false;
3599 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3600 %}
3601 ins_pipe( pipe_slow );
3602 %}
3604 instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
3605 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
3606 match(Set dst (MulVI src1 src2));
3607 format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
3608 ins_encode %{
3609 bool vector256 = true;
3610 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3611 %}
3612 ins_pipe( pipe_slow );
3613 %}
3615 instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
3616 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
3617 match(Set dst (MulVI src (LoadVector mem)));
3618 format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
3619 ins_encode %{
3620 bool vector256 = true;
3621 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3622 %}
3623 ins_pipe( pipe_slow );
3624 %}
3626 // Floats vector mul
// mulps/vmulps on packed single-precision floats.
// 256-bit (8F) FP forms only need UseAVX > 0 (AVX1).
3627 instruct vmul2F(vecD dst, vecD src) %{
3628 predicate(n->as_Vector()->length() == 2);
3629 match(Set dst (MulVF dst src));
3630 format %{ "mulps $dst,$src\t! mul packed2F" %}
3631 ins_encode %{
3632 __ mulps($dst$$XMMRegister, $src$$XMMRegister);
3633 %}
3634 ins_pipe( pipe_slow );
3635 %}
3637 instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
3638 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3639 match(Set dst (MulVF src1 src2));
3640 format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %}
3641 ins_encode %{
3642 bool vector256 = false;
3643 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3644 %}
3645 ins_pipe( pipe_slow );
3646 %}
3648 instruct vmul4F(vecX dst, vecX src) %{
3649 predicate(n->as_Vector()->length() == 4);
3650 match(Set dst (MulVF dst src));
3651 format %{ "mulps $dst,$src\t! mul packed4F" %}
3652 ins_encode %{
3653 __ mulps($dst$$XMMRegister, $src$$XMMRegister);
3654 %}
3655 ins_pipe( pipe_slow );
3656 %}
3658 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
3659 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3660 match(Set dst (MulVF src1 src2));
3661 format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %}
3662 ins_encode %{
3663 bool vector256 = false;
3664 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3665 %}
3666 ins_pipe( pipe_slow );
3667 %}
3669 instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
3670 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3671 match(Set dst (MulVF src (LoadVector mem)));
3672 format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %}
3673 ins_encode %{
3674 bool vector256 = false;
3675 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3676 %}
3677 ins_pipe( pipe_slow );
3678 %}
3680 instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
3681 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3682 match(Set dst (MulVF src1 src2));
3683 format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %}
3684 ins_encode %{
3685 bool vector256 = true;
3686 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3687 %}
3688 ins_pipe( pipe_slow );
3689 %}
3691 instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
3692 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3693 match(Set dst (MulVF src (LoadVector mem)));
3694 format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %}
3695 ins_encode %{
3696 bool vector256 = true;
3697 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3698 %}
3699 ins_pipe( pipe_slow );
3700 %}
// Doubles vector mul.
// Same pattern as the float forms above: SSE two-operand in-place,
// AVX three-operand register/memory, 256-bit forms on ymm.

// 2 doubles, SSE two-operand form: dst *= src.
instruct vmul2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD dst src));
  format %{ "mulpd   $dst,$src\t! mul packed2D" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 2 doubles, AVX 128-bit three-operand form.
instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd  $dst,$src1,$src2\t! mul packed2D" %}
  ins_encode %{
    bool vector256 = false;  // 128-bit VEX encoding
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 2 doubles, AVX form with a memory operand.
instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd  $dst,$src,$mem\t! mul packed2D" %}
  ins_encode %{
    bool vector256 = false;  // 128-bit VEX encoding
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 4 doubles, AVX 256-bit three-operand form.
instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd  $dst,$src1,$src2\t! mul packed4D" %}
  ins_encode %{
    bool vector256 = true;  // 256-bit VEX encoding
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 4 doubles, AVX 256-bit form with a memory operand.
instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd  $dst,$src,$mem\t! mul packed4D" %}
  ins_encode %{
    bool vector256 = true;  // 256-bit VEX encoding
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
// --------------------------------- DIV --------------------------------------

// Floats vector div.
// SSE forms are two-operand (dst holds the dividend and receives the
// quotient); AVX forms are three-operand and may fold a memory operand.

// 2 floats, SSE two-operand form: dst /= src.
instruct vdiv2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF dst src));
  format %{ "divps   $dst,$src\t! div packed2F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 2 floats, AVX 128-bit three-operand form.
instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps  $dst,$src1,$src2\t! div packed2F" %}
  ins_encode %{
    bool vector256 = false;  // 128-bit VEX encoding
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 4 floats, SSE two-operand form.
instruct vdiv4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF dst src));
  format %{ "divps   $dst,$src\t! div packed4F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 4 floats, AVX 128-bit three-operand form.
instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps  $dst,$src1,$src2\t! div packed4F" %}
  ins_encode %{
    bool vector256 = false;  // 128-bit VEX encoding
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 4 floats, AVX form with the divisor loaded from memory.
instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps  $dst,$src,$mem\t! div packed4F" %}
  ins_encode %{
    bool vector256 = false;  // 128-bit VEX encoding
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 8 floats, AVX 256-bit three-operand form.
instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps  $dst,$src1,$src2\t! div packed8F" %}
  ins_encode %{
    bool vector256 = true;  // 256-bit VEX encoding
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 8 floats, AVX 256-bit form with a memory operand.
instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps  $dst,$src,$mem\t! div packed8F" %}
  ins_encode %{
    bool vector256 = true;  // 256-bit VEX encoding
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
// Doubles vector div.
// Same structure as the float div forms above.

// 2 doubles, SSE two-operand form: dst /= src.
instruct vdiv2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD dst src));
  format %{ "divpd   $dst,$src\t! div packed2D" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 2 doubles, AVX 128-bit three-operand form.
instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd  $dst,$src1,$src2\t! div packed2D" %}
  ins_encode %{
    bool vector256 = false;  // 128-bit VEX encoding
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 2 doubles, AVX form with the divisor loaded from memory.
instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd  $dst,$src,$mem\t! div packed2D" %}
  ins_encode %{
    bool vector256 = false;  // 128-bit VEX encoding
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 4 doubles, AVX 256-bit three-operand form.
instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd  $dst,$src1,$src2\t! div packed4D" %}
  ins_encode %{
    bool vector256 = true;  // 256-bit VEX encoding
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 4 doubles, AVX 256-bit form with a memory operand.
instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd  $dst,$src,$mem\t! div packed4D" %}
  ins_encode %{
    bool vector256 = true;  // 256-bit VEX encoding
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
// ------------------------------ Shift ---------------------------------------

// Left and right shift count vectors are the same on x86
// (only lowest bits of xmm reg are used for count).
// Loads a scalar shift count from a general-purpose register into the
// low 32 bits of an xmm register via movd; the same value serves both
// LShiftCntV and RShiftCntV, so both are matched here.
instruct vshiftcnt(vecS dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movd    $dst,$cnt\t! load shift count" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $cnt$$Register);
  %}
  ins_pipe( pipe_slow );
%}
// ------------------------------ LeftShift -----------------------------------

// Shorts/Chars vector left shift.
// Variants per element count: SSE in-place with an xmm shift count,
// SSE in-place with an immediate count, AVX three-operand with xmm count,
// and AVX three-operand with an immediate count.
// 16-element (256-bit) integer shifts require AVX2 (UseAVX > 1).
instruct vsll2S(vecS dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 2 shorts, SSE, immediate shift count.
instruct vsll2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// 2 shorts, AVX three-operand, xmm shift count.
instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    bool vector256 = false;  // 128-bit VEX encoding
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 2 shorts, AVX three-operand, immediate shift count.
instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    bool vector256 = false;  // 128-bit VEX encoding
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 4 shorts, SSE in-place, xmm shift count.
instruct vsll4S(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 4 shorts, SSE, immediate shift count.
instruct vsll4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// 4 shorts, AVX three-operand, xmm shift count.
instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    bool vector256 = false;  // 128-bit VEX encoding
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 4 shorts, AVX three-operand, immediate shift count.
instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    bool vector256 = false;  // 128-bit VEX encoding
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 8 shorts, SSE in-place, xmm shift count.
instruct vsll8S(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 8 shorts, SSE, immediate shift count.
instruct vsll8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// 8 shorts, AVX three-operand, xmm shift count.
instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    bool vector256 = false;  // 128-bit VEX encoding
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 8 shorts, AVX three-operand, immediate shift count.
instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    bool vector256 = false;  // 128-bit VEX encoding
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 16 shorts, AVX2 256-bit, xmm shift count.
instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    bool vector256 = true;  // 256-bit VEX encoding (needs AVX2 for integer ops)
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 16 shorts, AVX2 256-bit, immediate shift count.
instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    bool vector256 = true;  // 256-bit VEX encoding (needs AVX2 for integer ops)
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}
// Integers vector left shift (pslld / vpslld).
// Same variant structure as the short shifts above; 8-element (256-bit)
// forms require AVX2 (UseAVX > 1).
instruct vsll2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld   $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 2 ints, SSE, immediate shift count.
instruct vsll2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld   $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// 2 ints, AVX three-operand, xmm shift count.
instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld  $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    bool vector256 = false;  // 128-bit VEX encoding
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 2 ints, AVX three-operand, immediate shift count.
instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld  $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    bool vector256 = false;  // 128-bit VEX encoding
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 4 ints, SSE in-place, xmm shift count.
instruct vsll4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld   $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 4 ints, SSE, immediate shift count.
instruct vsll4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld   $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// 4 ints, AVX three-operand, xmm shift count.
instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld  $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    bool vector256 = false;  // 128-bit VEX encoding
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 4 ints, AVX three-operand, immediate shift count.
instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld  $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    bool vector256 = false;  // 128-bit VEX encoding
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 8 ints, AVX2 256-bit, xmm shift count.
instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld  $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    bool vector256 = true;  // 256-bit VEX encoding (needs AVX2 for integer ops)
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 8 ints, AVX2 256-bit, immediate shift count.
instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld  $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    bool vector256 = true;  // 256-bit VEX encoding (needs AVX2 for integer ops)
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}
// Longs vector left shift (psllq / vpsllq).
// 4-element (256-bit) forms require AVX2 (UseAVX > 1).
instruct vsll2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq   $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 2 longs, SSE, immediate shift count.
instruct vsll2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq   $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// 2 longs, AVX three-operand, xmm shift count.
instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq  $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    bool vector256 = false;  // 128-bit VEX encoding
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 2 longs, AVX three-operand, immediate shift count.
instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq  $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    bool vector256 = false;  // 128-bit VEX encoding
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 4 longs, AVX2 256-bit, xmm shift count.
instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq  $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    bool vector256 = true;  // 256-bit VEX encoding (needs AVX2 for integer ops)
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 4 longs, AVX2 256-bit, immediate shift count.
instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq  $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    bool vector256 = true;  // 256-bit VEX encoding (needs AVX2 for integer ops)
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}
// ----------------------- LogicalRightShift -----------------------------------

// Shorts vector logical right shift produces an incorrect Java result
// for negative data because Java code converts short values into int with
// sign extension before a shift. But char vectors are fine since chars are
// unsigned values.
// (These patterns are therefore used for char data; psrlw/vpsrlw shift in
// zero bits from the left.)

instruct vsrl2S(vecS dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw   $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 2 shorts, SSE, immediate shift count.
instruct vsrl2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw   $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// 2 shorts, AVX three-operand, xmm shift count.
instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    bool vector256 = false;  // 128-bit VEX encoding
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 2 shorts, AVX three-operand, immediate shift count.
instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    bool vector256 = false;  // 128-bit VEX encoding
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 4 shorts, SSE in-place, xmm shift count.
instruct vsrl4S(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw   $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 4 shorts, SSE, immediate shift count.
instruct vsrl4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw   $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// 4 shorts, AVX three-operand, xmm shift count.
instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    bool vector256 = false;  // 128-bit VEX encoding
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 4 shorts, AVX three-operand, immediate shift count.
instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    bool vector256 = false;  // 128-bit VEX encoding
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 8 shorts, SSE in-place, xmm shift count.
instruct vsrl8S(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw   $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 8 shorts, SSE, immediate shift count.
instruct vsrl8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw   $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// 8 shorts, AVX three-operand, xmm shift count.
instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    bool vector256 = false;  // 128-bit VEX encoding
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 8 shorts, AVX three-operand, immediate shift count.
instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    bool vector256 = false;  // 128-bit VEX encoding
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 16 shorts, AVX2 256-bit, xmm shift count.
instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    bool vector256 = true;  // 256-bit VEX encoding (needs AVX2 for integer ops)
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 16 shorts, AVX2 256-bit, immediate shift count.
instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    bool vector256 = true;  // 256-bit VEX encoding (needs AVX2 for integer ops)
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}
// Integers vector logical right shift (psrld / vpsrld).
// 8-element (256-bit) forms require AVX2 (UseAVX > 1).
instruct vsrl2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld   $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 2 ints, SSE, immediate shift count.
instruct vsrl2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld   $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// 2 ints, AVX three-operand, xmm shift count.
instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    bool vector256 = false;  // 128-bit VEX encoding
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 2 ints, AVX three-operand, immediate shift count.
instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    bool vector256 = false;  // 128-bit VEX encoding
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 4 ints, SSE in-place, xmm shift count.
instruct vsrl4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld   $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 4 ints, SSE, immediate shift count.
instruct vsrl4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld   $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// 4 ints, AVX three-operand, xmm shift count.
instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    bool vector256 = false;  // 128-bit VEX encoding
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 4 ints, AVX three-operand, immediate shift count.
instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    bool vector256 = false;  // 128-bit VEX encoding
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 8 ints, AVX2 256-bit, xmm shift count.
instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    bool vector256 = true;  // 256-bit VEX encoding (needs AVX2 for integer ops)
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 8 ints, AVX2 256-bit, immediate shift count.
instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    bool vector256 = true;  // 256-bit VEX encoding (needs AVX2 for integer ops)
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}
// Longs vector logical right shift (psrlq / vpsrlq).
// 4-element (256-bit) forms require AVX2 (UseAVX > 1).
instruct vsrl2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq   $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 2 longs, SSE, immediate shift count.
instruct vsrl2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq   $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// 2 longs, AVX three-operand, xmm shift count.
instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    bool vector256 = false;  // 128-bit VEX encoding
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 2 longs, AVX three-operand, immediate shift count.
instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    bool vector256 = false;  // 128-bit VEX encoding
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 4 longs, AVX2 256-bit, xmm shift count.
instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    bool vector256 = true;  // 256-bit VEX encoding (needs AVX2 for integer ops)
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 4 longs, AVX2 256-bit, immediate shift count.
instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    bool vector256 = true;  // 256-bit VEX encoding (needs AVX2 for integer ops)
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}
4554 // ------------------- ArithmeticRightShift -----------------------------------
4556 // Shorts/Chars vector arithmetic right shift
4557 instruct vsra2S(vecS dst, vecS shift) %{
4558 predicate(n->as_Vector()->length() == 2);
4559 match(Set dst (RShiftVS dst shift));
4560 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
4561 ins_encode %{
4562 __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
4563 %}
4564 ins_pipe( pipe_slow );
4565 %}
4567 instruct vsra2S_imm(vecS dst, immI8 shift) %{
4568 predicate(n->as_Vector()->length() == 2);
4569 match(Set dst (RShiftVS dst shift));
4570 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
4571 ins_encode %{
4572 __ psraw($dst$$XMMRegister, (int)$shift$$constant);
4573 %}
4574 ins_pipe( pipe_slow );
4575 %}
// AVX (UseAVX > 0) non-destructive three-operand form: dst = src >> shift
// (arithmetic). vector256 = false selects the 128-bit VEX encoding.
4577 instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{
4578 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4579 match(Set dst (RShiftVS src shift));
4580 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
4581 ins_encode %{
4582 bool vector256 = false;
4583 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4584 %}
4585 ins_pipe( pipe_slow );
4586 %}
// AVX three-operand form with an 8-bit immediate shift count.
4588 instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
4589 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4590 match(Set dst (RShiftVS src shift));
4591 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
4592 ins_encode %{
4593 bool vector256 = false;
4594 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4595 %}
4596 ins_pipe( pipe_slow );
4597 %}
// SSE2 destructive form, 4 shorts (64-bit vecD); count in an XMM register.
4599 instruct vsra4S(vecD dst, vecS shift) %{
4600 predicate(n->as_Vector()->length() == 4);
4601 match(Set dst (RShiftVS dst shift));
4602 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
4603 ins_encode %{
4604 __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
4605 %}
4606 ins_pipe( pipe_slow );
4607 %}
// Same operation with the shift count as an 8-bit immediate.
4609 instruct vsra4S_imm(vecD dst, immI8 shift) %{
4610 predicate(n->as_Vector()->length() == 4);
4611 match(Set dst (RShiftVS dst shift));
4612 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
4613 ins_encode %{
4614 __ psraw($dst$$XMMRegister, (int)$shift$$constant);
4615 %}
4616 ins_pipe( pipe_slow );
4617 %}
// AVX (UseAVX > 0) non-destructive form for 4 shorts; 128-bit encoding
// (vector256 = false).
4619 instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{
4620 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4621 match(Set dst (RShiftVS src shift));
4622 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
4623 ins_encode %{
4624 bool vector256 = false;
4625 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4626 %}
4627 ins_pipe( pipe_slow );
4628 %}
// AVX three-operand form with an 8-bit immediate shift count.
4630 instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
4631 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4632 match(Set dst (RShiftVS src shift));
4633 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
4634 ins_encode %{
4635 bool vector256 = false;
4636 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4637 %}
4638 ins_pipe( pipe_slow );
4639 %}
// SSE2 destructive form, 8 shorts (full 128-bit vecX); count in an XMM register.
4641 instruct vsra8S(vecX dst, vecS shift) %{
4642 predicate(n->as_Vector()->length() == 8);
4643 match(Set dst (RShiftVS dst shift));
4644 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
4645 ins_encode %{
4646 __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
4647 %}
4648 ins_pipe( pipe_slow );
4649 %}
// Same operation with the shift count as an 8-bit immediate.
4651 instruct vsra8S_imm(vecX dst, immI8 shift) %{
4652 predicate(n->as_Vector()->length() == 8);
4653 match(Set dst (RShiftVS dst shift));
4654 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
4655 ins_encode %{
4656 __ psraw($dst$$XMMRegister, (int)$shift$$constant);
4657 %}
4658 ins_pipe( pipe_slow );
4659 %}
// AVX (UseAVX > 0) non-destructive form for 8 shorts; 128-bit encoding.
4661 instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{
4662 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
4663 match(Set dst (RShiftVS src shift));
4664 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
4665 ins_encode %{
4666 bool vector256 = false;
4667 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4668 %}
4669 ins_pipe( pipe_slow );
4670 %}
// AVX three-operand form with an 8-bit immediate shift count.
4672 instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
4673 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
4674 match(Set dst (RShiftVS src shift));
4675 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
4676 ins_encode %{
4677 bool vector256 = false;
4678 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4679 %}
4680 ins_pipe( pipe_slow );
4681 %}
// 16 shorts in a 256-bit vecY: requires AVX2 (UseAVX > 1); vector256 = true
// selects the 256-bit VEX-encoded vpsraw. No SSE2 variant exists at this width.
4683 instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{
4684 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
4685 match(Set dst (RShiftVS src shift));
4686 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
4687 ins_encode %{
4688 bool vector256 = true;
4689 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4690 %}
4691 ins_pipe( pipe_slow );
4692 %}
// AVX2 form with an 8-bit immediate shift count.
4694 instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
4695 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
4696 match(Set dst (RShiftVS src shift));
4697 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
4698 ins_encode %{
4699 bool vector256 = true;
4700 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4701 %}
4702 ins_pipe( pipe_slow );
4703 %}
4705 // Integers vector arithmetic right shift
// SSE2 destructive form, 2 ints; variable count in an XMM register.
4706 instruct vsra2I(vecD dst, vecS shift) %{
4707 predicate(n->as_Vector()->length() == 2);
4708 match(Set dst (RShiftVI dst shift));
4709 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
4710 ins_encode %{
4711 __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
4712 %}
4713 ins_pipe( pipe_slow );
4714 %}
// Same operation with the shift count as an 8-bit immediate.
4716 instruct vsra2I_imm(vecD dst, immI8 shift) %{
4717 predicate(n->as_Vector()->length() == 2);
4718 match(Set dst (RShiftVI dst shift));
4719 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
4720 ins_encode %{
4721 __ psrad($dst$$XMMRegister, (int)$shift$$constant);
4722 %}
4723 ins_pipe( pipe_slow );
4724 %}
// AVX (UseAVX > 0) non-destructive form for 2 ints; 128-bit encoding.
4726 instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
4727 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4728 match(Set dst (RShiftVI src shift));
4729 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
4730 ins_encode %{
4731 bool vector256 = false;
4732 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4733 %}
4734 ins_pipe( pipe_slow );
4735 %}
// AVX three-operand form with an 8-bit immediate shift count.
4737 instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
4738 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4739 match(Set dst (RShiftVI src shift));
4740 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
4741 ins_encode %{
4742 bool vector256 = false;
4743 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4744 %}
4745 ins_pipe( pipe_slow );
4746 %}
// SSE2 destructive form, 4 ints (full 128-bit vecX); count in an XMM register.
4748 instruct vsra4I(vecX dst, vecS shift) %{
4749 predicate(n->as_Vector()->length() == 4);
4750 match(Set dst (RShiftVI dst shift));
4751 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
4752 ins_encode %{
4753 __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
4754 %}
4755 ins_pipe( pipe_slow );
4756 %}
// Same operation with the shift count as an 8-bit immediate.
4758 instruct vsra4I_imm(vecX dst, immI8 shift) %{
4759 predicate(n->as_Vector()->length() == 4);
4760 match(Set dst (RShiftVI dst shift));
4761 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
4762 ins_encode %{
4763 __ psrad($dst$$XMMRegister, (int)$shift$$constant);
4764 %}
4765 ins_pipe( pipe_slow );
4766 %}
// AVX (UseAVX > 0) non-destructive form for 4 ints; 128-bit encoding.
4768 instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
4769 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4770 match(Set dst (RShiftVI src shift));
4771 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
4772 ins_encode %{
4773 bool vector256 = false;
4774 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4775 %}
4776 ins_pipe( pipe_slow );
4777 %}
// AVX three-operand form with an 8-bit immediate shift count.
4779 instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
4780 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4781 match(Set dst (RShiftVI src shift));
4782 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
4783 ins_encode %{
4784 bool vector256 = false;
4785 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4786 %}
4787 ins_pipe( pipe_slow );
4788 %}
// 8 ints in a 256-bit vecY: requires AVX2 (UseAVX > 1); vector256 = true
// selects the 256-bit VEX-encoded vpsrad. No SSE2 variant exists at this width.
4790 instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
4791 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4792 match(Set dst (RShiftVI src shift));
4793 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
4794 ins_encode %{
4795 bool vector256 = true;
4796 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4797 %}
4798 ins_pipe( pipe_slow );
4799 %}
// AVX2 form with an 8-bit immediate shift count.
4801 instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
4802 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4803 match(Set dst (RShiftVI src shift));
4804 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
4805 ins_encode %{
4806 bool vector256 = true;
4807 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4808 %}
4809 ins_pipe( pipe_slow );
4810 %}
4812 // There are no vector arithmetic right shift instructions for longs
// (x86 provides no psraq before AVX-512), so RShiftVL is not matched here.
4815 // --------------------------------- AND --------------------------------------
// Bitwise AND of vectors. Predicates key on length_in_bytes, so one instruct
// covers every element type of the same total size. SSE2 destructive form.
4817 instruct vand4B(vecS dst, vecS src) %{
4818 predicate(n->as_Vector()->length_in_bytes() == 4);
4819 match(Set dst (AndV dst src));
4820 format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
4821 ins_encode %{
4822 __ pand($dst$$XMMRegister, $src$$XMMRegister);
4823 %}
4824 ins_pipe( pipe_slow );
4825 %}
// AVX (UseAVX > 0) non-destructive three-operand form; 128-bit encoding.
4827 instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
4828 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
4829 match(Set dst (AndV src1 src2));
4830 format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
4831 ins_encode %{
4832 bool vector256 = false;
4833 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4834 %}
4835 ins_pipe( pipe_slow );
4836 %}
// 8-byte AND, SSE2 destructive form.
4838 instruct vand8B(vecD dst, vecD src) %{
4839 predicate(n->as_Vector()->length_in_bytes() == 8);
4840 match(Set dst (AndV dst src));
4841 format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
4842 ins_encode %{
4843 __ pand($dst$$XMMRegister, $src$$XMMRegister);
4844 %}
4845 ins_pipe( pipe_slow );
4846 %}
// AVX non-destructive three-operand form; 128-bit encoding.
4848 instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
4849 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
4850 match(Set dst (AndV src1 src2));
4851 format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %}
4852 ins_encode %{
4853 bool vector256 = false;
4854 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4855 %}
4856 ins_pipe( pipe_slow );
4857 %}
// 16-byte AND, SSE2 destructive form.
4859 instruct vand16B(vecX dst, vecX src) %{
4860 predicate(n->as_Vector()->length_in_bytes() == 16);
4861 match(Set dst (AndV dst src));
4862 format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
4863 ins_encode %{
4864 __ pand($dst$$XMMRegister, $src$$XMMRegister);
4865 %}
4866 ins_pipe( pipe_slow );
4867 %}
// AVX non-destructive three-operand form; 128-bit encoding.
4869 instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
4870 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
4871 match(Set dst (AndV src1 src2));
4872 format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
4873 ins_encode %{
4874 bool vector256 = false;
4875 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4876 %}
4877 ins_pipe( pipe_slow );
4878 %}
// AVX form folding the second operand's LoadVector straight from memory.
4880 instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
4881 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
4882 match(Set dst (AndV src (LoadVector mem)));
4883 format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
4884 ins_encode %{
4885 bool vector256 = false;
4886 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4887 %}
4888 ins_pipe( pipe_slow );
4889 %}
// 32-byte AND: requires AVX2 (UseAVX > 1); vector256 = true selects the
// 256-bit VEX-encoded vpand.
4891 instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
4892 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
4893 match(Set dst (AndV src1 src2));
4894 format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %}
4895 ins_encode %{
4896 bool vector256 = true;
4897 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4898 %}
4899 ins_pipe( pipe_slow );
4900 %}
// AVX2 form folding the second operand's LoadVector straight from memory.
4902 instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
4903 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
4904 match(Set dst (AndV src (LoadVector mem)));
4905 format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
4906 ins_encode %{
4907 bool vector256 = true;
4908 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4909 %}
4910 ins_pipe( pipe_slow );
4911 %}
4913 // --------------------------------- OR ---------------------------------------
// Bitwise OR of vectors, keyed on length_in_bytes. SSE2 destructive form.
4915 instruct vor4B(vecS dst, vecS src) %{
4916 predicate(n->as_Vector()->length_in_bytes() == 4);
4917 match(Set dst (OrV dst src));
4918 format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
4919 ins_encode %{
4920 __ por($dst$$XMMRegister, $src$$XMMRegister);
4921 %}
4922 ins_pipe( pipe_slow );
4923 %}
// AVX non-destructive three-operand form; 128-bit encoding.
4925 instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
4926 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
4927 match(Set dst (OrV src1 src2));
4928 format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
4929 ins_encode %{
4930 bool vector256 = false;
4931 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4932 %}
4933 ins_pipe( pipe_slow );
4934 %}
// 8-byte OR, SSE2 destructive form.
4936 instruct vor8B(vecD dst, vecD src) %{
4937 predicate(n->as_Vector()->length_in_bytes() == 8);
4938 match(Set dst (OrV dst src));
4939 format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
4940 ins_encode %{
4941 __ por($dst$$XMMRegister, $src$$XMMRegister);
4942 %}
4943 ins_pipe( pipe_slow );
4944 %}
// AVX non-destructive three-operand form; 128-bit encoding.
4946 instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
4947 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
4948 match(Set dst (OrV src1 src2));
4949 format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
4950 ins_encode %{
4951 bool vector256 = false;
4952 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4953 %}
4954 ins_pipe( pipe_slow );
4955 %}
// 16-byte OR, SSE2 destructive form.
4957 instruct vor16B(vecX dst, vecX src) %{
4958 predicate(n->as_Vector()->length_in_bytes() == 16);
4959 match(Set dst (OrV dst src));
4960 format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
4961 ins_encode %{
4962 __ por($dst$$XMMRegister, $src$$XMMRegister);
4963 %}
4964 ins_pipe( pipe_slow );
4965 %}
// AVX non-destructive three-operand form; 128-bit encoding.
4967 instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
4968 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
4969 match(Set dst (OrV src1 src2));
4970 format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
4971 ins_encode %{
4972 bool vector256 = false;
4973 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4974 %}
4975 ins_pipe( pipe_slow );
4976 %}
// AVX form folding the second operand's LoadVector straight from memory.
4978 instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
4979 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
4980 match(Set dst (OrV src (LoadVector mem)));
4981 format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
4982 ins_encode %{
4983 bool vector256 = false;
4984 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4985 %}
4986 ins_pipe( pipe_slow );
4987 %}
// 32-byte OR: requires AVX2 (UseAVX > 1); 256-bit VEX-encoded vpor.
4989 instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
4990 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
4991 match(Set dst (OrV src1 src2));
4992 format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
4993 ins_encode %{
4994 bool vector256 = true;
4995 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4996 %}
4997 ins_pipe( pipe_slow );
4998 %}
// AVX2 form folding the second operand's LoadVector straight from memory.
5000 instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
5001 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
5002 match(Set dst (OrV src (LoadVector mem)));
5003 format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
5004 ins_encode %{
5005 bool vector256 = true;
5006 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
5007 %}
5008 ins_pipe( pipe_slow );
5009 %}
5011 // --------------------------------- XOR --------------------------------------
// Bitwise XOR of vectors, keyed on length_in_bytes. SSE2 destructive form.
5013 instruct vxor4B(vecS dst, vecS src) %{
5014 predicate(n->as_Vector()->length_in_bytes() == 4);
5015 match(Set dst (XorV dst src));
5016 format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
5017 ins_encode %{
5018 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
5019 %}
5020 ins_pipe( pipe_slow );
5021 %}
// AVX non-destructive three-operand form; 128-bit encoding.
5023 instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
5024 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
5025 match(Set dst (XorV src1 src2));
5026 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
5027 ins_encode %{
5028 bool vector256 = false;
5029 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
5030 %}
5031 ins_pipe( pipe_slow );
5032 %}
// 8-byte XOR, SSE2 destructive form.
5034 instruct vxor8B(vecD dst, vecD src) %{
5035 predicate(n->as_Vector()->length_in_bytes() == 8);
5036 match(Set dst (XorV dst src));
5037 format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
5038 ins_encode %{
5039 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
5040 %}
5041 ins_pipe( pipe_slow );
5042 %}
// AVX non-destructive three-operand form; 128-bit encoding.
5044 instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
5045 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
5046 match(Set dst (XorV src1 src2));
5047 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
5048 ins_encode %{
5049 bool vector256 = false;
5050 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
5051 %}
5052 ins_pipe( pipe_slow );
5053 %}
// 16-byte XOR, SSE2 destructive form.
5055 instruct vxor16B(vecX dst, vecX src) %{
5056 predicate(n->as_Vector()->length_in_bytes() == 16);
5057 match(Set dst (XorV dst src));
5058 format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
5059 ins_encode %{
5060 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
5061 %}
5062 ins_pipe( pipe_slow );
5063 %}
// AVX non-destructive three-operand form; 128-bit encoding.
5065 instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
5066 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
5067 match(Set dst (XorV src1 src2));
5068 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
5069 ins_encode %{
5070 bool vector256 = false;
5071 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
5072 %}
5073 ins_pipe( pipe_slow );
5074 %}
// AVX form folding the second operand's LoadVector straight from memory.
5076 instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
5077 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
5078 match(Set dst (XorV src (LoadVector mem)));
5079 format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
5080 ins_encode %{
5081 bool vector256 = false;
5082 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
5083 %}
5084 ins_pipe( pipe_slow );
5085 %}
// 32-byte XOR: requires AVX2 (UseAVX > 1); 256-bit VEX-encoded vpxor.
5087 instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
5088 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
5089 match(Set dst (XorV src1 src2));
5090 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
5091 ins_encode %{
5092 bool vector256 = true;
5093 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
5094 %}
5095 ins_pipe( pipe_slow );
5096 %}
// AVX2 form folding the second operand's LoadVector straight from memory.
5098 instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
5099 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
5100 match(Set dst (XorV src (LoadVector mem)));
5101 format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
5102 ins_encode %{
5103 bool vector256 = true;
5104 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
5105 %}
5106 ins_pipe( pipe_slow );
5107 %}