22 // |
22 // |
23 // |
23 // |
24 |
24 |
25 // X86 Common Architecture Description File |
25 // X86 Common Architecture Description File |
26 |
26 |
|
27 //----------REGISTER DEFINITION BLOCK------------------------------------------ |
|
28 // This information is used by the matcher and the register allocator to |
|
29 // describe individual registers and classes of registers within the target |
|
30 // architecture.
|
31 |
|
register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers.  256-bit registers of 8 (32-bit) words each, labeled (a)-h.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 preserved across function calls
//              XMM0-XMM3 might hold parameters

// XMM0-XMM5 are save-on-call (volatile) in every supported ABI.
reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next());
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next());
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next());
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next());
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next());
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next());
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next());
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next());
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next());
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next());
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next());
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next());
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next());
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next());
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next());
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next());
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next());
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next());
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next());
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next());
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next());
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next());
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next());
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next());
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

#ifdef _WIN64

// Windows x64 calling convention: XMM6-XMM15 are preserved across calls,
// hence save-on-entry (SOE) for the C convention.
reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next());
reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next());
reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next());
reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next());
reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next());
reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next());
reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next());
reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next());
reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next());
reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next());
reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next());
reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next());
reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next());
reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next());
reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next());
reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next());
reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next());
reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next());
reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next());
reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next());
reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next());
reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next());
reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next());
reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next());
reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next());
reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next());
reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next());
reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next());
reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next());
reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next());
reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next());
reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next());
reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next());
reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next());
reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next());
reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next());
reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next());
reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next());
reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next());
reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next());
reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

#else // _WIN64

// Non-Windows ABIs: all XMM registers are volatile (save-on-call).
reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next());
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next());
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next());
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next());
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next());
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next());
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next());
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next());
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

#ifdef _LP64

// XMM8-XMM15 only exist in 64-bit mode.
reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next());
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next());
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next());
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next());
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next());
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next());
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next());
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next());
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next());
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next());
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next());
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next());
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next());
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next());
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next());
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next());
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next());
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next());
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next());
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next());
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next());
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next());
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next());
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next());
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next());
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next());
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next());
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next());
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next());
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next());
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next());
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next());
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

#endif // _LP64

#endif // _WIN64

// Condition-code register.  It has no ideal register type (0) and no real
// VMReg (VMRegImpl::Bad()); its encoding follows the last XMM register
// (16 XMM registers in 64-bit mode, 8 in 32-bit mode).
#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

// Allocation chunk for the XMM registers; the order below is the order in
// which the register allocator considers them.
alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
#ifdef _LP64
                  ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                   );

// flags allocation class should be last.
alloc_class chunk2(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for all float registers (word (a) of each XMM register)
reg_class float_reg(XMM0,
                    XMM1,
                    XMM2,
                    XMM3,
                    XMM4,
                    XMM5,
                    XMM6,
                    XMM7
#ifdef _LP64
                   ,XMM8,
                    XMM9,
                    XMM10,
                    XMM11,
                    XMM12,
                    XMM13,
                    XMM14,
                    XMM15
#endif
                    );

// Class for all double registers (words (ab) of each XMM register)
reg_class double_reg(XMM0,  XMM0b,
                     XMM1,  XMM1b,
                     XMM2,  XMM2b,
                     XMM3,  XMM3b,
                     XMM4,  XMM4b,
                     XMM5,  XMM5b,
                     XMM6,  XMM6b,
                     XMM7,  XMM7b
#ifdef _LP64
                    ,XMM8,  XMM8b,
                     XMM9,  XMM9b,
                     XMM10, XMM10b,
                     XMM11, XMM11b,
                     XMM12, XMM12b,
                     XMM13, XMM13b,
                     XMM14, XMM14b,
                     XMM15, XMM15b
#endif
                     );

// Class for all 32bit vector registers
reg_class vectors_reg(XMM0,
                      XMM1,
                      XMM2,
                      XMM3,
                      XMM4,
                      XMM5,
                      XMM6,
                      XMM7
#ifdef _LP64
                     ,XMM8,
                      XMM9,
                      XMM10,
                      XMM11,
                      XMM12,
                      XMM13,
                      XMM14,
                      XMM15
#endif
                      );

// Class for all 64bit vector registers
reg_class vectord_reg(XMM0,  XMM0b,
                      XMM1,  XMM1b,
                      XMM2,  XMM2b,
                      XMM3,  XMM3b,
                      XMM4,  XMM4b,
                      XMM5,  XMM5b,
                      XMM6,  XMM6b,
                      XMM7,  XMM7b
#ifdef _LP64
                     ,XMM8,  XMM8b,
                      XMM9,  XMM9b,
                      XMM10, XMM10b,
                      XMM11, XMM11b,
                      XMM12, XMM12b,
                      XMM13, XMM13b,
                      XMM14, XMM14b,
                      XMM15, XMM15b
#endif
                      );

// Class for all 128bit vector registers (words (a)-(d))
reg_class vectorx_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,
                      XMM1,  XMM1b,  XMM1c,  XMM1d,
                      XMM2,  XMM2b,  XMM2c,  XMM2d,
                      XMM3,  XMM3b,  XMM3c,  XMM3d,
                      XMM4,  XMM4b,  XMM4c,  XMM4d,
                      XMM5,  XMM5b,  XMM5c,  XMM5d,
                      XMM6,  XMM6b,  XMM6c,  XMM6d,
                      XMM7,  XMM7b,  XMM7c,  XMM7d
#ifdef _LP64
                     ,XMM8,  XMM8b,  XMM8c,  XMM8d,
                      XMM9,  XMM9b,  XMM9c,  XMM9d,
                      XMM10, XMM10b, XMM10c, XMM10d,
                      XMM11, XMM11b, XMM11c, XMM11d,
                      XMM12, XMM12b, XMM12c, XMM12d,
                      XMM13, XMM13b, XMM13c, XMM13d,
                      XMM14, XMM14b, XMM14c, XMM14d,
                      XMM15, XMM15b, XMM15c, XMM15d
#endif
                      );

// Class for all 256bit vector registers (words (a)-(h))
reg_class vectory_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                      XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                      XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                      XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                      XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                      XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                      XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                      XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
#ifdef _LP64
                     ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                      XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                      XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                      XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                      XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                      XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                      XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                      XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                      );

%}
|
476 |
27 source %{ |
477 source %{ |
28 // Float masks come from different places depending on platform. |
478 // Float masks come from different places depending on platform. |
29 #ifdef _LP64 |
479 #ifdef _LP64 |
30 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } |
480 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } |
31 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } |
481 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } |
36 static address float_signflip() { return (address)float_signflip_pool; } |
486 static address float_signflip() { return (address)float_signflip_pool; } |
37 static address double_signmask() { return (address)double_signmask_pool; } |
487 static address double_signmask() { return (address)double_signmask_pool; } |
38 static address double_signflip() { return (address)double_signflip_pool; } |
488 static address double_signflip() { return (address)double_signflip_pool; } |
39 #endif |
489 #endif |
40 |
490 |
|
// Map Types to machine register types.
// NOTE(review): the entries are positional — each slot must line up with the
// corresponding constant in the Type enum (Type::lastype entries); do not
// reorder without updating that enum.  0 means "no machine register".
const int Matcher::base2reg[Type::lastype] = {
  Node::NotAMachineReg,0,0, Op_RegI, Op_RegL, 0, Op_RegN,
  Node::NotAMachineReg, Node::NotAMachineReg, /* tuple, array */
  Op_VecS, Op_VecD, Op_VecX, Op_VecY, /* Vectors */
  Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, /* the pointers */
  0, 0/*abio*/,
  Op_RegP /* Return address */, 0, /* the memories */
  Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD,
  0 /*bottom*/
};
|
502 |
|
// Max vector size in bytes. 0 if not supported.
// Returns the widest supported vector width for element type 'bt', capped by
// the MaxVectorSize flag and subject to a per-type minimum lane count.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  assert(is_java_primitive(bt), "only primitive type vectors");
  if (UseSSE < 2) return 0;
  // SSE2 supports 128bit vectors for all types.
  // AVX2 supports 256bit vectors for all types.
  int size = (UseAVX > 1) ? 32 : 16;
  // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
  if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
    size = 32;
  // Use flag to limit vector size.
  size = MIN2(size,(int)MaxVectorSize);
  // Minimum 2 values in vector (or 4 for bytes).
  // The cases below deliberately fall through: a width that satisfies the
  // 2-lane minimum for a wide type trivially satisfies the narrower ones.
  switch (bt) {
  case T_DOUBLE:
  case T_LONG:
    if (size < 16) return 0;  // 2 x 8-byte lanes
    // fall through
  case T_FLOAT:
  case T_INT:
    if (size < 8) return 0;   // 2 x 4-byte lanes
    // fall through
  case T_BOOLEAN:
  case T_BYTE:
  case T_CHAR:
  case T_SHORT:
    if (size < 4) return 0;   // minimum loadable vector is 4 bytes
    break;
  default:
    ShouldNotReachHere();
  }
  return size;
}
|
534 |
|
535 // Limits on vector size (number of elements) loaded into vector. |
|
536 const int Matcher::max_vector_size(const BasicType bt) { |
|
537 return vector_width_in_bytes(bt)/type2aelembytes(bt); |
|
538 } |
|
539 const int Matcher::min_vector_size(const BasicType bt) { |
|
540 int max_size = max_vector_size(bt); |
|
541 // Min size which can be loaded into vector is 4 bytes. |
|
542 int size = (type2aelembytes(bt) == 1) ? 4 : 2; |
|
543 return MIN2(size,max_size); |
|
544 } |
|
545 |
|
546 // Vector ideal reg corresponding to specidied size in bytes |
|
547 const int Matcher::vector_ideal_reg(int size) { |
|
548 assert(MaxVectorSize >= size, ""); |
|
549 switch(size) { |
|
550 case 4: return Op_VecS; |
|
551 case 8: return Op_VecD; |
|
552 case 16: return Op_VecX; |
|
553 case 32: return Op_VecY; |
|
554 } |
|
555 ShouldNotReachHere(); |
|
556 return 0; |
|
557 } |
|
558 |
|
559 // x86 supports misaligned vectors store/load. |
|
560 const bool Matcher::misaligned_vectors_ok() { |
|
561 return !AlignVector; // can be changed by flag |
|
562 } |
|
563 |
|
564 // Helper methods for MachSpillCopyNode::implementation(). |
|
565 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, |
|
566 int src_hi, int dst_hi, uint ireg, outputStream* st) { |
|
567 // In 64-bit VM size calculation is very complex. Emitting instructions |
|
568 // into scratch buffer is used to get size in 64-bit VM. |
|
569 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) |
|
570 assert(ireg == Op_VecS || // 32bit vector |
|
571 (src_lo & 1) == 0 && (src_lo + 1) == src_hi && |
|
572 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, |
|
573 "no non-adjacent vector moves" ); |
|
574 if (cbuf) { |
|
575 MacroAssembler _masm(cbuf); |
|
576 int offset = __ offset(); |
|
577 switch (ireg) { |
|
578 case Op_VecS: // copy whole register |
|
579 case Op_VecD: |
|
580 case Op_VecX: |
|
581 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); |
|
582 break; |
|
583 case Op_VecY: |
|
584 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); |
|
585 break; |
|
586 default: |
|
587 ShouldNotReachHere(); |
|
588 } |
|
589 int size = __ offset() - offset; |
|
590 #ifdef ASSERT |
|
591 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. |
|
592 assert(!do_size || size == 4, "incorrect size calculattion"); |
|
593 #endif |
|
594 return size; |
|
595 #ifndef PRODUCT |
|
596 } else if (!do_size) { |
|
597 switch (ireg) { |
|
598 case Op_VecS: |
|
599 case Op_VecD: |
|
600 case Op_VecX: |
|
601 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); |
|
602 break; |
|
603 case Op_VecY: |
|
604 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); |
|
605 break; |
|
606 default: |
|
607 ShouldNotReachHere(); |
|
608 } |
|
609 #endif |
|
610 } |
|
611 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix. |
|
612 return 4; |
|
613 } |
|
614 |
|
615 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, |
|
616 int stack_offset, int reg, uint ireg, outputStream* st) { |
|
617 // In 64-bit VM size calculation is very complex. Emitting instructions |
|
618 // into scratch buffer is used to get size in 64-bit VM. |
|
619 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) |
|
620 if (cbuf) { |
|
621 MacroAssembler _masm(cbuf); |
|
622 int offset = __ offset(); |
|
623 if (is_load) { |
|
624 switch (ireg) { |
|
625 case Op_VecS: |
|
626 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); |
|
627 break; |
|
628 case Op_VecD: |
|
629 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); |
|
630 break; |
|
631 case Op_VecX: |
|
632 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); |
|
633 break; |
|
634 case Op_VecY: |
|
635 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); |
|
636 break; |
|
637 default: |
|
638 ShouldNotReachHere(); |
|
639 } |
|
640 } else { // store |
|
641 switch (ireg) { |
|
642 case Op_VecS: |
|
643 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); |
|
644 break; |
|
645 case Op_VecD: |
|
646 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); |
|
647 break; |
|
648 case Op_VecX: |
|
649 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); |
|
650 break; |
|
651 case Op_VecY: |
|
652 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); |
|
653 break; |
|
654 default: |
|
655 ShouldNotReachHere(); |
|
656 } |
|
657 } |
|
658 int size = __ offset() - offset; |
|
659 #ifdef ASSERT |
|
660 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4); |
|
661 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. |
|
662 assert(!do_size || size == (5+offset_size), "incorrect size calculattion"); |
|
663 #endif |
|
664 return size; |
|
665 #ifndef PRODUCT |
|
666 } else if (!do_size) { |
|
667 if (is_load) { |
|
668 switch (ireg) { |
|
669 case Op_VecS: |
|
670 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); |
|
671 break; |
|
672 case Op_VecD: |
|
673 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); |
|
674 break; |
|
675 case Op_VecX: |
|
676 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); |
|
677 break; |
|
678 case Op_VecY: |
|
679 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); |
|
680 break; |
|
681 default: |
|
682 ShouldNotReachHere(); |
|
683 } |
|
684 } else { // store |
|
685 switch (ireg) { |
|
686 case Op_VecS: |
|
687 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); |
|
688 break; |
|
689 case Op_VecD: |
|
690 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); |
|
691 break; |
|
692 case Op_VecX: |
|
693 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); |
|
694 break; |
|
695 case Op_VecY: |
|
696 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); |
|
697 break; |
|
698 default: |
|
699 ShouldNotReachHere(); |
|
700 } |
|
701 } |
|
702 #endif |
|
703 } |
|
704 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4); |
|
705 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. |
|
706 return 5+offset_size; |
|
707 } |
|
708 |
|
709 static inline jfloat replicate4_imm(int con, int width) { |
|
710 // Load a constant of "width" (in bytes) and replicate it to fill 32bit. |
|
711 assert(width == 1 || width == 2, "only byte or short types here"); |
|
712 int bit_width = width * 8; |
|
713 jint val = con; |
|
714 val &= (1 << bit_width) - 1; // mask off sign bits |
|
715 while(bit_width < 32) { |
|
716 val |= (val << bit_width); |
|
717 bit_width <<= 1; |
|
718 } |
|
719 jfloat fval = *((jfloat*) &val); // coerce to float type |
|
720 return fval; |
|
721 } |
|
722 |
|
723 static inline jdouble replicate8_imm(int con, int width) { |
|
724 // Load a constant of "width" (in bytes) and replicate it to fill 64bit. |
|
725 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); |
|
726 int bit_width = width * 8; |
|
727 jlong val = con; |
|
728 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits |
|
729 while(bit_width < 64) { |
|
730 val |= (val << bit_width); |
|
731 bit_width <<= 1; |
|
732 } |
|
733 jdouble dval = *((jdouble*) &val); // coerce to double type |
|
734 return dval; |
|
735 } |
|
736 |
#ifndef PRODUCT
// Print the nop padding (emitted to align loops and calls) in debug output.
void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("nop \t# %d bytes pad for loops and calls", _count);
}
#endif
850 __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); |
1586 __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); |
851 %} |
1587 %} |
852 ins_pipe(pipe_slow); |
1588 ins_pipe(pipe_slow); |
853 %} |
1589 %} |
854 |
1590 |
|
1591 |
|
// ====================VECTOR INSTRUCTIONS=====================================

// Vector loads: one rule per vector width, selected by the predicate on
// the LoadVector node's memory size (4/8/16/32 bytes).

// Load vectors (4 bytes long)
instruct loadV4(vecS dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movd $dst,$mem\t! load vector (4 bytes)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (8 bytes long)
instruct loadV8(vecD dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movq $dst,$mem\t! load vector (8 bytes)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (16 bytes long)
instruct loadV16(vecX dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (32 bytes long)
instruct loadV32(vecY dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 32);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}
|
1641 |
|
// Store vectors: one rule per vector width, selected by the predicate on
// the StoreVector node's memory size (4/8/16/32 bytes).
instruct storeV4(memory mem, vecS src) %{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movd $mem,$src\t! store vector (4 bytes)" %}
  ins_encode %{
    __ movdl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV8(memory mem, vecD src) %{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movq $mem,$src\t! store vector (8 bytes)" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV16(memory mem, vecX src) %{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV32(memory mem, vecY src) %{
  predicate(n->as_StoreVector()->memory_size() == 32);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
|
1686 |
|
// Replicate byte scalar to be vector
// Pattern: movd broadcasts into lane 0, punpcklbw doubles bytes to 16 bits,
// pshuflw fills the low 64 bits; wider forms add movlhps (128-bit) and
// vinsertf128h (256-bit) to duplicate into the upper halves.
instruct Repl4B(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate8B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "movlhps $dst,$dst\t! replicate16B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "movlhps $dst,$dst\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate32B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
|
1749 |
|
// Replicate byte scalar immediate to be vector by loading from const table.
// replicate4_imm/replicate8_imm pre-replicate the byte into a 32/64-bit
// pattern that is materialized in the constant table and loaded in one go.
instruct Repl4B_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB con));
  format %{ "movss $dst,[$constantaddress]\t! replicate4B($con)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  format %{ "movsd $dst,[$constantaddress]\t! replicate8B($con)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));
  format %{ "movsd $dst,[$constantaddress]\t! replicate16B($con)\n\t"
            "movlhps $dst,$dst" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
|
1782 |
|
instruct Repl32B_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB con));
  // Load the 8-byte replicated pattern from the constant table, then widen
  // to 16 bytes (movlhps) and 32 bytes (vinsertf128h).
  format %{ "movsd $dst,[$constantaddress]\t! replicate32B($con)\n\t"
            "movlhps $dst,$dst\n\t"
            "vinsertf128h $dst,$dst,$dst" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
|
1796 |
|
// Replicate byte scalar zero to be vector
// Zeroing uses pxor for 128-bit-or-smaller vectors; the 256-bit form
// uses vxorpd because plain AVX has no 256-bit integer xor.
instruct Repl4B_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate4B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8B_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate8B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16B_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate16B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl32B_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB zero));
  format %{ "vxorpd $dst,$dst,$dst\t! replicate32B zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    bool vector256 = true;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}
|
1839 |
|
// Replicate char/short (2 byte) scalar to be vector
// pshuflw broadcasts the low short across the low 64 bits; wider forms
// add movlhps (128-bit) and vinsertf128h (256-bit).
instruct Repl2S(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate2S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "movlhps $dst,$dst\t! replicate8S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "movlhps $dst,$dst\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate16S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
|
1894 |
|
// Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
instruct Repl2S_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS con));
  format %{ "movss $dst,[$constantaddress]\t! replicate2S($con)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  format %{ "movsd $dst,[$constantaddress]\t! replicate4S($con)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS con));
  format %{ "movsd $dst,[$constantaddress]\t! replicate8S($con)\n\t"
            "movlhps $dst,$dst" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS con));
  format %{ "movsd $dst,[$constantaddress]\t! replicate16S($con)\n\t"
            "movlhps $dst,$dst\n\t"
            "vinsertf128h $dst,$dst,$dst" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
|
1941 |
|
// Replicate char/short (2 byte) scalar zero to be vector
instruct Repl2S_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate2S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate4S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate8S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16S_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS zero));
  format %{ "vxorpd $dst,$dst,$dst\t! replicate16S zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    bool vector256 = true;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}
|
1984 |
|
// Replicate integer (4 byte) scalar to be vector
// pshufd with imm 0x00 broadcasts dword lane 0 across the 128-bit register;
// the 256-bit form adds vinsertf128h to copy into the upper half.
instruct Repl2I(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate4I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
|
2023 |
|
// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
instruct Repl2I_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  format %{ "movsd $dst,[$constantaddress]\t! replicate2I($con)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  format %{ "movsd $dst,[$constantaddress]\t! replicate4I($con)\n\t"
            "movlhps $dst,$dst" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI con));
  format %{ "movsd $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "movlhps $dst,$dst\n\t"
            "vinsertf128h $dst,$dst,$dst" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
|
2060 |
|
// Integer could be loaded into xmm register directly from memory.
instruct Repl2I_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI mem));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI mem));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate4I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI mem));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
|
2099 |
|
// Replicate integer (4 byte) scalar zero to be vector
instruct Repl2I_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate2I" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}
|
2110 |
|
instruct Repl4I_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI zero));
  // Fixed stray ')' in the disassembly comment ("replicate4I zero)").
  format %{ "pxor $dst,$dst\t! replicate4I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}
|
2120 |
|
instruct Repl8I_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI zero));
  format %{ "vxorpd $dst,$dst,$dst\t! replicate8I zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    bool vector256 = true;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}
|
2132 |
|
// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
// 64-bit VM: a long fits in one GPR, so movdq moves it in a single step.
instruct Repl2L(vecX dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "movlhps $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L(vecY dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "movlhps $dst,$dst\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
// 32-bit VM: a long lives in a GPR pair, so its halves are moved
// separately and recombined with punpckldq (needs a TEMP xmm).
instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "movlhps $dst,$dst\t! replicate2L"%}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "movlhps $dst,$dst\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64
|
2197 |
|
// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
instruct Repl2L_imm(vecX dst, immL con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL con));
  format %{ "movsd $dst,[$constantaddress]\t! replicate2L($con)\n\t"
            "movlhps $dst,$dst" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L_imm(vecY dst, immL con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL con));
  format %{ "movsd $dst,[$constantaddress]\t! replicate4L($con)\n\t"
            "movlhps $dst,$dst\n\t"
            "vinsertf128h $dst,$dst,$dst" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
|
2224 |
|
// Long could be loaded into xmm register directly from memory.
instruct Repl2L_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL mem));
  format %{ "movq $dst,$mem\n\t"
            "movlhps $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL mem));
  format %{ "movq $dst,$mem\n\t"
            "movlhps $dst,$dst\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
|
2251 |
|
// Replicate long (8 byte) scalar zero to be vector
// Repl2L_zero: zero a 128-bit vector with pxor (reg ^ reg == 0).
instruct Repl2L_zero(vecX dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL zero));
  format %{ "pxor    $dst,$dst\t! replicate2L zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}
|
2262 |
|
// Repl4L_zero: zero a 256-bit vector (4 longs).
instruct Repl4L_zero(vecY dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL zero));
  format %{ "vxorpd  $dst,$dst,$dst\t! replicate4L zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    bool vector256 = true;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}
|
2274 |
|
// Replicate float (4 byte) scalar to be vector
// Repl2F: fill a 64-bit vector (vecD) with 2 copies of the float in $src.
// pshufd with shuffle mask 0x00 broadcasts doubleword 0 of $src into all
// doublewords of $dst (only the low two matter for vecD).
instruct Repl2F(vecD dst, regF src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  // Format fixed to print $src as the pshufd source operand, matching the
  // encoding below (and the Repl8F format); it previously printed $dst.
  format %{ "pshufd  $dst,$src,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}
|
2285 |
|
// Repl4F: fill a 128-bit vector (vecX) with 4 copies of the float in $src.
// pshufd 0x00 broadcasts doubleword 0 of $src into all four doublewords.
instruct Repl4F(vecX dst, regF src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  // Format fixed to print $src as the pshufd source operand, matching the
  // encoding below (and the Repl8F format); it previously printed $dst.
  format %{ "pshufd  $dst,$src,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}
|
2295 |
|
// Repl8F: fill a 256-bit vector (vecY) with 8 copies of the float in $src:
// broadcast into the low 128 bits with pshufd 0x00, then copy the low
// 128 bits into the high 128 bits with vinsertf128h.
instruct Repl8F(vecY dst, regF src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateF src));
  format %{ "pshufd  $dst,$src,0x00\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
|
2307 |
|
// Replicate float (4 byte) scalar zero to be vector
// Repl2F_zero: zero a 64-bit vector (2 floats) with xorps.
instruct Repl2F_zero(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF zero));
  format %{ "xorps   $dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}
|
2318 |
|
// Repl4F_zero: zero a 128-bit vector (4 floats) with xorps.
instruct Repl4F_zero(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF zero));
  format %{ "xorps   $dst,$dst\t! replicate4F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}
|
2328 |
|
// Repl8F_zero: zero a 256-bit vector (8 floats) with the AVX 3-operand
// vxorps (256-bit form).
instruct Repl8F_zero(vecY dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateF zero));
  format %{ "vxorps  $dst,$dst,$dst\t! replicate8F zero" %}
  ins_encode %{
    bool vector256 = true;
    __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}
|
2339 |
|
// Replicate double (8 bytes) scalar to be vector
// Repl2D: fill a 128-bit vector (vecX) with 2 copies of the double in $src.
// pshufd mask 0x44 selects doublewords 0,1,0,1 of $src, i.e. duplicates the
// low quadword (the double) into both halves of $dst.
instruct Repl2D(vecX dst, regD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  format %{ "pshufd  $dst,$src,0x44\t! replicate2D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  %}
  ins_pipe( pipe_slow );
%}
|
2350 |
|
// Repl4D: fill a 256-bit vector (vecY) with 4 copies of the double in $src:
// duplicate the low quadword within the low 128 bits (pshufd 0x44), then
// copy the low 128 bits into the high 128 bits (vinsertf128h).
instruct Repl4D(vecY dst, regD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateD src));
  format %{ "pshufd  $dst,$src,0x44\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
|
2362 |
|
// Replicate double (8 byte) scalar zero to be vector
// Repl2D_zero: zero a 128-bit vector (2 doubles) with xorpd.
instruct Repl2D_zero(vecX dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD zero));
  format %{ "xorpd   $dst,$dst\t! replicate2D zero" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}
|
2373 |
|
// Repl4D_zero: zero a 256-bit vector (4 doubles) with the AVX 3-operand
// vxorpd (256-bit form).
instruct Repl4D_zero(vecY dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateD zero));
  // Format fixed: dropped the stray ",vect256" fourth operand so the printed
  // assembly matches the three-operand vxorpd form used by the sibling
  // 256-bit zero replicates (Repl4L_zero, Repl8F_zero).
  format %{ "vxorpd  $dst,$dst,$dst\t! replicate4D zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it)
    // — same reasoning as Repl4L_zero above.
    bool vector256 = true;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}
|
2384 |