1.1 --- a/src/cpu/x86/vm/x86.ad Wed Aug 15 16:49:38 2012 -0700 1.2 +++ b/src/cpu/x86/vm/x86.ad Mon Aug 20 09:07:21 2012 -0700 1.3 @@ -500,6 +500,24 @@ 1.4 0 /*bottom*/ 1.5 }; 1.6 1.7 +const bool Matcher::match_rule_supported(int opcode) { 1.8 + if (!has_match_rule(opcode)) 1.9 + return false; 1.10 + 1.11 + switch (opcode) { 1.12 + case Op_PopCountI: 1.13 + case Op_PopCountL: 1.14 + if (!UsePopCountInstruction) return false; 1.15 + break; 1.16 + case Op_MulVI: 1.17 + if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX 1.18 + return false; 1.19 + break; 1.20 + } 1.21 + 1.22 + return true; // Per default match rules are supported. 1.23 +} 1.24 + 1.25 // Max vector size in bytes. 0 if not supported. 1.26 const int Matcher::vector_width_in_bytes(BasicType bt) { 1.27 assert(is_java_primitive(bt), "only primitive type vectors"); 1.28 @@ -1439,8 +1457,9 @@ 1.29 ins_cost(150); 1.30 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 1.31 ins_encode %{ 1.32 + bool vector256 = false; 1.33 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 1.34 - ExternalAddress(float_signmask())); 1.35 + ExternalAddress(float_signmask()), vector256); 1.36 %} 1.37 ins_pipe(pipe_slow); 1.38 %} 1.39 @@ -1464,8 +1483,9 @@ 1.40 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 1.41 "# abs double by sign masking" %} 1.42 ins_encode %{ 1.43 + bool vector256 = false; 1.44 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 1.45 - ExternalAddress(double_signmask())); 1.46 + ExternalAddress(double_signmask()), vector256); 1.47 %} 1.48 ins_pipe(pipe_slow); 1.49 %} 1.50 @@ -1487,8 +1507,9 @@ 1.51 ins_cost(150); 1.52 format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 1.53 ins_encode %{ 1.54 + bool vector256 = false; 1.55 __ vxorps($dst$$XMMRegister, $src$$XMMRegister, 1.56 - ExternalAddress(float_signflip())); 1.57 + ExternalAddress(float_signflip()), vector256); 1.58 %} 1.59 ins_pipe(pipe_slow); 1.60 %} 1.61 @@ -1512,8 +1533,9 @@ 1.62 format %{ "vxorpd 
$dst, $src, [0x8000000000000000]\t" 1.63 "# neg double by sign flipping" %} 1.64 ins_encode %{ 1.65 + bool vector256 = false; 1.66 __ vxorpd($dst$$XMMRegister, $src$$XMMRegister, 1.67 - ExternalAddress(double_signflip())); 1.68 + ExternalAddress(double_signflip()), vector256); 1.69 %} 1.70 ins_pipe(pipe_slow); 1.71 %} 1.72 @@ -2382,3 +2404,2416 @@ 1.73 ins_pipe( fpu_reg_reg ); 1.74 %} 1.75 1.76 +// ====================VECTOR ARITHMETIC======================================= 1.77 + 1.78 +// --------------------------------- ADD -------------------------------------- 1.79 + 1.80 +// Bytes vector add 1.81 +instruct vadd4B(vecS dst, vecS src) %{ 1.82 + predicate(n->as_Vector()->length() == 4); 1.83 + match(Set dst (AddVB dst src)); 1.84 + format %{ "paddb $dst,$src\t! add packed4B" %} 1.85 + ins_encode %{ 1.86 + __ paddb($dst$$XMMRegister, $src$$XMMRegister); 1.87 + %} 1.88 + ins_pipe( pipe_slow ); 1.89 +%} 1.90 + 1.91 +instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{ 1.92 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.93 + match(Set dst (AddVB src1 src2)); 1.94 + format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} 1.95 + ins_encode %{ 1.96 + bool vector256 = false; 1.97 + __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.98 + %} 1.99 + ins_pipe( pipe_slow ); 1.100 +%} 1.101 + 1.102 +instruct vadd8B(vecD dst, vecD src) %{ 1.103 + predicate(n->as_Vector()->length() == 8); 1.104 + match(Set dst (AddVB dst src)); 1.105 + format %{ "paddb $dst,$src\t! add packed8B" %} 1.106 + ins_encode %{ 1.107 + __ paddb($dst$$XMMRegister, $src$$XMMRegister); 1.108 + %} 1.109 + ins_pipe( pipe_slow ); 1.110 +%} 1.111 + 1.112 +instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{ 1.113 + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 1.114 + match(Set dst (AddVB src1 src2)); 1.115 + format %{ "vpaddb $dst,$src1,$src2\t! 
add packed8B" %} 1.116 + ins_encode %{ 1.117 + bool vector256 = false; 1.118 + __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.119 + %} 1.120 + ins_pipe( pipe_slow ); 1.121 +%} 1.122 + 1.123 +instruct vadd16B(vecX dst, vecX src) %{ 1.124 + predicate(n->as_Vector()->length() == 16); 1.125 + match(Set dst (AddVB dst src)); 1.126 + format %{ "paddb $dst,$src\t! add packed16B" %} 1.127 + ins_encode %{ 1.128 + __ paddb($dst$$XMMRegister, $src$$XMMRegister); 1.129 + %} 1.130 + ins_pipe( pipe_slow ); 1.131 +%} 1.132 + 1.133 +instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{ 1.134 + predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 1.135 + match(Set dst (AddVB src1 src2)); 1.136 + format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %} 1.137 + ins_encode %{ 1.138 + bool vector256 = false; 1.139 + __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.140 + %} 1.141 + ins_pipe( pipe_slow ); 1.142 +%} 1.143 + 1.144 +instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{ 1.145 + predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 1.146 + match(Set dst (AddVB src (LoadVector mem))); 1.147 + format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} 1.148 + ins_encode %{ 1.149 + bool vector256 = false; 1.150 + __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.151 + %} 1.152 + ins_pipe( pipe_slow ); 1.153 +%} 1.154 + 1.155 +instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{ 1.156 + predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 1.157 + match(Set dst (AddVB src1 src2)); 1.158 + format %{ "vpaddb $dst,$src1,$src2\t! 
add packed32B" %} 1.159 + ins_encode %{ 1.160 + bool vector256 = true; 1.161 + __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.162 + %} 1.163 + ins_pipe( pipe_slow ); 1.164 +%} 1.165 + 1.166 +instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{ 1.167 + predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 1.168 + match(Set dst (AddVB src (LoadVector mem))); 1.169 + format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} 1.170 + ins_encode %{ 1.171 + bool vector256 = true; 1.172 + __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.173 + %} 1.174 + ins_pipe( pipe_slow ); 1.175 +%} 1.176 + 1.177 +// Shorts/Chars vector add 1.178 +instruct vadd2S(vecS dst, vecS src) %{ 1.179 + predicate(n->as_Vector()->length() == 2); 1.180 + match(Set dst (AddVS dst src)); 1.181 + format %{ "paddw $dst,$src\t! add packed2S" %} 1.182 + ins_encode %{ 1.183 + __ paddw($dst$$XMMRegister, $src$$XMMRegister); 1.184 + %} 1.185 + ins_pipe( pipe_slow ); 1.186 +%} 1.187 + 1.188 +instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{ 1.189 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.190 + match(Set dst (AddVS src1 src2)); 1.191 + format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %} 1.192 + ins_encode %{ 1.193 + bool vector256 = false; 1.194 + __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.195 + %} 1.196 + ins_pipe( pipe_slow ); 1.197 +%} 1.198 + 1.199 +instruct vadd4S(vecD dst, vecD src) %{ 1.200 + predicate(n->as_Vector()->length() == 4); 1.201 + match(Set dst (AddVS dst src)); 1.202 + format %{ "paddw $dst,$src\t! add packed4S" %} 1.203 + ins_encode %{ 1.204 + __ paddw($dst$$XMMRegister, $src$$XMMRegister); 1.205 + %} 1.206 + ins_pipe( pipe_slow ); 1.207 +%} 1.208 + 1.209 +instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{ 1.210 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.211 + match(Set dst (AddVS src1 src2)); 1.212 + format %{ "vpaddw $dst,$src1,$src2\t! 
add packed4S" %} 1.213 + ins_encode %{ 1.214 + bool vector256 = false; 1.215 + __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.216 + %} 1.217 + ins_pipe( pipe_slow ); 1.218 +%} 1.219 + 1.220 +instruct vadd8S(vecX dst, vecX src) %{ 1.221 + predicate(n->as_Vector()->length() == 8); 1.222 + match(Set dst (AddVS dst src)); 1.223 + format %{ "paddw $dst,$src\t! add packed8S" %} 1.224 + ins_encode %{ 1.225 + __ paddw($dst$$XMMRegister, $src$$XMMRegister); 1.226 + %} 1.227 + ins_pipe( pipe_slow ); 1.228 +%} 1.229 + 1.230 +instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{ 1.231 + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 1.232 + match(Set dst (AddVS src1 src2)); 1.233 + format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %} 1.234 + ins_encode %{ 1.235 + bool vector256 = false; 1.236 + __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.237 + %} 1.238 + ins_pipe( pipe_slow ); 1.239 +%} 1.240 + 1.241 +instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{ 1.242 + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 1.243 + match(Set dst (AddVS src (LoadVector mem))); 1.244 + format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %} 1.245 + ins_encode %{ 1.246 + bool vector256 = false; 1.247 + __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.248 + %} 1.249 + ins_pipe( pipe_slow ); 1.250 +%} 1.251 + 1.252 +instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{ 1.253 + predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 1.254 + match(Set dst (AddVS src1 src2)); 1.255 + format %{ "vpaddw $dst,$src1,$src2\t! 
add packed16S" %} 1.256 + ins_encode %{ 1.257 + bool vector256 = true; 1.258 + __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.259 + %} 1.260 + ins_pipe( pipe_slow ); 1.261 +%} 1.262 + 1.263 +instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{ 1.264 + predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 1.265 + match(Set dst (AddVS src (LoadVector mem))); 1.266 + format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %} 1.267 + ins_encode %{ 1.268 + bool vector256 = true; 1.269 + __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.270 + %} 1.271 + ins_pipe( pipe_slow ); 1.272 +%} 1.273 + 1.274 +// Integers vector add 1.275 +instruct vadd2I(vecD dst, vecD src) %{ 1.276 + predicate(n->as_Vector()->length() == 2); 1.277 + match(Set dst (AddVI dst src)); 1.278 + format %{ "paddd $dst,$src\t! add packed2I" %} 1.279 + ins_encode %{ 1.280 + __ paddd($dst$$XMMRegister, $src$$XMMRegister); 1.281 + %} 1.282 + ins_pipe( pipe_slow ); 1.283 +%} 1.284 + 1.285 +instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{ 1.286 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.287 + match(Set dst (AddVI src1 src2)); 1.288 + format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %} 1.289 + ins_encode %{ 1.290 + bool vector256 = false; 1.291 + __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.292 + %} 1.293 + ins_pipe( pipe_slow ); 1.294 +%} 1.295 + 1.296 +instruct vadd4I(vecX dst, vecX src) %{ 1.297 + predicate(n->as_Vector()->length() == 4); 1.298 + match(Set dst (AddVI dst src)); 1.299 + format %{ "paddd $dst,$src\t! add packed4I" %} 1.300 + ins_encode %{ 1.301 + __ paddd($dst$$XMMRegister, $src$$XMMRegister); 1.302 + %} 1.303 + ins_pipe( pipe_slow ); 1.304 +%} 1.305 + 1.306 +instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{ 1.307 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.308 + match(Set dst (AddVI src1 src2)); 1.309 + format %{ "vpaddd $dst,$src1,$src2\t! 
add packed4I" %} 1.310 + ins_encode %{ 1.311 + bool vector256 = false; 1.312 + __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.313 + %} 1.314 + ins_pipe( pipe_slow ); 1.315 +%} 1.316 + 1.317 +instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{ 1.318 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.319 + match(Set dst (AddVI src (LoadVector mem))); 1.320 + format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %} 1.321 + ins_encode %{ 1.322 + bool vector256 = false; 1.323 + __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.324 + %} 1.325 + ins_pipe( pipe_slow ); 1.326 +%} 1.327 + 1.328 +instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{ 1.329 + predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 1.330 + match(Set dst (AddVI src1 src2)); 1.331 + format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %} 1.332 + ins_encode %{ 1.333 + bool vector256 = true; 1.334 + __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.335 + %} 1.336 + ins_pipe( pipe_slow ); 1.337 +%} 1.338 + 1.339 +instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{ 1.340 + predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 1.341 + match(Set dst (AddVI src (LoadVector mem))); 1.342 + format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %} 1.343 + ins_encode %{ 1.344 + bool vector256 = true; 1.345 + __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.346 + %} 1.347 + ins_pipe( pipe_slow ); 1.348 +%} 1.349 + 1.350 +// Longs vector add 1.351 +instruct vadd2L(vecX dst, vecX src) %{ 1.352 + predicate(n->as_Vector()->length() == 2); 1.353 + match(Set dst (AddVL dst src)); 1.354 + format %{ "paddq $dst,$src\t! 
add packed2L" %} 1.355 + ins_encode %{ 1.356 + __ paddq($dst$$XMMRegister, $src$$XMMRegister); 1.357 + %} 1.358 + ins_pipe( pipe_slow ); 1.359 +%} 1.360 + 1.361 +instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{ 1.362 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.363 + match(Set dst (AddVL src1 src2)); 1.364 + format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %} 1.365 + ins_encode %{ 1.366 + bool vector256 = false; 1.367 + __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.368 + %} 1.369 + ins_pipe( pipe_slow ); 1.370 +%} 1.371 + 1.372 +instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{ 1.373 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.374 + match(Set dst (AddVL src (LoadVector mem))); 1.375 + format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %} 1.376 + ins_encode %{ 1.377 + bool vector256 = false; 1.378 + __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.379 + %} 1.380 + ins_pipe( pipe_slow ); 1.381 +%} 1.382 + 1.383 +instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{ 1.384 + predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 1.385 + match(Set dst (AddVL src1 src2)); 1.386 + format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %} 1.387 + ins_encode %{ 1.388 + bool vector256 = true; 1.389 + __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.390 + %} 1.391 + ins_pipe( pipe_slow ); 1.392 +%} 1.393 + 1.394 +instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{ 1.395 + predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 1.396 + match(Set dst (AddVL src (LoadVector mem))); 1.397 + format %{ "vpaddq $dst,$src,$mem\t! 
add packed4L" %} 1.398 + ins_encode %{ 1.399 + bool vector256 = true; 1.400 + __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.401 + %} 1.402 + ins_pipe( pipe_slow ); 1.403 +%} 1.404 + 1.405 +// Floats vector add 1.406 +instruct vadd2F(vecD dst, vecD src) %{ 1.407 + predicate(n->as_Vector()->length() == 2); 1.408 + match(Set dst (AddVF dst src)); 1.409 + format %{ "addps $dst,$src\t! add packed2F" %} 1.410 + ins_encode %{ 1.411 + __ addps($dst$$XMMRegister, $src$$XMMRegister); 1.412 + %} 1.413 + ins_pipe( pipe_slow ); 1.414 +%} 1.415 + 1.416 +instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{ 1.417 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.418 + match(Set dst (AddVF src1 src2)); 1.419 + format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %} 1.420 + ins_encode %{ 1.421 + bool vector256 = false; 1.422 + __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.423 + %} 1.424 + ins_pipe( pipe_slow ); 1.425 +%} 1.426 + 1.427 +instruct vadd4F(vecX dst, vecX src) %{ 1.428 + predicate(n->as_Vector()->length() == 4); 1.429 + match(Set dst (AddVF dst src)); 1.430 + format %{ "addps $dst,$src\t! add packed4F" %} 1.431 + ins_encode %{ 1.432 + __ addps($dst$$XMMRegister, $src$$XMMRegister); 1.433 + %} 1.434 + ins_pipe( pipe_slow ); 1.435 +%} 1.436 + 1.437 +instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{ 1.438 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.439 + match(Set dst (AddVF src1 src2)); 1.440 + format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %} 1.441 + ins_encode %{ 1.442 + bool vector256 = false; 1.443 + __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.444 + %} 1.445 + ins_pipe( pipe_slow ); 1.446 +%} 1.447 + 1.448 +instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{ 1.449 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.450 + match(Set dst (AddVF src (LoadVector mem))); 1.451 + format %{ "vaddps $dst,$src,$mem\t! 
add packed4F" %} 1.452 + ins_encode %{ 1.453 + bool vector256 = false; 1.454 + __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.455 + %} 1.456 + ins_pipe( pipe_slow ); 1.457 +%} 1.458 + 1.459 +instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{ 1.460 + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 1.461 + match(Set dst (AddVF src1 src2)); 1.462 + format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %} 1.463 + ins_encode %{ 1.464 + bool vector256 = true; 1.465 + __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.466 + %} 1.467 + ins_pipe( pipe_slow ); 1.468 +%} 1.469 + 1.470 +instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{ 1.471 + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 1.472 + match(Set dst (AddVF src (LoadVector mem))); 1.473 + format %{ "vaddps $dst,$src,$mem\t! add packed8F" %} 1.474 + ins_encode %{ 1.475 + bool vector256 = true; 1.476 + __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.477 + %} 1.478 + ins_pipe( pipe_slow ); 1.479 +%} 1.480 + 1.481 +// Doubles vector add 1.482 +instruct vadd2D(vecX dst, vecX src) %{ 1.483 + predicate(n->as_Vector()->length() == 2); 1.484 + match(Set dst (AddVD dst src)); 1.485 + format %{ "addpd $dst,$src\t! add packed2D" %} 1.486 + ins_encode %{ 1.487 + __ addpd($dst$$XMMRegister, $src$$XMMRegister); 1.488 + %} 1.489 + ins_pipe( pipe_slow ); 1.490 +%} 1.491 + 1.492 +instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{ 1.493 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.494 + match(Set dst (AddVD src1 src2)); 1.495 + format %{ "vaddpd $dst,$src1,$src2\t! 
add packed2D" %} 1.496 + ins_encode %{ 1.497 + bool vector256 = false; 1.498 + __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.499 + %} 1.500 + ins_pipe( pipe_slow ); 1.501 +%} 1.502 + 1.503 +instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{ 1.504 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.505 + match(Set dst (AddVD src (LoadVector mem))); 1.506 + format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %} 1.507 + ins_encode %{ 1.508 + bool vector256 = false; 1.509 + __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.510 + %} 1.511 + ins_pipe( pipe_slow ); 1.512 +%} 1.513 + 1.514 +instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{ 1.515 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.516 + match(Set dst (AddVD src1 src2)); 1.517 + format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %} 1.518 + ins_encode %{ 1.519 + bool vector256 = true; 1.520 + __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.521 + %} 1.522 + ins_pipe( pipe_slow ); 1.523 +%} 1.524 + 1.525 +instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{ 1.526 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.527 + match(Set dst (AddVD src (LoadVector mem))); 1.528 + format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %} 1.529 + ins_encode %{ 1.530 + bool vector256 = true; 1.531 + __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.532 + %} 1.533 + ins_pipe( pipe_slow ); 1.534 +%} 1.535 + 1.536 +// --------------------------------- SUB -------------------------------------- 1.537 + 1.538 +// Bytes vector sub 1.539 +instruct vsub4B(vecS dst, vecS src) %{ 1.540 + predicate(n->as_Vector()->length() == 4); 1.541 + match(Set dst (SubVB dst src)); 1.542 + format %{ "psubb $dst,$src\t! 
sub packed4B" %} 1.543 + ins_encode %{ 1.544 + __ psubb($dst$$XMMRegister, $src$$XMMRegister); 1.545 + %} 1.546 + ins_pipe( pipe_slow ); 1.547 +%} 1.548 + 1.549 +instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{ 1.550 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.551 + match(Set dst (SubVB src1 src2)); 1.552 + format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %} 1.553 + ins_encode %{ 1.554 + bool vector256 = false; 1.555 + __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.556 + %} 1.557 + ins_pipe( pipe_slow ); 1.558 +%} 1.559 + 1.560 +instruct vsub8B(vecD dst, vecD src) %{ 1.561 + predicate(n->as_Vector()->length() == 8); 1.562 + match(Set dst (SubVB dst src)); 1.563 + format %{ "psubb $dst,$src\t! sub packed8B" %} 1.564 + ins_encode %{ 1.565 + __ psubb($dst$$XMMRegister, $src$$XMMRegister); 1.566 + %} 1.567 + ins_pipe( pipe_slow ); 1.568 +%} 1.569 + 1.570 +instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{ 1.571 + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 1.572 + match(Set dst (SubVB src1 src2)); 1.573 + format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} 1.574 + ins_encode %{ 1.575 + bool vector256 = false; 1.576 + __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.577 + %} 1.578 + ins_pipe( pipe_slow ); 1.579 +%} 1.580 + 1.581 +instruct vsub16B(vecX dst, vecX src) %{ 1.582 + predicate(n->as_Vector()->length() == 16); 1.583 + match(Set dst (SubVB dst src)); 1.584 + format %{ "psubb $dst,$src\t! sub packed16B" %} 1.585 + ins_encode %{ 1.586 + __ psubb($dst$$XMMRegister, $src$$XMMRegister); 1.587 + %} 1.588 + ins_pipe( pipe_slow ); 1.589 +%} 1.590 + 1.591 +instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{ 1.592 + predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 1.593 + match(Set dst (SubVB src1 src2)); 1.594 + format %{ "vpsubb $dst,$src1,$src2\t! 
sub packed16B" %} 1.595 + ins_encode %{ 1.596 + bool vector256 = false; 1.597 + __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.598 + %} 1.599 + ins_pipe( pipe_slow ); 1.600 +%} 1.601 + 1.602 +instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{ 1.603 + predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 1.604 + match(Set dst (SubVB src (LoadVector mem))); 1.605 + format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %} 1.606 + ins_encode %{ 1.607 + bool vector256 = false; 1.608 + __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.609 + %} 1.610 + ins_pipe( pipe_slow ); 1.611 +%} 1.612 + 1.613 +instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{ 1.614 + predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 1.615 + match(Set dst (SubVB src1 src2)); 1.616 + format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %} 1.617 + ins_encode %{ 1.618 + bool vector256 = true; 1.619 + __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.620 + %} 1.621 + ins_pipe( pipe_slow ); 1.622 +%} 1.623 + 1.624 +instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{ 1.625 + predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 1.626 + match(Set dst (SubVB src (LoadVector mem))); 1.627 + format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} 1.628 + ins_encode %{ 1.629 + bool vector256 = true; 1.630 + __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.631 + %} 1.632 + ins_pipe( pipe_slow ); 1.633 +%} 1.634 + 1.635 +// Shorts/Chars vector sub 1.636 +instruct vsub2S(vecS dst, vecS src) %{ 1.637 + predicate(n->as_Vector()->length() == 2); 1.638 + match(Set dst (SubVS dst src)); 1.639 + format %{ "psubw $dst,$src\t! 
sub packed2S" %} 1.640 + ins_encode %{ 1.641 + __ psubw($dst$$XMMRegister, $src$$XMMRegister); 1.642 + %} 1.643 + ins_pipe( pipe_slow ); 1.644 +%} 1.645 + 1.646 +instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{ 1.647 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.648 + match(Set dst (SubVS src1 src2)); 1.649 + format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} 1.650 + ins_encode %{ 1.651 + bool vector256 = false; 1.652 + __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.653 + %} 1.654 + ins_pipe( pipe_slow ); 1.655 +%} 1.656 + 1.657 +instruct vsub4S(vecD dst, vecD src) %{ 1.658 + predicate(n->as_Vector()->length() == 4); 1.659 + match(Set dst (SubVS dst src)); 1.660 + format %{ "psubw $dst,$src\t! sub packed4S" %} 1.661 + ins_encode %{ 1.662 + __ psubw($dst$$XMMRegister, $src$$XMMRegister); 1.663 + %} 1.664 + ins_pipe( pipe_slow ); 1.665 +%} 1.666 + 1.667 +instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{ 1.668 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.669 + match(Set dst (SubVS src1 src2)); 1.670 + format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} 1.671 + ins_encode %{ 1.672 + bool vector256 = false; 1.673 + __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.674 + %} 1.675 + ins_pipe( pipe_slow ); 1.676 +%} 1.677 + 1.678 +instruct vsub8S(vecX dst, vecX src) %{ 1.679 + predicate(n->as_Vector()->length() == 8); 1.680 + match(Set dst (SubVS dst src)); 1.681 + format %{ "psubw $dst,$src\t! sub packed8S" %} 1.682 + ins_encode %{ 1.683 + __ psubw($dst$$XMMRegister, $src$$XMMRegister); 1.684 + %} 1.685 + ins_pipe( pipe_slow ); 1.686 +%} 1.687 + 1.688 +instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{ 1.689 + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 1.690 + match(Set dst (SubVS src1 src2)); 1.691 + format %{ "vpsubw $dst,$src1,$src2\t! 
sub packed8S" %} 1.692 + ins_encode %{ 1.693 + bool vector256 = false; 1.694 + __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.695 + %} 1.696 + ins_pipe( pipe_slow ); 1.697 +%} 1.698 + 1.699 +instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{ 1.700 + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 1.701 + match(Set dst (SubVS src (LoadVector mem))); 1.702 + format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} 1.703 + ins_encode %{ 1.704 + bool vector256 = false; 1.705 + __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.706 + %} 1.707 + ins_pipe( pipe_slow ); 1.708 +%} 1.709 + 1.710 +instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{ 1.711 + predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 1.712 + match(Set dst (SubVS src1 src2)); 1.713 + format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} 1.714 + ins_encode %{ 1.715 + bool vector256 = true; 1.716 + __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.717 + %} 1.718 + ins_pipe( pipe_slow ); 1.719 +%} 1.720 + 1.721 +instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{ 1.722 + predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 1.723 + match(Set dst (SubVS src (LoadVector mem))); 1.724 + format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} 1.725 + ins_encode %{ 1.726 + bool vector256 = true; 1.727 + __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.728 + %} 1.729 + ins_pipe( pipe_slow ); 1.730 +%} 1.731 + 1.732 +// Integers vector sub 1.733 +instruct vsub2I(vecD dst, vecD src) %{ 1.734 + predicate(n->as_Vector()->length() == 2); 1.735 + match(Set dst (SubVI dst src)); 1.736 + format %{ "psubd $dst,$src\t! 
sub packed2I" %} 1.737 + ins_encode %{ 1.738 + __ psubd($dst$$XMMRegister, $src$$XMMRegister); 1.739 + %} 1.740 + ins_pipe( pipe_slow ); 1.741 +%} 1.742 + 1.743 +instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{ 1.744 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.745 + match(Set dst (SubVI src1 src2)); 1.746 + format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %} 1.747 + ins_encode %{ 1.748 + bool vector256 = false; 1.749 + __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.750 + %} 1.751 + ins_pipe( pipe_slow ); 1.752 +%} 1.753 + 1.754 +instruct vsub4I(vecX dst, vecX src) %{ 1.755 + predicate(n->as_Vector()->length() == 4); 1.756 + match(Set dst (SubVI dst src)); 1.757 + format %{ "psubd $dst,$src\t! sub packed4I" %} 1.758 + ins_encode %{ 1.759 + __ psubd($dst$$XMMRegister, $src$$XMMRegister); 1.760 + %} 1.761 + ins_pipe( pipe_slow ); 1.762 +%} 1.763 + 1.764 +instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{ 1.765 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.766 + match(Set dst (SubVI src1 src2)); 1.767 + format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %} 1.768 + ins_encode %{ 1.769 + bool vector256 = false; 1.770 + __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.771 + %} 1.772 + ins_pipe( pipe_slow ); 1.773 +%} 1.774 + 1.775 +instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{ 1.776 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.777 + match(Set dst (SubVI src (LoadVector mem))); 1.778 + format %{ "vpsubd $dst,$src,$mem\t! 
sub packed4I" %} 1.779 + ins_encode %{ 1.780 + bool vector256 = false; 1.781 + __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.782 + %} 1.783 + ins_pipe( pipe_slow ); 1.784 +%} 1.785 + 1.786 +instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{ 1.787 + predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 1.788 + match(Set dst (SubVI src1 src2)); 1.789 + format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %} 1.790 + ins_encode %{ 1.791 + bool vector256 = true; 1.792 + __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.793 + %} 1.794 + ins_pipe( pipe_slow ); 1.795 +%} 1.796 + 1.797 +instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{ 1.798 + predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 1.799 + match(Set dst (SubVI src (LoadVector mem))); 1.800 + format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %} 1.801 + ins_encode %{ 1.802 + bool vector256 = true; 1.803 + __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.804 + %} 1.805 + ins_pipe( pipe_slow ); 1.806 +%} 1.807 + 1.808 +// Longs vector sub 1.809 +instruct vsub2L(vecX dst, vecX src) %{ 1.810 + predicate(n->as_Vector()->length() == 2); 1.811 + match(Set dst (SubVL dst src)); 1.812 + format %{ "psubq $dst,$src\t! sub packed2L" %} 1.813 + ins_encode %{ 1.814 + __ psubq($dst$$XMMRegister, $src$$XMMRegister); 1.815 + %} 1.816 + ins_pipe( pipe_slow ); 1.817 +%} 1.818 + 1.819 +instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{ 1.820 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.821 + match(Set dst (SubVL src1 src2)); 1.822 + format %{ "vpsubq $dst,$src1,$src2\t! 
sub packed2L" %} 1.823 + ins_encode %{ 1.824 + bool vector256 = false; 1.825 + __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.826 + %} 1.827 + ins_pipe( pipe_slow ); 1.828 +%} 1.829 + 1.830 +instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{ 1.831 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.832 + match(Set dst (SubVL src (LoadVector mem))); 1.833 + format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %} 1.834 + ins_encode %{ 1.835 + bool vector256 = false; 1.836 + __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.837 + %} 1.838 + ins_pipe( pipe_slow ); 1.839 +%} 1.840 + 1.841 +instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{ 1.842 + predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 1.843 + match(Set dst (SubVL src1 src2)); 1.844 + format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %} 1.845 + ins_encode %{ 1.846 + bool vector256 = true; 1.847 + __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.848 + %} 1.849 + ins_pipe( pipe_slow ); 1.850 +%} 1.851 + 1.852 +instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{ 1.853 + predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 1.854 + match(Set dst (SubVL src (LoadVector mem))); 1.855 + format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %} 1.856 + ins_encode %{ 1.857 + bool vector256 = true; 1.858 + __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.859 + %} 1.860 + ins_pipe( pipe_slow ); 1.861 +%} 1.862 + 1.863 +// Floats vector sub 1.864 +instruct vsub2F(vecD dst, vecD src) %{ 1.865 + predicate(n->as_Vector()->length() == 2); 1.866 + match(Set dst (SubVF dst src)); 1.867 + format %{ "subps $dst,$src\t! 
sub packed2F" %} 1.868 + ins_encode %{ 1.869 + __ subps($dst$$XMMRegister, $src$$XMMRegister); 1.870 + %} 1.871 + ins_pipe( pipe_slow ); 1.872 +%} 1.873 + 1.874 +instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{ 1.875 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.876 + match(Set dst (SubVF src1 src2)); 1.877 + format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %} 1.878 + ins_encode %{ 1.879 + bool vector256 = false; 1.880 + __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.881 + %} 1.882 + ins_pipe( pipe_slow ); 1.883 +%} 1.884 + 1.885 +instruct vsub4F(vecX dst, vecX src) %{ 1.886 + predicate(n->as_Vector()->length() == 4); 1.887 + match(Set dst (SubVF dst src)); 1.888 + format %{ "subps $dst,$src\t! sub packed4F" %} 1.889 + ins_encode %{ 1.890 + __ subps($dst$$XMMRegister, $src$$XMMRegister); 1.891 + %} 1.892 + ins_pipe( pipe_slow ); 1.893 +%} 1.894 + 1.895 +instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{ 1.896 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.897 + match(Set dst (SubVF src1 src2)); 1.898 + format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %} 1.899 + ins_encode %{ 1.900 + bool vector256 = false; 1.901 + __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.902 + %} 1.903 + ins_pipe( pipe_slow ); 1.904 +%} 1.905 + 1.906 +instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{ 1.907 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.908 + match(Set dst (SubVF src (LoadVector mem))); 1.909 + format %{ "vsubps $dst,$src,$mem\t! 
sub packed4F" %} 1.910 + ins_encode %{ 1.911 + bool vector256 = false; 1.912 + __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.913 + %} 1.914 + ins_pipe( pipe_slow ); 1.915 +%} 1.916 + 1.917 +instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{ 1.918 + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 1.919 + match(Set dst (SubVF src1 src2)); 1.920 + format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %} 1.921 + ins_encode %{ 1.922 + bool vector256 = true; 1.923 + __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.924 + %} 1.925 + ins_pipe( pipe_slow ); 1.926 +%} 1.927 + 1.928 +instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{ 1.929 + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 1.930 + match(Set dst (SubVF src (LoadVector mem))); 1.931 + format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %} 1.932 + ins_encode %{ 1.933 + bool vector256 = true; 1.934 + __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.935 + %} 1.936 + ins_pipe( pipe_slow ); 1.937 +%} 1.938 + 1.939 +// Doubles vector sub 1.940 +instruct vsub2D(vecX dst, vecX src) %{ 1.941 + predicate(n->as_Vector()->length() == 2); 1.942 + match(Set dst (SubVD dst src)); 1.943 + format %{ "subpd $dst,$src\t! sub packed2D" %} 1.944 + ins_encode %{ 1.945 + __ subpd($dst$$XMMRegister, $src$$XMMRegister); 1.946 + %} 1.947 + ins_pipe( pipe_slow ); 1.948 +%} 1.949 + 1.950 +instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{ 1.951 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.952 + match(Set dst (SubVD src1 src2)); 1.953 + format %{ "vsubpd $dst,$src1,$src2\t! 
sub packed2D" %} 1.954 + ins_encode %{ 1.955 + bool vector256 = false; 1.956 + __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.957 + %} 1.958 + ins_pipe( pipe_slow ); 1.959 +%} 1.960 + 1.961 +instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{ 1.962 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.963 + match(Set dst (SubVD src (LoadVector mem))); 1.964 + format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %} 1.965 + ins_encode %{ 1.966 + bool vector256 = false; 1.967 + __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.968 + %} 1.969 + ins_pipe( pipe_slow ); 1.970 +%} 1.971 + 1.972 +instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{ 1.973 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.974 + match(Set dst (SubVD src1 src2)); 1.975 + format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %} 1.976 + ins_encode %{ 1.977 + bool vector256 = true; 1.978 + __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.979 + %} 1.980 + ins_pipe( pipe_slow ); 1.981 +%} 1.982 + 1.983 +instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{ 1.984 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.985 + match(Set dst (SubVD src (LoadVector mem))); 1.986 + format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %} 1.987 + ins_encode %{ 1.988 + bool vector256 = true; 1.989 + __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.990 + %} 1.991 + ins_pipe( pipe_slow ); 1.992 +%} 1.993 + 1.994 +// --------------------------------- MUL -------------------------------------- 1.995 + 1.996 +// Shorts/Chars vector mul 1.997 +instruct vmul2S(vecS dst, vecS src) %{ 1.998 + predicate(n->as_Vector()->length() == 2); 1.999 + match(Set dst (MulVS dst src)); 1.1000 + format %{ "pmullw $dst,$src\t! 
mul packed2S" %} 1.1001 + ins_encode %{ 1.1002 + __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 1.1003 + %} 1.1004 + ins_pipe( pipe_slow ); 1.1005 +%} 1.1006 + 1.1007 +instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{ 1.1008 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.1009 + match(Set dst (MulVS src1 src2)); 1.1010 + format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 1.1011 + ins_encode %{ 1.1012 + bool vector256 = false; 1.1013 + __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.1014 + %} 1.1015 + ins_pipe( pipe_slow ); 1.1016 +%} 1.1017 + 1.1018 +instruct vmul4S(vecD dst, vecD src) %{ 1.1019 + predicate(n->as_Vector()->length() == 4); 1.1020 + match(Set dst (MulVS dst src)); 1.1021 + format %{ "pmullw $dst,$src\t! mul packed4S" %} 1.1022 + ins_encode %{ 1.1023 + __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 1.1024 + %} 1.1025 + ins_pipe( pipe_slow ); 1.1026 +%} 1.1027 + 1.1028 +instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{ 1.1029 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.1030 + match(Set dst (MulVS src1 src2)); 1.1031 + format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 1.1032 + ins_encode %{ 1.1033 + bool vector256 = false; 1.1034 + __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.1035 + %} 1.1036 + ins_pipe( pipe_slow ); 1.1037 +%} 1.1038 + 1.1039 +instruct vmul8S(vecX dst, vecX src) %{ 1.1040 + predicate(n->as_Vector()->length() == 8); 1.1041 + match(Set dst (MulVS dst src)); 1.1042 + format %{ "pmullw $dst,$src\t! mul packed8S" %} 1.1043 + ins_encode %{ 1.1044 + __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 1.1045 + %} 1.1046 + ins_pipe( pipe_slow ); 1.1047 +%} 1.1048 + 1.1049 +instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{ 1.1050 + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 1.1051 + match(Set dst (MulVS src1 src2)); 1.1052 + format %{ "vpmullw $dst,$src1,$src2\t! 
mul packed8S" %} 1.1053 + ins_encode %{ 1.1054 + bool vector256 = false; 1.1055 + __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.1056 + %} 1.1057 + ins_pipe( pipe_slow ); 1.1058 +%} 1.1059 + 1.1060 +instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{ 1.1061 + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 1.1062 + match(Set dst (MulVS src (LoadVector mem))); 1.1063 + format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 1.1064 + ins_encode %{ 1.1065 + bool vector256 = false; 1.1066 + __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.1067 + %} 1.1068 + ins_pipe( pipe_slow ); 1.1069 +%} 1.1070 + 1.1071 +instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{ 1.1072 + predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 1.1073 + match(Set dst (MulVS src1 src2)); 1.1074 + format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} 1.1075 + ins_encode %{ 1.1076 + bool vector256 = true; 1.1077 + __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.1078 + %} 1.1079 + ins_pipe( pipe_slow ); 1.1080 +%} 1.1081 + 1.1082 +instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{ 1.1083 + predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 1.1084 + match(Set dst (MulVS src (LoadVector mem))); 1.1085 + format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} 1.1086 + ins_encode %{ 1.1087 + bool vector256 = true; 1.1088 + __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.1089 + %} 1.1090 + ins_pipe( pipe_slow ); 1.1091 +%} 1.1092 + 1.1093 +// Integers vector mul (sse4_1) 1.1094 +instruct vmul2I(vecD dst, vecD src) %{ 1.1095 + predicate(UseSSE > 3 && n->as_Vector()->length() == 2); 1.1096 + match(Set dst (MulVI dst src)); 1.1097 + format %{ "pmulld $dst,$src\t! 
mul packed2I" %} 1.1098 + ins_encode %{ 1.1099 + __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 1.1100 + %} 1.1101 + ins_pipe( pipe_slow ); 1.1102 +%} 1.1103 + 1.1104 +instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{ 1.1105 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.1106 + match(Set dst (MulVI src1 src2)); 1.1107 + format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %} 1.1108 + ins_encode %{ 1.1109 + bool vector256 = false; 1.1110 + __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.1111 + %} 1.1112 + ins_pipe( pipe_slow ); 1.1113 +%} 1.1114 + 1.1115 +instruct vmul4I(vecX dst, vecX src) %{ 1.1116 + predicate(UseSSE > 3 && n->as_Vector()->length() == 4); 1.1117 + match(Set dst (MulVI dst src)); 1.1118 + format %{ "pmulld $dst,$src\t! mul packed4I" %} 1.1119 + ins_encode %{ 1.1120 + __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 1.1121 + %} 1.1122 + ins_pipe( pipe_slow ); 1.1123 +%} 1.1124 + 1.1125 +instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{ 1.1126 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.1127 + match(Set dst (MulVI src1 src2)); 1.1128 + format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %} 1.1129 + ins_encode %{ 1.1130 + bool vector256 = false; 1.1131 + __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.1132 + %} 1.1133 + ins_pipe( pipe_slow ); 1.1134 +%} 1.1135 + 1.1136 +instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{ 1.1137 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.1138 + match(Set dst (MulVI src (LoadVector mem))); 1.1139 + format %{ "vpmulld $dst,$src,$mem\t! 
mul packed4I" %} 1.1140 + ins_encode %{ 1.1141 + bool vector256 = false; 1.1142 + __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.1143 + %} 1.1144 + ins_pipe( pipe_slow ); 1.1145 +%} 1.1146 + 1.1147 +instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{ 1.1148 + predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 1.1149 + match(Set dst (MulVI src1 src2)); 1.1150 + format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %} 1.1151 + ins_encode %{ 1.1152 + bool vector256 = true; 1.1153 + __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.1154 + %} 1.1155 + ins_pipe( pipe_slow ); 1.1156 +%} 1.1157 + 1.1158 +instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{ 1.1159 + predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 1.1160 + match(Set dst (MulVI src (LoadVector mem))); 1.1161 + format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %} 1.1162 + ins_encode %{ 1.1163 + bool vector256 = true; 1.1164 + __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.1165 + %} 1.1166 + ins_pipe( pipe_slow ); 1.1167 +%} 1.1168 + 1.1169 +// Floats vector mul 1.1170 +instruct vmul2F(vecD dst, vecD src) %{ 1.1171 + predicate(n->as_Vector()->length() == 2); 1.1172 + match(Set dst (MulVF dst src)); 1.1173 + format %{ "mulps $dst,$src\t! mul packed2F" %} 1.1174 + ins_encode %{ 1.1175 + __ mulps($dst$$XMMRegister, $src$$XMMRegister); 1.1176 + %} 1.1177 + ins_pipe( pipe_slow ); 1.1178 +%} 1.1179 + 1.1180 +instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{ 1.1181 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.1182 + match(Set dst (MulVF src1 src2)); 1.1183 + format %{ "vmulps $dst,$src1,$src2\t! 
mul packed2F" %} 1.1184 + ins_encode %{ 1.1185 + bool vector256 = false; 1.1186 + __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.1187 + %} 1.1188 + ins_pipe( pipe_slow ); 1.1189 +%} 1.1190 + 1.1191 +instruct vmul4F(vecX dst, vecX src) %{ 1.1192 + predicate(n->as_Vector()->length() == 4); 1.1193 + match(Set dst (MulVF dst src)); 1.1194 + format %{ "mulps $dst,$src\t! mul packed4F" %} 1.1195 + ins_encode %{ 1.1196 + __ mulps($dst$$XMMRegister, $src$$XMMRegister); 1.1197 + %} 1.1198 + ins_pipe( pipe_slow ); 1.1199 +%} 1.1200 + 1.1201 +instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{ 1.1202 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.1203 + match(Set dst (MulVF src1 src2)); 1.1204 + format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %} 1.1205 + ins_encode %{ 1.1206 + bool vector256 = false; 1.1207 + __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.1208 + %} 1.1209 + ins_pipe( pipe_slow ); 1.1210 +%} 1.1211 + 1.1212 +instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{ 1.1213 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.1214 + match(Set dst (MulVF src (LoadVector mem))); 1.1215 + format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %} 1.1216 + ins_encode %{ 1.1217 + bool vector256 = false; 1.1218 + __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.1219 + %} 1.1220 + ins_pipe( pipe_slow ); 1.1221 +%} 1.1222 + 1.1223 +instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{ 1.1224 + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 1.1225 + match(Set dst (MulVF src1 src2)); 1.1226 + format %{ "vmulps $dst,$src1,$src2\t! 
mul packed8F" %} 1.1227 + ins_encode %{ 1.1228 + bool vector256 = true; 1.1229 + __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.1230 + %} 1.1231 + ins_pipe( pipe_slow ); 1.1232 +%} 1.1233 + 1.1234 +instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{ 1.1235 + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 1.1236 + match(Set dst (MulVF src (LoadVector mem))); 1.1237 + format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %} 1.1238 + ins_encode %{ 1.1239 + bool vector256 = true; 1.1240 + __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.1241 + %} 1.1242 + ins_pipe( pipe_slow ); 1.1243 +%} 1.1244 + 1.1245 +// Doubles vector mul 1.1246 +instruct vmul2D(vecX dst, vecX src) %{ 1.1247 + predicate(n->as_Vector()->length() == 2); 1.1248 + match(Set dst (MulVD dst src)); 1.1249 + format %{ "mulpd $dst,$src\t! mul packed2D" %} 1.1250 + ins_encode %{ 1.1251 + __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 1.1252 + %} 1.1253 + ins_pipe( pipe_slow ); 1.1254 +%} 1.1255 + 1.1256 +instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{ 1.1257 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.1258 + match(Set dst (MulVD src1 src2)); 1.1259 + format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %} 1.1260 + ins_encode %{ 1.1261 + bool vector256 = false; 1.1262 + __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.1263 + %} 1.1264 + ins_pipe( pipe_slow ); 1.1265 +%} 1.1266 + 1.1267 +instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{ 1.1268 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.1269 + match(Set dst (MulVD src (LoadVector mem))); 1.1270 + format %{ "vmulpd $dst,$src,$mem\t! 
mul packed2D" %} 1.1271 + ins_encode %{ 1.1272 + bool vector256 = false; 1.1273 + __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.1274 + %} 1.1275 + ins_pipe( pipe_slow ); 1.1276 +%} 1.1277 + 1.1278 +instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{ 1.1279 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.1280 + match(Set dst (MulVD src1 src2)); 1.1281 + format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %} 1.1282 + ins_encode %{ 1.1283 + bool vector256 = true; 1.1284 + __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.1285 + %} 1.1286 + ins_pipe( pipe_slow ); 1.1287 +%} 1.1288 + 1.1289 +instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{ 1.1290 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.1291 + match(Set dst (MulVD src (LoadVector mem))); 1.1292 + format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %} 1.1293 + ins_encode %{ 1.1294 + bool vector256 = true; 1.1295 + __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.1296 + %} 1.1297 + ins_pipe( pipe_slow ); 1.1298 +%} 1.1299 + 1.1300 +// --------------------------------- DIV -------------------------------------- 1.1301 + 1.1302 +// Floats vector div 1.1303 +instruct vdiv2F(vecD dst, vecD src) %{ 1.1304 + predicate(n->as_Vector()->length() == 2); 1.1305 + match(Set dst (DivVF dst src)); 1.1306 + format %{ "divps $dst,$src\t! div packed2F" %} 1.1307 + ins_encode %{ 1.1308 + __ divps($dst$$XMMRegister, $src$$XMMRegister); 1.1309 + %} 1.1310 + ins_pipe( pipe_slow ); 1.1311 +%} 1.1312 + 1.1313 +instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{ 1.1314 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.1315 + match(Set dst (DivVF src1 src2)); 1.1316 + format %{ "vdivps $dst,$src1,$src2\t! 
div packed2F" %} 1.1317 + ins_encode %{ 1.1318 + bool vector256 = false; 1.1319 + __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.1320 + %} 1.1321 + ins_pipe( pipe_slow ); 1.1322 +%} 1.1323 + 1.1324 +instruct vdiv4F(vecX dst, vecX src) %{ 1.1325 + predicate(n->as_Vector()->length() == 4); 1.1326 + match(Set dst (DivVF dst src)); 1.1327 + format %{ "divps $dst,$src\t! div packed4F" %} 1.1328 + ins_encode %{ 1.1329 + __ divps($dst$$XMMRegister, $src$$XMMRegister); 1.1330 + %} 1.1331 + ins_pipe( pipe_slow ); 1.1332 +%} 1.1333 + 1.1334 +instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{ 1.1335 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.1336 + match(Set dst (DivVF src1 src2)); 1.1337 + format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %} 1.1338 + ins_encode %{ 1.1339 + bool vector256 = false; 1.1340 + __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.1341 + %} 1.1342 + ins_pipe( pipe_slow ); 1.1343 +%} 1.1344 + 1.1345 +instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{ 1.1346 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.1347 + match(Set dst (DivVF src (LoadVector mem))); 1.1348 + format %{ "vdivps $dst,$src,$mem\t! div packed4F" %} 1.1349 + ins_encode %{ 1.1350 + bool vector256 = false; 1.1351 + __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.1352 + %} 1.1353 + ins_pipe( pipe_slow ); 1.1354 +%} 1.1355 + 1.1356 +instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{ 1.1357 + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 1.1358 + match(Set dst (DivVF src1 src2)); 1.1359 + format %{ "vdivps $dst,$src1,$src2\t! 
div packed8F" %} 1.1360 + ins_encode %{ 1.1361 + bool vector256 = true; 1.1362 + __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.1363 + %} 1.1364 + ins_pipe( pipe_slow ); 1.1365 +%} 1.1366 + 1.1367 +instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{ 1.1368 + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 1.1369 + match(Set dst (DivVF src (LoadVector mem))); 1.1370 + format %{ "vdivps $dst,$src,$mem\t! div packed8F" %} 1.1371 + ins_encode %{ 1.1372 + bool vector256 = true; 1.1373 + __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.1374 + %} 1.1375 + ins_pipe( pipe_slow ); 1.1376 +%} 1.1377 + 1.1378 +// Doubles vector div 1.1379 +instruct vdiv2D(vecX dst, vecX src) %{ 1.1380 + predicate(n->as_Vector()->length() == 2); 1.1381 + match(Set dst (DivVD dst src)); 1.1382 + format %{ "divpd $dst,$src\t! div packed2D" %} 1.1383 + ins_encode %{ 1.1384 + __ divpd($dst$$XMMRegister, $src$$XMMRegister); 1.1385 + %} 1.1386 + ins_pipe( pipe_slow ); 1.1387 +%} 1.1388 + 1.1389 +instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{ 1.1390 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.1391 + match(Set dst (DivVD src1 src2)); 1.1392 + format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %} 1.1393 + ins_encode %{ 1.1394 + bool vector256 = false; 1.1395 + __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.1396 + %} 1.1397 + ins_pipe( pipe_slow ); 1.1398 +%} 1.1399 + 1.1400 +instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{ 1.1401 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.1402 + match(Set dst (DivVD src (LoadVector mem))); 1.1403 + format %{ "vdivpd $dst,$src,$mem\t! 
div packed2D" %} 1.1404 + ins_encode %{ 1.1405 + bool vector256 = false; 1.1406 + __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.1407 + %} 1.1408 + ins_pipe( pipe_slow ); 1.1409 +%} 1.1410 + 1.1411 +instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{ 1.1412 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.1413 + match(Set dst (DivVD src1 src2)); 1.1414 + format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %} 1.1415 + ins_encode %{ 1.1416 + bool vector256 = true; 1.1417 + __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.1418 + %} 1.1419 + ins_pipe( pipe_slow ); 1.1420 +%} 1.1421 + 1.1422 +instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{ 1.1423 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.1424 + match(Set dst (DivVD src (LoadVector mem))); 1.1425 + format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %} 1.1426 + ins_encode %{ 1.1427 + bool vector256 = true; 1.1428 + __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.1429 + %} 1.1430 + ins_pipe( pipe_slow ); 1.1431 +%} 1.1432 + 1.1433 +// ------------------------------ LeftShift ----------------------------------- 1.1434 + 1.1435 +// Shorts/Chars vector left shift 1.1436 +instruct vsll2S(vecS dst, regF shift) %{ 1.1437 + predicate(n->as_Vector()->length() == 2); 1.1438 + match(Set dst (LShiftVS dst shift)); 1.1439 + format %{ "psllw $dst,$shift\t! left shift packed2S" %} 1.1440 + ins_encode %{ 1.1441 + __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 1.1442 + %} 1.1443 + ins_pipe( pipe_slow ); 1.1444 +%} 1.1445 + 1.1446 +instruct vsll2S_imm(vecS dst, immI8 shift) %{ 1.1447 + predicate(n->as_Vector()->length() == 2); 1.1448 + match(Set dst (LShiftVS dst shift)); 1.1449 + format %{ "psllw $dst,$shift\t! 
left shift packed2S" %} 1.1450 + ins_encode %{ 1.1451 + __ psllw($dst$$XMMRegister, (int)$shift$$constant); 1.1452 + %} 1.1453 + ins_pipe( pipe_slow ); 1.1454 +%} 1.1455 + 1.1456 +instruct vsll2S_reg(vecS dst, vecS src, regF shift) %{ 1.1457 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.1458 + match(Set dst (LShiftVS src shift)); 1.1459 + format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 1.1460 + ins_encode %{ 1.1461 + bool vector256 = false; 1.1462 + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 1.1463 + %} 1.1464 + ins_pipe( pipe_slow ); 1.1465 +%} 1.1466 + 1.1467 +instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ 1.1468 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.1469 + match(Set dst (LShiftVS src shift)); 1.1470 + format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 1.1471 + ins_encode %{ 1.1472 + bool vector256 = false; 1.1473 + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 1.1474 + %} 1.1475 + ins_pipe( pipe_slow ); 1.1476 +%} 1.1477 + 1.1478 +instruct vsll4S(vecD dst, regF shift) %{ 1.1479 + predicate(n->as_Vector()->length() == 4); 1.1480 + match(Set dst (LShiftVS dst shift)); 1.1481 + format %{ "psllw $dst,$shift\t! left shift packed4S" %} 1.1482 + ins_encode %{ 1.1483 + __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 1.1484 + %} 1.1485 + ins_pipe( pipe_slow ); 1.1486 +%} 1.1487 + 1.1488 +instruct vsll4S_imm(vecD dst, immI8 shift) %{ 1.1489 + predicate(n->as_Vector()->length() == 4); 1.1490 + match(Set dst (LShiftVS dst shift)); 1.1491 + format %{ "psllw $dst,$shift\t! 
left shift packed4S" %} 1.1492 + ins_encode %{ 1.1493 + __ psllw($dst$$XMMRegister, (int)$shift$$constant); 1.1494 + %} 1.1495 + ins_pipe( pipe_slow ); 1.1496 +%} 1.1497 + 1.1498 +instruct vsll4S_reg(vecD dst, vecD src, regF shift) %{ 1.1499 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.1500 + match(Set dst (LShiftVS src shift)); 1.1501 + format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 1.1502 + ins_encode %{ 1.1503 + bool vector256 = false; 1.1504 + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 1.1505 + %} 1.1506 + ins_pipe( pipe_slow ); 1.1507 +%} 1.1508 + 1.1509 +instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ 1.1510 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.1511 + match(Set dst (LShiftVS src shift)); 1.1512 + format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 1.1513 + ins_encode %{ 1.1514 + bool vector256 = false; 1.1515 + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 1.1516 + %} 1.1517 + ins_pipe( pipe_slow ); 1.1518 +%} 1.1519 + 1.1520 +instruct vsll8S(vecX dst, regF shift) %{ 1.1521 + predicate(n->as_Vector()->length() == 8); 1.1522 + match(Set dst (LShiftVS dst shift)); 1.1523 + format %{ "psllw $dst,$shift\t! left shift packed8S" %} 1.1524 + ins_encode %{ 1.1525 + __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 1.1526 + %} 1.1527 + ins_pipe( pipe_slow ); 1.1528 +%} 1.1529 + 1.1530 +instruct vsll8S_imm(vecX dst, immI8 shift) %{ 1.1531 + predicate(n->as_Vector()->length() == 8); 1.1532 + match(Set dst (LShiftVS dst shift)); 1.1533 + format %{ "psllw $dst,$shift\t! 
left shift packed8S" %} 1.1534 + ins_encode %{ 1.1535 + __ psllw($dst$$XMMRegister, (int)$shift$$constant); 1.1536 + %} 1.1537 + ins_pipe( pipe_slow ); 1.1538 +%} 1.1539 + 1.1540 +instruct vsll8S_reg(vecX dst, vecX src, regF shift) %{ 1.1541 + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 1.1542 + match(Set dst (LShiftVS src shift)); 1.1543 + format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 1.1544 + ins_encode %{ 1.1545 + bool vector256 = false; 1.1546 + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 1.1547 + %} 1.1548 + ins_pipe( pipe_slow ); 1.1549 +%} 1.1550 + 1.1551 +instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ 1.1552 + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 1.1553 + match(Set dst (LShiftVS src shift)); 1.1554 + format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 1.1555 + ins_encode %{ 1.1556 + bool vector256 = false; 1.1557 + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 1.1558 + %} 1.1559 + ins_pipe( pipe_slow ); 1.1560 +%} 1.1561 + 1.1562 +instruct vsll16S_reg(vecY dst, vecY src, regF shift) %{ 1.1563 + predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 1.1564 + match(Set dst (LShiftVS src shift)); 1.1565 + format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 1.1566 + ins_encode %{ 1.1567 + bool vector256 = true; 1.1568 + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 1.1569 + %} 1.1570 + ins_pipe( pipe_slow ); 1.1571 +%} 1.1572 + 1.1573 +instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ 1.1574 + predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 1.1575 + match(Set dst (LShiftVS src shift)); 1.1576 + format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed16S" %} 1.1577 + ins_encode %{ 1.1578 + bool vector256 = true; 1.1579 + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 1.1580 + %} 1.1581 + ins_pipe( pipe_slow ); 1.1582 +%} 1.1583 + 1.1584 +// Integers vector left shift 1.1585 +instruct vsll2I(vecD dst, regF shift) %{ 1.1586 + predicate(n->as_Vector()->length() == 2); 1.1587 + match(Set dst (LShiftVI dst shift)); 1.1588 + format %{ "pslld $dst,$shift\t! left shift packed2I" %} 1.1589 + ins_encode %{ 1.1590 + __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 1.1591 + %} 1.1592 + ins_pipe( pipe_slow ); 1.1593 +%} 1.1594 + 1.1595 +instruct vsll2I_imm(vecD dst, immI8 shift) %{ 1.1596 + predicate(n->as_Vector()->length() == 2); 1.1597 + match(Set dst (LShiftVI dst shift)); 1.1598 + format %{ "pslld $dst,$shift\t! left shift packed2I" %} 1.1599 + ins_encode %{ 1.1600 + __ pslld($dst$$XMMRegister, (int)$shift$$constant); 1.1601 + %} 1.1602 + ins_pipe( pipe_slow ); 1.1603 +%} 1.1604 + 1.1605 +instruct vsll2I_reg(vecD dst, vecD src, regF shift) %{ 1.1606 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.1607 + match(Set dst (LShiftVI src shift)); 1.1608 + format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} 1.1609 + ins_encode %{ 1.1610 + bool vector256 = false; 1.1611 + __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 1.1612 + %} 1.1613 + ins_pipe( pipe_slow ); 1.1614 +%} 1.1615 + 1.1616 +instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 1.1617 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.1618 + match(Set dst (LShiftVI src shift)); 1.1619 + format %{ "vpslld $dst,$src,$shift\t! 
left shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 128-bit integer left shifts: the shift count arrives either in an XMM
// register (operand declared regF, used via $shift$$XMMRegister) or as an
// 8-bit immediate (immI8).
instruct vsll4I(vecX dst, regF shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg(vecX dst, vecX src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit forms require AVX2 (UseAVX > 1) for integer lanes.
instruct vsll8I_reg(vecY dst, vecY src, regF shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector left shift
instruct vsll2L(vecX dst, regF shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg(vecX dst, vecX src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg(vecY dst, vecY src, regF shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! 
left shift packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// ----------------------- LogicalRightShift -----------------------------------

// Shorts/Chars vector logical right shift produces incorrect Java result
// for negative data because java code convert short value into int with
// sign extension before a shift.

// Integers vector logical right shift
instruct vsrl2I(vecD dst, regF shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg(vecD dst, vecD src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I(vecX dst, regF shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg(vecX dst, vecX src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg(vecY dst, vecY src, regF shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector logical right shift
instruct vsrl2L(vecX dst, regF shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg(vecX dst, vecX src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg(vecY dst, vecY src, regF shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! 
logical right shift packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------- ArithmeticRightShift -----------------------------------

// Shorts/Chars vector arithmetic right shift
instruct vsra2S(vecS dst, regF shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg(vecS dst, vecS src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S(vecD dst, regF shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg(vecD dst, vecD src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S(vecX dst, regF shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg(vecX dst, vecX src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg(vecY dst, vecY src, regF shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector arithmetic right shift
instruct vsra2I(vecD dst, regF shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg(vecD dst, vecD src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I(vecX dst, regF shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg(vecX dst, vecX src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg(vecY dst, vecY src, regF shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! 
arithmetic right shift packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// There are no longs vector arithmetic right shift instructions.


// --------------------------------- AND --------------------------------------

// Bitwise ops are element-size agnostic, so instructs are selected by
// length_in_bytes() rather than lane count.
instruct vand4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Memory-operand form: folds a vector load into the AVX op.
instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- OR ---------------------------------------

instruct vor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! 
or vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- XOR --------------------------------------

instruct vxor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}