1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/cpu/x86/vm/x86.ad Wed Apr 27 01:25:04 2016 +0800 1.3 @@ -0,0 +1,5108 @@ 1.4 +// 1.5 +// Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved. 1.6 +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 1.7 +// 1.8 +// This code is free software; you can redistribute it and/or modify it 1.9 +// under the terms of the GNU General Public License version 2 only, as 1.10 +// published by the Free Software Foundation. 1.11 +// 1.12 +// This code is distributed in the hope that it will be useful, but WITHOUT 1.13 +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 1.14 +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 1.15 +// version 2 for more details (a copy is included in the LICENSE file that 1.16 +// accompanied this code). 1.17 +// 1.18 +// You should have received a copy of the GNU General Public License version 1.19 +// 2 along with this work; if not, write to the Free Software Foundation, 1.20 +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 1.21 +// 1.22 +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 1.23 +// or visit www.oracle.com if you need additional information or have any 1.24 +// questions. 1.25 +// 1.26 +// 1.27 + 1.28 +// X86 Common Architecture Description File 1.29 + 1.30 +//----------REGISTER DEFINITION BLOCK------------------------------------------ 1.31 +// This information is used by the matcher and the register allocator to 1.32 +// describe individual registers and classes of registers within the target 1.33 +// architecture. 
register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
//
// Every reg_def in this block also passes a fifth argument after the
// encoding: the VMReg expression for that slot.  For the XMM registers this
// is xmmN->as_VMReg() for the first word and xmmN->as_VMReg()->next(k) for
// the k-th following word (suffixes b..h correspond to k = 1..7).

// XMM registers.  256-bit registers or 8 words each, labeled (a)-h.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 preserved across function calls
//              XMM0-XMM3 might hold parameters

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));

// On Win64, XMM6-XMM15 use Save-On-Entry as their C convention save type
// (see the "Windows ABI" note above); elsewhere they are Save-On-Call.
#ifdef _WIN64

reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));

reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));

reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));

reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));

reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));

reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));

reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));

reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));

reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));

reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));

#else // _WIN64

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));

// XMM8-XMM15 are only defined for 64-bit builds.
#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));

#endif // _LP64

#endif // _WIN64

// Flags register: defined with ideal type 0 and no VMReg (VMRegImpl::Bad()).
// Its encoding is 16 in 64-bit builds and 8 otherwise.
#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

// Allocation chunk holding every XMM word defined above
// (XMM8-XMM15 only in 64-bit builds).
alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                   XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                   XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                   XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                   XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                   XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                   XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                   XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                  ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                   XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                   );

// flags allocation class should be last.
alloc_class chunk2(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for all float registers
// (first word of each XMM register only)
reg_class float_reg(XMM0,
                    XMM1,
                    XMM2,
                    XMM3,
                    XMM4,
                    XMM5,
                    XMM6,
                    XMM7
#ifdef _LP64
                   ,XMM8,
                    XMM9,
                    XMM10,
                    XMM11,
                    XMM12,
                    XMM13,
                    XMM14,
                    XMM15
#endif
                    );

// Class for all double registers
// (first two words of each XMM register)
reg_class double_reg(XMM0, XMM0b,
                     XMM1, XMM1b,
                     XMM2, XMM2b,
                     XMM3, XMM3b,
                     XMM4, XMM4b,
                     XMM5, XMM5b,
                     XMM6, XMM6b,
                     XMM7, XMM7b
#ifdef _LP64
                    ,XMM8, XMM8b,
                     XMM9, XMM9b,
                     XMM10, XMM10b,
                     XMM11, XMM11b,
                     XMM12, XMM12b,
                     XMM13, XMM13b,
                     XMM14, XMM14b,
                     XMM15, XMM15b
#endif
                     );

// Class for all 32bit vector registers
reg_class vectors_reg(XMM0,
                      XMM1,
                      XMM2,
                      XMM3,
                      XMM4,
                      XMM5,
                      XMM6,
                      XMM7
#ifdef _LP64
                     ,XMM8,
                      XMM9,
                      XMM10,
                      XMM11,
                      XMM12,
                      XMM13,
                      XMM14,
                      XMM15
#endif
                      );

// Class for all 64bit vector registers
reg_class vectord_reg(XMM0, XMM0b,
                      XMM1, XMM1b,
                      XMM2, XMM2b,
                      XMM3, XMM3b,
                      XMM4, XMM4b,
                      XMM5, XMM5b,
                      XMM6, XMM6b,
                      XMM7, XMM7b
#ifdef _LP64
                     ,XMM8, XMM8b,
                      XMM9, XMM9b,
                      XMM10, XMM10b,
                      XMM11, XMM11b,
                      XMM12, XMM12b,
                      XMM13, XMM13b,
                      XMM14, XMM14b,
                      XMM15, XMM15b
#endif
                      );

// Class for all 128bit vector registers
reg_class vectorx_reg(XMM0, XMM0b, XMM0c, XMM0d,
                      XMM1, XMM1b, XMM1c, XMM1d,
                      XMM2, XMM2b, XMM2c, XMM2d,
                      XMM3, XMM3b, XMM3c, XMM3d,
                      XMM4, XMM4b, XMM4c, XMM4d,
                      XMM5, XMM5b, XMM5c, XMM5d,
                      XMM6, XMM6b, XMM6c, XMM6d,
                      XMM7, XMM7b, XMM7c, XMM7d
#ifdef _LP64
                     ,XMM8, XMM8b, XMM8c, XMM8d,
                      XMM9, XMM9b, XMM9c, XMM9d,
                      XMM10, XMM10b, XMM10c, XMM10d,
                      XMM11, XMM11b, XMM11c, XMM11d,
                      XMM12, XMM12b, XMM12c, XMM12d,
                      XMM13, XMM13b, XMM13c, XMM13d,
                      XMM14, XMM14b, XMM14c, XMM14d,
                      XMM15, XMM15b, XMM15c, XMM15d
#endif
                      );

// Class for all 256bit vector registers
reg_class vectory_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                      XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                      XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                      XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                      XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                      XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                      XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                      XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                     ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                      XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                      XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                      XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                      XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                      XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                      XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                      XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                      );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description

source_hpp %{
// Header information of the source block.
// Method declarations/definitions which are used outside
// the ad-scope can conveniently be defined here.
//
// To keep related declarations/definitions/uses close together,
// we switch between source %{ }% and source_hpp %{ }% freely as needed.

// Call trampoline stub sizing.  Both queries return 0 here because this
// platform has no call trampolines.
class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};

// Emitters and size bounds for the exception and deoptimization handler
// stubs.  Each emit_* function asserts that the code it generated does not
// exceed the matching size_* value.
class HandlerImpl {

 public:

  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  // Size reserved for the exception handler stub: one jump instruction.
  static uint size_exception_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // exception handler starts out as jump and can be patched to
    // a call by deoptimization.  (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return NativeJump::instruction_size;
  }

  // Size reserved for the deopt handler stub.
#ifdef _LP64
  static uint size_deopt_handler() {
    // three 5 byte instructions
    return 15;
  }
#else
  static uint size_deopt_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // exception handler starts out as jump and can be patched to
    // a call by deoptimization.  (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return 5 + NativeJump::instruction_size; // pushl(); jmp;
  }
#endif
};

%} // end source_hpp

source %{

// Emit exception handler code.
// The stub is a single jump to the entry point of
// OptoRuntime::exception_blob().
// Returns the offset of the handler, or 0 if start_a_stub() returned NULL.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL)  return 0;  // CodeBuffer::expand failed
  int offset = __ offset();
  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  // The emitted code must fit in the space requested from start_a_stub().
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}

// Emit deopt handler code.
// The stub pushes the address of its own first instruction ("the_pc") and
// then jumps to SharedRuntime::deopt_blob()->unpack().
// Returns the offset of the handler, or 0 if start_a_stub() returned NULL.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL)  return 0;  // CodeBuffer::expand failed
  int offset = __ offset();

#ifdef _LP64
  address the_pc = (address) __ pc();
  Label next;
  // push a "the_pc" on the stack without destroying any registers
  // as they all may be live.

  // push address of "next"
  __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
  __ bind(next);
  // adjust it so it matches "the_pc"
  // (subtracts the number of bytes emitted since the start of the stub)
  __ subptr(Address(rsp, 0), __ offset() - offset);
#else
  InternalAddress here(__ pc());
  __ pushptr(here.addr());
#endif

  __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
  // The emitted code must fit in the space requested from start_a_stub().
  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;
}


//=============================================================================

  // Float masks come from different places depending on platform.
1.600 +#ifdef _LP64 1.601 + static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1.602 + static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1.603 + static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1.604 + static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1.605 +#else 1.606 + static address float_signmask() { return (address)float_signmask_pool; } 1.607 + static address float_signflip() { return (address)float_signflip_pool; } 1.608 + static address double_signmask() { return (address)double_signmask_pool; } 1.609 + static address double_signflip() { return (address)double_signflip_pool; } 1.610 +#endif 1.611 + 1.612 + 1.613 +const bool Matcher::match_rule_supported(int opcode) { 1.614 + if (!has_match_rule(opcode)) 1.615 + return false; 1.616 + 1.617 + switch (opcode) { 1.618 + case Op_PopCountI: 1.619 + case Op_PopCountL: 1.620 + if (!UsePopCountInstruction) 1.621 + return false; 1.622 + break; 1.623 + case Op_MulVI: 1.624 + if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX 1.625 + return false; 1.626 + break; 1.627 + case Op_CompareAndSwapL: 1.628 +#ifdef _LP64 1.629 + case Op_CompareAndSwapP: 1.630 +#endif 1.631 + if (!VM_Version::supports_cx8()) 1.632 + return false; 1.633 + break; 1.634 + } 1.635 + 1.636 + return true; // Per default match rules are supported. 1.637 +} 1.638 + 1.639 +// Max vector size in bytes. 0 if not supported. 1.640 +const int Matcher::vector_width_in_bytes(BasicType bt) { 1.641 + assert(is_java_primitive(bt), "only primitive type vectors"); 1.642 + if (UseSSE < 2) return 0; 1.643 + // SSE2 supports 128bit vectors for all types. 1.644 + // AVX2 supports 256bit vectors for all types. 1.645 + int size = (UseAVX > 1) ? 32 : 16; 1.646 + // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 
1.647 + if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 1.648 + size = 32; 1.649 + // Use flag to limit vector size. 1.650 + size = MIN2(size,(int)MaxVectorSize); 1.651 + // Minimum 2 values in vector (or 4 for bytes). 1.652 + switch (bt) { 1.653 + case T_DOUBLE: 1.654 + case T_LONG: 1.655 + if (size < 16) return 0; 1.656 + case T_FLOAT: 1.657 + case T_INT: 1.658 + if (size < 8) return 0; 1.659 + case T_BOOLEAN: 1.660 + case T_BYTE: 1.661 + case T_CHAR: 1.662 + case T_SHORT: 1.663 + if (size < 4) return 0; 1.664 + break; 1.665 + default: 1.666 + ShouldNotReachHere(); 1.667 + } 1.668 + return size; 1.669 +} 1.670 + 1.671 +// Limits on vector size (number of elements) loaded into vector. 1.672 +const int Matcher::max_vector_size(const BasicType bt) { 1.673 + return vector_width_in_bytes(bt)/type2aelembytes(bt); 1.674 +} 1.675 +const int Matcher::min_vector_size(const BasicType bt) { 1.676 + int max_size = max_vector_size(bt); 1.677 + // Min size which can be loaded into vector is 4 bytes. 1.678 + int size = (type2aelembytes(bt) == 1) ? 4 : 2; 1.679 + return MIN2(size,max_size); 1.680 +} 1.681 + 1.682 +// Vector ideal reg corresponding to specidied size in bytes 1.683 +const int Matcher::vector_ideal_reg(int size) { 1.684 + assert(MaxVectorSize >= size, ""); 1.685 + switch(size) { 1.686 + case 4: return Op_VecS; 1.687 + case 8: return Op_VecD; 1.688 + case 16: return Op_VecX; 1.689 + case 32: return Op_VecY; 1.690 + } 1.691 + ShouldNotReachHere(); 1.692 + return 0; 1.693 +} 1.694 + 1.695 +// Only lowest bits of xmm reg are used for vector shift count. 1.696 +const int Matcher::vector_shift_count_ideal_reg(int size) { 1.697 + return Op_VecS; 1.698 +} 1.699 + 1.700 +// x86 supports misaligned vectors store/load. 
// Returns the negation of the AlignVector flag.
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}

// x86 AES instructions are compatible with SunJCE expanded
// keys, hence we do not need to pass the original key to stubs
const bool Matcher::pass_original_key_for_aes() {
  return false;
}

// Helper methods for MachSpillCopyNode::implementation().

// Register-to-register vector move.
//   cbuf     - when non-NULL the move is emitted into this buffer and the
//              number of bytes emitted is returned
//   do_size  - size-only query; asserted false under LP64_ONLY
//   src_lo/dst_lo - register numbers used to index Matcher::_regEncode and
//              Matcher::regName
//   src_hi/dst_hi - only checked by the adjacency assert below
//   ireg     - Op_VecS/Op_VecD/Op_VecX select movdqu, Op_VecY selects vmovdqu
//   st       - stream for the textual form, used only in non-PRODUCT builds
//              when cbuf is NULL and do_size is false
// When nothing is emitted the function returns the constant 4.
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st) {
  // In 64-bit VM size calculation is very complex. Emitting instructions
  // into scratch buffer is used to get size in 64-bit VM.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  // Except for Op_VecS, source and destination must each be an even-numbered
  // register followed immediately by its _hi half.
  assert(ireg == Op_VecS || // 32bit vector
         (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
         (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
         "no non-adjacent vector moves" );
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();   // start position, used to measure emitted bytes
    switch (ireg) {
    case Op_VecS: // copy whole register
    case Op_VecD:
    case Op_VecX:
      __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      break;
    case Op_VecY:
      __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      break;
    default:
      ShouldNotReachHere();
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == 4, "incorrect size calculattion");
#endif
    return size;
#ifndef PRODUCT
  // The printing branch exists only in non-PRODUCT builds; the preprocessor
  // guard wraps the 'else if' itself, so the closing brace after #endif
  // terminates whichever branch was compiled in.
  } else if (!do_size) {
    switch (ireg) {
    case Op_VecS:
    case Op_VecD:
    case Op_VecX:
      st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    case Op_VecY:
      st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  return 4;
}

// Vector load from / store to the stack slot at [rsp + stack_offset].
//   cbuf         - when non-NULL the instruction is emitted and the number of
//                  bytes emitted is returned
//   do_size      - size-only query; asserted false under LP64_ONLY
//   is_load      - true: stack -> register, false: register -> stack
//   stack_offset - displacement from rsp
//   reg          - register number used to index Matcher::_regEncode and
//                  Matcher::regName
//   ireg         - Op_VecS -> movdl, Op_VecD -> movq, Op_VecX -> movdqu,
//                  Op_VecY -> vmovdqu
//   st           - stream for the textual form, used only in non-PRODUCT
//                  builds when cbuf is NULL and do_size is false
// When nothing is emitted the function returns 5 + offset_size, where
// offset_size is 0, 1 or 4 depending on stack_offset (see the end of the body).
static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st) {
  // In 64-bit VM size calculation is very complex. Emitting instructions
  // into scratch buffer is used to get size in 64-bit VM.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();   // start position, used to measure emitted bytes
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecD:
        __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecX:
        __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecY:
        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecD:
        __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecX:
        __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecY:
        __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      default:
        ShouldNotReachHere();
      }
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    // Displacement bytes expected in the encoding: none for offset 0, one
    // for offsets below 0x80, four otherwise.
    int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == (5+offset_size), "incorrect size calculattion");
#endif
    return size;
#ifndef PRODUCT
  // Printing branch, compiled only in non-PRODUCT builds (the guard wraps
  // the 'else if' itself).
  } else if (!do_size) {
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecD:
        st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecX:
        st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecY:
        st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecD:
        st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecX:
        st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecY:
        st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      default:
        ShouldNotReachHere();
      }
    }
#endif
  }
  // Same displacement-size rule as in the ASSERT check above.
  int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return 5+offset_size;
}

// Builds a 32-bit pattern by repeating the low 'width' bytes of 'con'
// (width must be 1 or 2) and returns those bits reinterpreted as a jfloat.
// The final step is a bit-for-bit reinterpretation through a pointer cast,
// not a numeric int-to-float conversion.
static inline jfloat replicate4_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
  assert(width == 1 || width == 2, "only byte or short types here");
  int bit_width = width * 8;
  jint val = con;
  val &= (1 << bit_width) - 1;  // mask off sign bits
  // Each pass doubles the number of valid low bits by OR-ing in a shifted copy.
  while(bit_width < 32) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  jfloat fval = *((jfloat*) &val);  // coerce to float type
  return fval;
}

// 64-bit counterpart of replicate4_imm: repeats the low 'width' bytes of
// 'con' (width must be 1, 2 or 4) across 64 bits and returns those bits
// reinterpreted as a jdouble (again a pointer-cast reinterpretation).
static inline jdouble replicate8_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
  assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
  int bit_width = width * 8;
  jlong val = con;
  val &= (((jlong) 1) << bit_width) - 1;  // mask off sign bits
  // Each pass doubles the number of valid low bits by OR-ing in a shifted copy.
  while(bit_width < 64) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  jdouble dval = *((jdouble*) &val);  // coerce to double type
  return dval;
}

#ifndef PRODUCT
  // Prints the textual form of the nop padding, including its byte count.
  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("nop \t# %d bytes pad for loops and calls", _count);
  }
#endif

  // Emits the padding by passing _count to MacroAssembler::nop.
  void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
    MacroAssembler _masm(&cbuf);
    __ nop(_count);
  }

  // Reported size is _count, the same value handed to nop() in emit().
  uint MachNopNode::size(PhaseRegAlloc*) const {
    return _count;
  }

#ifndef PRODUCT
  // Prints the textual form of the breakpoint node.
  void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("# breakpoint");
  }
#endif

  // Emits a single int3 instruction.
  void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
    MacroAssembler _masm(&cbuf);
    __ int3();
  }

  // Size is not hard-coded here; it is delegated to MachNode::size.
  uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
    return MachNode::size(ra_);
  }

%}

encode %{

  // Copies rsp into rbp_mh_SP_save. Under debug_only the code offsets before
  // and after are recorded, and the assert compares the emitted length with
  // preserve_SP_size().
  enc_class preserve_SP %{
    debug_only(int off0 = cbuf.insts_size());
    MacroAssembler _masm(&cbuf);
    // RBP is preserved across all calls, even compiled calls.
    // Use it to preserve RSP in places where the callee might change the SP.
    __ movptr(rbp_mh_SP_save, rsp);
    debug_only(int off1 = cbuf.insts_size());
    assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
  %}

  // Inverse of preserve_SP: copies rbp_mh_SP_save back into rsp.
  enc_class restore_SP %{
    MacroAssembler _masm(&cbuf);
    __ movptr(rsp, rbp_mh_SP_save);
  %}

  // Emitted only when VerifyStackAtCalls is set: compares the word at
  // [rsp + framesize] with 0xbadb100d and executes int3 when they differ.
  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find magic cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      MacroAssembler _masm(&cbuf);
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}

%}


//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

// Vectors
// One register operand per vector ideal type. Each operand matches its ideal
// type (VecS/VecD/VecX/VecY) and is allocated in the register class named in
// its constraint.
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}


// INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)

// ============================================================================

// A Halt node is encoded as a single int3.
instruct ShouldNotReachHere() %{
  match(Halt);
  format %{ "int3\t# ShouldNotReachHere" %}
  ins_encode %{
    __ int3();
  %}
  ins_pipe(pipe_slow);
%}

// ============================================================================

// Scalar arithmetic rules. Each operation below comes in six variants:
//   <op>_reg, <op>_mem, <op>_imm
//       predicate requires UseAVX == 0 (and UseSSE >= 1 for float, >= 2 for
//       double); dst is both the first input and the result
//   <op>_reg_reg, <op>_reg_mem, <op>_reg_imm
//       predicate requires UseAVX > 0; dst is separate from the inputs and
//       the v-prefixed assembler routine is used
// The second input is a register, a loaded memory operand, or a constant
// addressed through $constantaddress.

// ---- AddF: float add ----
instruct addF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst src));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst (LoadF src)));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst con));
  format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 src2));

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src con));

  format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// ---- AddD: double add ----
instruct addD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst src));

  format %{ "addsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst (LoadD src)));

  format %{ "addsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst con));
  format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 src2));

  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 (LoadD src2)));

  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src con));

  format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// ---- SubF: float subtract ----
instruct subF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst src));

  format %{ "subss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst (LoadF src)));

  format %{ "subss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst con));
  format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 src2));

  format %{ "vsubss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 (LoadF src2)));

  format %{ "vsubss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src con));

  format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// ---- SubD: double subtract ----
instruct subD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst src));

  format %{ "subsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst (LoadD src)));

  format %{ "subsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst con));
  format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 src2));

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 (LoadD src2)));

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src con));

  format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// ---- MulF: float multiply ----
instruct mulF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst src));

  format %{ "mulss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst (LoadF src)));

  format %{ "mulss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst con));
  format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 src2));

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src con));

  format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// ---- MulD: double multiply ----
instruct mulD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst src));

  format %{ "mulsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst (LoadD src)));

  format %{ "mulsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst con));
  format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 src2));

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 (LoadD src2)));

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src con));

  format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// ---- DivF: float divide ----
instruct divF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst src));

  format %{ "divss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst (LoadF src)));

  format %{ "divss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst con));
  format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 src2));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 (LoadF src2)));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src con));

  format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// ---- DivD: double divide ----
instruct divD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst src));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst (LoadD src)));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst con));
  format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 src2));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 (LoadD src2)));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src con));

  format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// ---- AbsF / AbsD: absolute value ----
// Implemented as a bitwise AND with the constant whose address is returned by
// float_signmask() / double_signmask(). The UseAVX > 0 variants pass
// vector256 = false to the v-prefixed routine.
instruct absF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AbsF dst));
  ins_cost(150);
  format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg_reg(regF dst, regF src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    bool vector256 = false;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector256);
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AbsD dst));
  ins_cost(150);
  format %{ "andpd $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg_reg(regD dst, regD src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    bool vector256 = false;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector256);
  %}
  ins_pipe(pipe_slow);
%}

// ---- NegF / NegD: negation ----
// Implemented as a bitwise XOR with the constant whose address is returned by
// float_signflip() / double_signflip(). The UseAVX > 0 variants pass
// vector256 = false to the v-prefixed routine.
instruct negF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (NegF dst));
  ins_cost(150);
  format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg_reg(regF dst, regF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  ins_cost(150);
  format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    bool vector256 = false;
    __ vxorps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signflip()), vector256);
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (NegD dst));
  ins_cost(150);
  format %{ "xorpd $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg_reg(regD dst, regD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));
  ins_cost(150);
  format %{ "vxorpd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    bool vector256 = false;
    __ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signflip()), vector256);
  %}
  ins_pipe(pipe_slow);
%}

// ---- Square root ----
// The float rules match the tree ConvD2F(SqrtD(ConvF2D x)) and encode it as a
// single sqrtss; the double rules match SqrtD directly. These predicates test
// only UseSSE, so there are no separate UseAVX variants here.
instruct sqrtF_reg(regF dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_mem(regF dst, memory src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_imm(regF dst, immF con) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
  format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_reg(regD dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD src));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_mem(regD dst, memory src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD (LoadD src)));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_imm(regD dst, immD con) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD con));
  format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}


// ====================VECTOR INSTRUCTIONS=====================================

// The LoadVector rules below are distinguished only by the node's
// memory_size() predicate, which selects the operand type and the move used:
// 4 -> vecS/movdl, 8 -> vecD/movq, 16 -> vecX/movdqu, 32 -> vecY/vmovdqu.

// Load vectors (4 bytes long)
instruct loadV4(vecS dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movd $dst,$mem\t! load vector (4 bytes)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (8 bytes long)
instruct loadV8(vecD dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movq $dst,$mem\t! load vector (8 bytes)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (16 bytes long)
instruct loadV16(vecX dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (32 bytes long)
instruct loadV32(vecY dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 32);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Store vectors
instruct storeV4(memory mem, vecS src) %{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movd $mem,$src\t! 
store vector (4 bytes)" %} 1.1799 + ins_encode %{ 1.1800 + __ movdl($mem$$Address, $src$$XMMRegister); 1.1801 + %} 1.1802 + ins_pipe( pipe_slow ); 1.1803 +%} 1.1804 + 1.1805 +instruct storeV8(memory mem, vecD src) %{ 1.1806 + predicate(n->as_StoreVector()->memory_size() == 8); 1.1807 + match(Set mem (StoreVector mem src)); 1.1808 + ins_cost(145); 1.1809 + format %{ "movq $mem,$src\t! store vector (8 bytes)" %} 1.1810 + ins_encode %{ 1.1811 + __ movq($mem$$Address, $src$$XMMRegister); 1.1812 + %} 1.1813 + ins_pipe( pipe_slow ); 1.1814 +%} 1.1815 + 1.1816 +instruct storeV16(memory mem, vecX src) %{ 1.1817 + predicate(n->as_StoreVector()->memory_size() == 16); 1.1818 + match(Set mem (StoreVector mem src)); 1.1819 + ins_cost(145); 1.1820 + format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %} 1.1821 + ins_encode %{ 1.1822 + __ movdqu($mem$$Address, $src$$XMMRegister); 1.1823 + %} 1.1824 + ins_pipe( pipe_slow ); 1.1825 +%} 1.1826 + 1.1827 +instruct storeV32(memory mem, vecY src) %{ 1.1828 + predicate(n->as_StoreVector()->memory_size() == 32); 1.1829 + match(Set mem (StoreVector mem src)); 1.1830 + ins_cost(145); 1.1831 + format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %} 1.1832 + ins_encode %{ 1.1833 + __ vmovdqu($mem$$Address, $src$$XMMRegister); 1.1834 + %} 1.1835 + ins_pipe( pipe_slow ); 1.1836 +%} 1.1837 + 1.1838 +// Replicate byte scalar to be vector 1.1839 +instruct Repl4B(vecS dst, rRegI src) %{ 1.1840 + predicate(n->as_Vector()->length() == 4); 1.1841 + match(Set dst (ReplicateB src)); 1.1842 + format %{ "movd $dst,$src\n\t" 1.1843 + "punpcklbw $dst,$dst\n\t" 1.1844 + "pshuflw $dst,$dst,0x00\t! 
replicate4B" %} 1.1845 + ins_encode %{ 1.1846 + __ movdl($dst$$XMMRegister, $src$$Register); 1.1847 + __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1.1848 + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1.1849 + %} 1.1850 + ins_pipe( pipe_slow ); 1.1851 +%} 1.1852 + 1.1853 +instruct Repl8B(vecD dst, rRegI src) %{ 1.1854 + predicate(n->as_Vector()->length() == 8); 1.1855 + match(Set dst (ReplicateB src)); 1.1856 + format %{ "movd $dst,$src\n\t" 1.1857 + "punpcklbw $dst,$dst\n\t" 1.1858 + "pshuflw $dst,$dst,0x00\t! replicate8B" %} 1.1859 + ins_encode %{ 1.1860 + __ movdl($dst$$XMMRegister, $src$$Register); 1.1861 + __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1.1862 + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1.1863 + %} 1.1864 + ins_pipe( pipe_slow ); 1.1865 +%} 1.1866 + 1.1867 +instruct Repl16B(vecX dst, rRegI src) %{ 1.1868 + predicate(n->as_Vector()->length() == 16); 1.1869 + match(Set dst (ReplicateB src)); 1.1870 + format %{ "movd $dst,$src\n\t" 1.1871 + "punpcklbw $dst,$dst\n\t" 1.1872 + "pshuflw $dst,$dst,0x00\n\t" 1.1873 + "punpcklqdq $dst,$dst\t! replicate16B" %} 1.1874 + ins_encode %{ 1.1875 + __ movdl($dst$$XMMRegister, $src$$Register); 1.1876 + __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1.1877 + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1.1878 + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1.1879 + %} 1.1880 + ins_pipe( pipe_slow ); 1.1881 +%} 1.1882 + 1.1883 +instruct Repl32B(vecY dst, rRegI src) %{ 1.1884 + predicate(n->as_Vector()->length() == 32); 1.1885 + match(Set dst (ReplicateB src)); 1.1886 + format %{ "movd $dst,$src\n\t" 1.1887 + "punpcklbw $dst,$dst\n\t" 1.1888 + "pshuflw $dst,$dst,0x00\n\t" 1.1889 + "punpcklqdq $dst,$dst\n\t" 1.1890 + "vinserti128h $dst,$dst,$dst\t! 
replicate32B" %} 1.1891 + ins_encode %{ 1.1892 + __ movdl($dst$$XMMRegister, $src$$Register); 1.1893 + __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1.1894 + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1.1895 + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1.1896 + __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 1.1897 + %} 1.1898 + ins_pipe( pipe_slow ); 1.1899 +%} 1.1900 + 1.1901 +// Replicate byte scalar immediate to be vector by loading from const table. 1.1902 +instruct Repl4B_imm(vecS dst, immI con) %{ 1.1903 + predicate(n->as_Vector()->length() == 4); 1.1904 + match(Set dst (ReplicateB con)); 1.1905 + format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %} 1.1906 + ins_encode %{ 1.1907 + __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); 1.1908 + %} 1.1909 + ins_pipe( pipe_slow ); 1.1910 +%} 1.1911 + 1.1912 +instruct Repl8B_imm(vecD dst, immI con) %{ 1.1913 + predicate(n->as_Vector()->length() == 8); 1.1914 + match(Set dst (ReplicateB con)); 1.1915 + format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %} 1.1916 + ins_encode %{ 1.1917 + __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 1.1918 + %} 1.1919 + ins_pipe( pipe_slow ); 1.1920 +%} 1.1921 + 1.1922 +instruct Repl16B_imm(vecX dst, immI con) %{ 1.1923 + predicate(n->as_Vector()->length() == 16); 1.1924 + match(Set dst (ReplicateB con)); 1.1925 + format %{ "movq $dst,[$constantaddress]\n\t" 1.1926 + "punpcklqdq $dst,$dst\t! 
replicate16B($con)" %} 1.1927 + ins_encode %{ 1.1928 + __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 1.1929 + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1.1930 + %} 1.1931 + ins_pipe( pipe_slow ); 1.1932 +%} 1.1933 +// 32-byte variant: same constant-table load and punpcklqdq as Repl16B_imm, followed by vinserti128h on $dst. 1.1934 +instruct Repl32B_imm(vecY dst, immI con) %{ 1.1935 + predicate(n->as_Vector()->length() == 32); 1.1936 + match(Set dst (ReplicateB con)); 1.1937 + format %{ "movq $dst,[$constantaddress]\n\t" 1.1938 + "punpcklqdq $dst,$dst\n\t" 1.1939 + "vinserti128h $dst,$dst,$dst\t! replicate32B($con)" %} 1.1940 + ins_encode %{ 1.1941 + __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 1.1942 + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1.1943 + __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 1.1944 + %} 1.1945 + ins_pipe( pipe_slow ); 1.1946 +%} 1.1947 + 1.1948 +// Replicate byte scalar zero to be vector 1.1949 +instruct Repl4B_zero(vecS dst, immI0 zero) %{ 1.1950 + predicate(n->as_Vector()->length() == 4); 1.1951 + match(Set dst (ReplicateB zero)); 1.1952 + format %{ "pxor $dst,$dst\t! replicate4B zero" %} 1.1953 + ins_encode %{ 1.1954 + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1.1955 + %} 1.1956 + ins_pipe( fpu_reg_reg ); 1.1957 +%} 1.1958 + 1.1959 +instruct Repl8B_zero(vecD dst, immI0 zero) %{ 1.1960 + predicate(n->as_Vector()->length() == 8); 1.1961 + match(Set dst (ReplicateB zero)); 1.1962 + format %{ "pxor $dst,$dst\t! replicate8B zero" %} 1.1963 + ins_encode %{ 1.1964 + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1.1965 + %} 1.1966 + ins_pipe( fpu_reg_reg ); 1.1967 +%} 1.1968 + 1.1969 +instruct Repl16B_zero(vecX dst, immI0 zero) %{ 1.1970 + predicate(n->as_Vector()->length() == 16); 1.1971 + match(Set dst (ReplicateB zero)); 1.1972 + format %{ "pxor $dst,$dst\t! 
replicate16B zero" %} 1.1973 + ins_encode %{ 1.1974 + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1.1975 + %} 1.1976 + ins_pipe( fpu_reg_reg ); 1.1977 +%} 1.1978 + 1.1979 +instruct Repl32B_zero(vecY dst, immI0 zero) %{ 1.1980 + predicate(n->as_Vector()->length() == 32); 1.1981 + match(Set dst (ReplicateB zero)); 1.1982 + format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %} 1.1983 + ins_encode %{ 1.1984 + // The call is vpxor with vector256 = true; plain AVX has no 256-bit vpxor (AVX2 does), so the macro assembler is presumably expected to emit vxorpd instead - TODO confirm. 1.1985 + bool vector256 = true; 1.1986 + __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 1.1987 + %} 1.1988 + ins_pipe( fpu_reg_reg ); 1.1989 +%} 1.1990 + 1.1991 +// Replicate char/short (2 byte) scalar to be vector 1.1992 +instruct Repl2S(vecS dst, rRegI src) %{ 1.1993 + predicate(n->as_Vector()->length() == 2); 1.1994 + match(Set dst (ReplicateS src)); 1.1995 + format %{ "movd $dst,$src\n\t" 1.1996 + "pshuflw $dst,$dst,0x00\t! replicate2S" %} 1.1997 + ins_encode %{ 1.1998 + __ movdl($dst$$XMMRegister, $src$$Register); 1.1999 + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1.2000 + %} 1.2001 + ins_pipe( fpu_reg_reg ); 1.2002 +%} 1.2003 + 1.2004 +instruct Repl4S(vecD dst, rRegI src) %{ 1.2005 + predicate(n->as_Vector()->length() == 4); 1.2006 + match(Set dst (ReplicateS src)); 1.2007 + format %{ "movd $dst,$src\n\t" 1.2008 + "pshuflw $dst,$dst,0x00\t! replicate4S" %} 1.2009 + ins_encode %{ 1.2010 + __ movdl($dst$$XMMRegister, $src$$Register); 1.2011 + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1.2012 + %} 1.2013 + ins_pipe( fpu_reg_reg ); 1.2014 +%} 1.2015 + 1.2016 +instruct Repl8S(vecX dst, rRegI src) %{ 1.2017 + predicate(n->as_Vector()->length() == 8); 1.2018 + match(Set dst (ReplicateS src)); 1.2019 + format %{ "movd $dst,$src\n\t" 1.2020 + "pshuflw $dst,$dst,0x00\n\t" 1.2021 + "punpcklqdq $dst,$dst\t! 
replicate8S" %} 1.2022 + ins_encode %{ 1.2023 + __ movdl($dst$$XMMRegister, $src$$Register); 1.2024 + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1.2025 + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1.2026 + %} 1.2027 + ins_pipe( pipe_slow ); 1.2028 +%} 1.2029 + 1.2030 +instruct Repl16S(vecY dst, rRegI src) %{ 1.2031 + predicate(n->as_Vector()->length() == 16); 1.2032 + match(Set dst (ReplicateS src)); 1.2033 + format %{ "movd $dst,$src\n\t" 1.2034 + "pshuflw $dst,$dst,0x00\n\t" 1.2035 + "punpcklqdq $dst,$dst\n\t" 1.2036 + "vinserti128h $dst,$dst,$dst\t! replicate16S" %} 1.2037 + ins_encode %{ 1.2038 + __ movdl($dst$$XMMRegister, $src$$Register); 1.2039 + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1.2040 + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1.2041 + __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 1.2042 + %} 1.2043 + ins_pipe( pipe_slow ); 1.2044 +%} 1.2045 + 1.2046 +// Replicate char/short (2 byte) scalar immediate to be vector by loading from const table. 1.2047 +instruct Repl2S_imm(vecS dst, immI con) %{ 1.2048 + predicate(n->as_Vector()->length() == 2); 1.2049 + match(Set dst (ReplicateS con)); 1.2050 + format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %} 1.2051 + ins_encode %{ 1.2052 + __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2))); 1.2053 + %} 1.2054 + ins_pipe( fpu_reg_reg ); 1.2055 +%} 1.2056 + 1.2057 +instruct Repl4S_imm(vecD dst, immI con) %{ 1.2058 + predicate(n->as_Vector()->length() == 4); 1.2059 + match(Set dst (ReplicateS con)); 1.2060 + format %{ "movq $dst,[$constantaddress]\t! 
replicate4S($con)" %} 1.2061 + ins_encode %{ 1.2062 + __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 1.2063 + %} 1.2064 + ins_pipe( fpu_reg_reg ); 1.2065 +%} 1.2066 + 1.2067 +instruct Repl8S_imm(vecX dst, immI con) %{ 1.2068 + predicate(n->as_Vector()->length() == 8); 1.2069 + match(Set dst (ReplicateS con)); 1.2070 + format %{ "movq $dst,[$constantaddress]\n\t" 1.2071 + "punpcklqdq $dst,$dst\t! replicate8S($con)" %} 1.2072 + ins_encode %{ 1.2073 + __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 1.2074 + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1.2075 + %} 1.2076 + ins_pipe( pipe_slow ); 1.2077 +%} 1.2078 + 1.2079 +instruct Repl16S_imm(vecY dst, immI con) %{ 1.2080 + predicate(n->as_Vector()->length() == 16); 1.2081 + match(Set dst (ReplicateS con)); 1.2082 + format %{ "movq $dst,[$constantaddress]\n\t" 1.2083 + "punpcklqdq $dst,$dst\n\t" 1.2084 + "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %} 1.2085 + ins_encode %{ 1.2086 + __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 1.2087 + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1.2088 + __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 1.2089 + %} 1.2090 + ins_pipe( pipe_slow ); 1.2091 +%} 1.2092 + 1.2093 +// Replicate char/short (2 byte) scalar zero to be vector 1.2094 +instruct Repl2S_zero(vecS dst, immI0 zero) %{ 1.2095 + predicate(n->as_Vector()->length() == 2); 1.2096 + match(Set dst (ReplicateS zero)); 1.2097 + format %{ "pxor $dst,$dst\t! replicate2S zero" %} 1.2098 + ins_encode %{ 1.2099 + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1.2100 + %} 1.2101 + ins_pipe( fpu_reg_reg ); 1.2102 +%} 1.2103 + 1.2104 +instruct Repl4S_zero(vecD dst, immI0 zero) %{ 1.2105 + predicate(n->as_Vector()->length() == 4); 1.2106 + match(Set dst (ReplicateS zero)); 1.2107 + format %{ "pxor $dst,$dst\t! 
replicate4S zero" %} 1.2108 + ins_encode %{ 1.2109 + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1.2110 + %} 1.2111 + ins_pipe( fpu_reg_reg ); 1.2112 +%} 1.2113 + 1.2114 +instruct Repl8S_zero(vecX dst, immI0 zero) %{ 1.2115 + predicate(n->as_Vector()->length() == 8); 1.2116 + match(Set dst (ReplicateS zero)); 1.2117 + format %{ "pxor $dst,$dst\t! replicate8S zero" %} 1.2118 + ins_encode %{ 1.2119 + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1.2120 + %} 1.2121 + ins_pipe( fpu_reg_reg ); 1.2122 +%} 1.2123 + 1.2124 +instruct Repl16S_zero(vecY dst, immI0 zero) %{ 1.2125 + predicate(n->as_Vector()->length() == 16); 1.2126 + match(Set dst (ReplicateS zero)); 1.2127 + format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %} 1.2128 + ins_encode %{ 1.2129 + // The call is vpxor with vector256 = true; plain AVX has no 256-bit vpxor (AVX2 does), so the macro assembler is presumably expected to emit vxorpd instead - TODO confirm. 1.2130 + bool vector256 = true; 1.2131 + __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 1.2132 + %} 1.2133 + ins_pipe( fpu_reg_reg ); 1.2134 +%} 1.2135 + 1.2136 +// Replicate integer (4 byte) scalar to be vector 1.2137 +instruct Repl2I(vecD dst, rRegI src) %{ 1.2138 + predicate(n->as_Vector()->length() == 2); 1.2139 + match(Set dst (ReplicateI src)); 1.2140 + format %{ "movd $dst,$src\n\t" 1.2141 + "pshufd $dst,$dst,0x00\t! replicate2I" %} 1.2142 + ins_encode %{ 1.2143 + __ movdl($dst$$XMMRegister, $src$$Register); 1.2144 + __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1.2145 + %} 1.2146 + ins_pipe( fpu_reg_reg ); 1.2147 +%} 1.2148 + 1.2149 +instruct Repl4I(vecX dst, rRegI src) %{ 1.2150 + predicate(n->as_Vector()->length() == 4); 1.2151 + match(Set dst (ReplicateI src)); 1.2152 + format %{ "movd $dst,$src\n\t" 1.2153 + "pshufd $dst,$dst,0x00\t! 
replicate4I" %} 1.2154 + ins_encode %{ 1.2155 + __ movdl($dst$$XMMRegister, $src$$Register); 1.2156 + __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1.2157 + %} 1.2158 + ins_pipe( pipe_slow ); 1.2159 +%} 1.2160 + 1.2161 +instruct Repl8I(vecY dst, rRegI src) %{ 1.2162 + predicate(n->as_Vector()->length() == 8); 1.2163 + match(Set dst (ReplicateI src)); 1.2164 + format %{ "movd $dst,$src\n\t" 1.2165 + "pshufd $dst,$dst,0x00\n\t" 1.2166 + "vinserti128h $dst,$dst,$dst\t! replicate8I" %} 1.2167 + ins_encode %{ 1.2168 + __ movdl($dst$$XMMRegister, $src$$Register); 1.2169 + __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1.2170 + __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 1.2171 + %} 1.2172 + ins_pipe( pipe_slow ); 1.2173 +%} 1.2174 + 1.2175 +// Replicate integer (4 byte) scalar immediate to be vector by loading from const table. 1.2176 +instruct Repl2I_imm(vecD dst, immI con) %{ 1.2177 + predicate(n->as_Vector()->length() == 2); 1.2178 + match(Set dst (ReplicateI con)); 1.2179 + format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %} 1.2180 + ins_encode %{ 1.2181 + __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 1.2182 + %} 1.2183 + ins_pipe( fpu_reg_reg ); 1.2184 +%} 1.2185 + 1.2186 +instruct Repl4I_imm(vecX dst, immI con) %{ 1.2187 + predicate(n->as_Vector()->length() == 4); 1.2188 + match(Set dst (ReplicateI con)); 1.2189 + format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t" 1.2190 + "punpcklqdq $dst,$dst" %} 1.2191 + ins_encode %{ 1.2192 + __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 1.2193 + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1.2194 + %} 1.2195 + ins_pipe( pipe_slow ); 1.2196 +%} 1.2197 + 1.2198 +instruct Repl8I_imm(vecY dst, immI con) %{ 1.2199 + predicate(n->as_Vector()->length() == 8); 1.2200 + match(Set dst (ReplicateI con)); 1.2201 + format %{ "movq $dst,[$constantaddress]\t! 
replicate8I($con)\n\t" 1.2202 + "punpcklqdq $dst,$dst\n\t" 1.2203 + "vinserti128h $dst,$dst,$dst" %} 1.2204 + ins_encode %{ 1.2205 + __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 1.2206 + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1.2207 + __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 1.2208 + %} 1.2209 + ins_pipe( pipe_slow ); 1.2210 +%} 1.2211 + 1.2212 +// Integer could be loaded into xmm register directly from memory. 1.2213 +instruct Repl2I_mem(vecD dst, memory mem) %{ 1.2214 + predicate(n->as_Vector()->length() == 2); 1.2215 + match(Set dst (ReplicateI (LoadI mem))); 1.2216 + format %{ "movd $dst,$mem\n\t" 1.2217 + "pshufd $dst,$dst,0x00\t! replicate2I" %} 1.2218 + ins_encode %{ 1.2219 + __ movdl($dst$$XMMRegister, $mem$$Address); 1.2220 + __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1.2221 + %} 1.2222 + ins_pipe( fpu_reg_reg ); 1.2223 +%} 1.2224 + 1.2225 +instruct Repl4I_mem(vecX dst, memory mem) %{ 1.2226 + predicate(n->as_Vector()->length() == 4); 1.2227 + match(Set dst (ReplicateI (LoadI mem))); 1.2228 + format %{ "movd $dst,$mem\n\t" 1.2229 + "pshufd $dst,$dst,0x00\t! replicate4I" %} 1.2230 + ins_encode %{ 1.2231 + __ movdl($dst$$XMMRegister, $mem$$Address); 1.2232 + __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1.2233 + %} 1.2234 + ins_pipe( pipe_slow ); 1.2235 +%} 1.2236 + 1.2237 +instruct Repl8I_mem(vecY dst, memory mem) %{ 1.2238 + predicate(n->as_Vector()->length() == 8); 1.2239 + match(Set dst (ReplicateI (LoadI mem))); 1.2240 + format %{ "movd $dst,$mem\n\t" 1.2241 + "pshufd $dst,$dst,0x00\n\t" 1.2242 + "vinserti128h $dst,$dst,$dst\t! 
replicate8I" %} 1.2243 + ins_encode %{ 1.2244 + __ movdl($dst$$XMMRegister, $mem$$Address); 1.2245 + __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1.2246 + __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 1.2247 + %} 1.2248 + ins_pipe( pipe_slow ); 1.2249 +%} 1.2250 + 1.2251 +// Replicate integer (4 byte) scalar zero to be vector 1.2252 +instruct Repl2I_zero(vecD dst, immI0 zero) %{ 1.2253 + predicate(n->as_Vector()->length() == 2); 1.2254 + match(Set dst (ReplicateI zero)); 1.2255 + format %{ "pxor $dst,$dst\t! replicate2I zero" %} 1.2256 + ins_encode %{ 1.2257 + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1.2258 + %} 1.2259 + ins_pipe( fpu_reg_reg ); 1.2260 +%} 1.2261 + 1.2262 +instruct Repl4I_zero(vecX dst, immI0 zero) %{ 1.2263 + predicate(n->as_Vector()->length() == 4); 1.2264 + match(Set dst (ReplicateI zero)); 1.2265 + format %{ "pxor $dst,$dst\t! replicate4I zero" %} 1.2266 + ins_encode %{ 1.2267 + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1.2268 + %} 1.2269 + ins_pipe( fpu_reg_reg ); 1.2270 +%} 1.2271 + 1.2272 +instruct Repl8I_zero(vecY dst, immI0 zero) %{ 1.2273 + predicate(n->as_Vector()->length() == 8); 1.2274 + match(Set dst (ReplicateI zero)); 1.2275 + format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %} 1.2276 + ins_encode %{ 1.2277 + // The call is vpxor with vector256 = true; plain AVX has no 256-bit vpxor (AVX2 does), so the macro assembler is presumably expected to emit vxorpd instead - TODO confirm. 1.2278 + bool vector256 = true; 1.2279 + __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 1.2280 + %} 1.2281 + ins_pipe( fpu_reg_reg ); 1.2282 +%} 1.2283 + 1.2284 +// Replicate long (8 byte) scalar to be vector 1.2285 +#ifdef _LP64 1.2286 +instruct Repl2L(vecX dst, rRegL src) %{ 1.2287 + predicate(n->as_Vector()->length() == 2); 1.2288 + match(Set dst (ReplicateL src)); 1.2289 + format %{ "movdq $dst,$src\n\t" 1.2290 + "punpcklqdq $dst,$dst\t! 
replicate2L" %} 1.2291 + ins_encode %{ 1.2292 + __ movdq($dst$$XMMRegister, $src$$Register); 1.2293 + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1.2294 + %} 1.2295 + ins_pipe( pipe_slow ); 1.2296 +%} 1.2297 + 1.2298 +instruct Repl4L(vecY dst, rRegL src) %{ 1.2299 + predicate(n->as_Vector()->length() == 4); 1.2300 + match(Set dst (ReplicateL src)); 1.2301 + format %{ "movdq $dst,$src\n\t" 1.2302 + "punpcklqdq $dst,$dst\n\t" 1.2303 + "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 1.2304 + ins_encode %{ 1.2305 + __ movdq($dst$$XMMRegister, $src$$Register); 1.2306 + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1.2307 + __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 1.2308 + %} 1.2309 + ins_pipe( pipe_slow ); 1.2310 +%} 1.2311 +#else // _LP64 1.2312 +instruct Repl2L(vecX dst, eRegL src, regD tmp) %{ 1.2313 + predicate(n->as_Vector()->length() == 2); 1.2314 + match(Set dst (ReplicateL src)); 1.2315 + effect(TEMP dst, USE src, TEMP tmp); 1.2316 + format %{ "movdl $dst,$src.lo\n\t" 1.2317 + "movdl $tmp,$src.hi\n\t" 1.2318 + "punpckldq $dst,$tmp\n\t" 1.2319 + "punpcklqdq $dst,$dst\t! replicate2L"%} 1.2320 + ins_encode %{ 1.2321 + __ movdl($dst$$XMMRegister, $src$$Register); 1.2322 + __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 1.2323 + __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 1.2324 + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1.2325 + %} 1.2326 + ins_pipe( pipe_slow ); 1.2327 +%} 1.2328 + 1.2329 +instruct Repl4L(vecY dst, eRegL src, regD tmp) %{ 1.2330 + predicate(n->as_Vector()->length() == 4); 1.2331 + match(Set dst (ReplicateL src)); 1.2332 + effect(TEMP dst, USE src, TEMP tmp); 1.2333 + format %{ "movdl $dst,$src.lo\n\t" 1.2334 + "movdl $tmp,$src.hi\n\t" 1.2335 + "punpckldq $dst,$tmp\n\t" 1.2336 + "punpcklqdq $dst,$dst\n\t" 1.2337 + "vinserti128h $dst,$dst,$dst\t! 
replicate4L" %} 1.2338 + ins_encode %{ 1.2339 + __ movdl($dst$$XMMRegister, $src$$Register); 1.2340 + __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 1.2341 + __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 1.2342 + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1.2343 + __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 1.2344 + %} 1.2345 + ins_pipe( pipe_slow ); 1.2346 +%} 1.2347 +#endif // _LP64 1.2348 + 1.2349 +// Replicate long (8 byte) scalar immediate to be vector by loading from const table. 1.2350 +instruct Repl2L_imm(vecX dst, immL con) %{ 1.2351 + predicate(n->as_Vector()->length() == 2); 1.2352 + match(Set dst (ReplicateL con)); 1.2353 + format %{ "movq $dst,[$constantaddress]\n\t" 1.2354 + "punpcklqdq $dst,$dst\t! replicate2L($con)" %} 1.2355 + ins_encode %{ 1.2356 + __ movq($dst$$XMMRegister, $constantaddress($con)); 1.2357 + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1.2358 + %} 1.2359 + ins_pipe( pipe_slow ); 1.2360 +%} 1.2361 + 1.2362 +instruct Repl4L_imm(vecY dst, immL con) %{ 1.2363 + predicate(n->as_Vector()->length() == 4); 1.2364 + match(Set dst (ReplicateL con)); 1.2365 + format %{ "movq $dst,[$constantaddress]\n\t" 1.2366 + "punpcklqdq $dst,$dst\n\t" 1.2367 + "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %} 1.2368 + ins_encode %{ 1.2369 + __ movq($dst$$XMMRegister, $constantaddress($con)); 1.2370 + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1.2371 + __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 1.2372 + %} 1.2373 + ins_pipe( pipe_slow ); 1.2374 +%} 1.2375 + 1.2376 +// Long could be loaded into xmm register directly from memory. 1.2377 +instruct Repl2L_mem(vecX dst, memory mem) %{ 1.2378 + predicate(n->as_Vector()->length() == 2); 1.2379 + match(Set dst (ReplicateL (LoadL mem))); 1.2380 + format %{ "movq $dst,$mem\n\t" 1.2381 + "punpcklqdq $dst,$dst\t! 
replicate2L" %} 1.2382 + ins_encode %{ 1.2383 + __ movq($dst$$XMMRegister, $mem$$Address); 1.2384 + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1.2385 + %} 1.2386 + ins_pipe( pipe_slow ); 1.2387 +%} 1.2388 + 1.2389 +instruct Repl4L_mem(vecY dst, memory mem) %{ 1.2390 + predicate(n->as_Vector()->length() == 4); 1.2391 + match(Set dst (ReplicateL (LoadL mem))); 1.2392 + format %{ "movq $dst,$mem\n\t" 1.2393 + "punpcklqdq $dst,$dst\n\t" 1.2394 + "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 1.2395 + ins_encode %{ 1.2396 + __ movq($dst$$XMMRegister, $mem$$Address); 1.2397 + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1.2398 + __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 1.2399 + %} 1.2400 + ins_pipe( pipe_slow ); 1.2401 +%} 1.2402 + 1.2403 +// Replicate long (8 byte) scalar zero to be vector 1.2404 +instruct Repl2L_zero(vecX dst, immL0 zero) %{ 1.2405 + predicate(n->as_Vector()->length() == 2); 1.2406 + match(Set dst (ReplicateL zero)); 1.2407 + format %{ "pxor $dst,$dst\t! replicate2L zero" %} 1.2408 + ins_encode %{ 1.2409 + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1.2410 + %} 1.2411 + ins_pipe( fpu_reg_reg ); 1.2412 +%} 1.2413 + 1.2414 +instruct Repl4L_zero(vecY dst, immL0 zero) %{ 1.2415 + predicate(n->as_Vector()->length() == 4); 1.2416 + match(Set dst (ReplicateL zero)); 1.2417 + format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %} 1.2418 + ins_encode %{ 1.2419 + // The call is vpxor with vector256 = true; plain AVX has no 256-bit vpxor (AVX2 does), so the macro assembler is presumably expected to emit vxorpd instead - TODO confirm. 1.2420 + bool vector256 = true; 1.2421 + __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 1.2422 + %} 1.2423 + ins_pipe( fpu_reg_reg ); 1.2424 +%} 1.2425 + 1.2426 +// Replicate float (4 byte) scalar to be vector 1.2427 +instruct Repl2F(vecD dst, regF src) %{ 1.2428 + predicate(n->as_Vector()->length() == 2); 1.2429 + match(Set dst (ReplicateF src)); 1.2430 + format %{ "pshufd $dst,$src,0x00\t! 
replicate2F" %} 1.2431 + ins_encode %{ 1.2432 + __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 1.2433 + %} 1.2434 + ins_pipe( fpu_reg_reg ); 1.2435 +%} 1.2436 +// The format text lists $src as the pshufd source, matching the operands passed in ins_encode. 1.2437 +instruct Repl4F(vecX dst, regF src) %{ 1.2438 + predicate(n->as_Vector()->length() == 4); 1.2439 + match(Set dst (ReplicateF src)); 1.2440 + format %{ "pshufd $dst,$src,0x00\t! replicate4F" %} 1.2441 + ins_encode %{ 1.2442 + __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 1.2443 + %} 1.2444 + ins_pipe( pipe_slow ); 1.2445 +%} 1.2446 + 1.2447 +instruct Repl8F(vecY dst, regF src) %{ 1.2448 + predicate(n->as_Vector()->length() == 8); 1.2449 + match(Set dst (ReplicateF src)); 1.2450 + format %{ "pshufd $dst,$src,0x00\n\t" 1.2451 + "vinsertf128h $dst,$dst,$dst\t! replicate8F" %} 1.2452 + ins_encode %{ 1.2453 + __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 1.2454 + __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 1.2455 + %} 1.2456 + ins_pipe( pipe_slow ); 1.2457 +%} 1.2458 + 1.2459 +// Replicate float (4 byte) scalar zero to be vector 1.2460 +instruct Repl2F_zero(vecD dst, immF0 zero) %{ 1.2461 + predicate(n->as_Vector()->length() == 2); 1.2462 + match(Set dst (ReplicateF zero)); 1.2463 + format %{ "xorps $dst,$dst\t! replicate2F zero" %} 1.2464 + ins_encode %{ 1.2465 + __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 1.2466 + %} 1.2467 + ins_pipe( fpu_reg_reg ); 1.2468 +%} 1.2469 + 1.2470 +instruct Repl4F_zero(vecX dst, immF0 zero) %{ 1.2471 + predicate(n->as_Vector()->length() == 4); 1.2472 + match(Set dst (ReplicateF zero)); 1.2473 + format %{ "xorps $dst,$dst\t! replicate4F zero" %} 1.2474 + ins_encode %{ 1.2475 + __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 1.2476 + %} 1.2477 + ins_pipe( fpu_reg_reg ); 1.2478 +%} 1.2479 + 1.2480 +instruct Repl8F_zero(vecY dst, immF0 zero) %{ 1.2481 + predicate(n->as_Vector()->length() == 8); 1.2482 + match(Set dst (ReplicateF zero)); 1.2483 + format %{ "vxorps $dst,$dst,$dst\t! 
replicate8F zero" %} 1.2484 + ins_encode %{ 1.2485 + bool vector256 = true; 1.2486 + __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 1.2487 + %} 1.2488 + ins_pipe( fpu_reg_reg ); 1.2489 +%} 1.2490 + 1.2491 +// Replicate double (8 bytes) scalar to be vector 1.2492 +instruct Repl2D(vecX dst, regD src) %{ 1.2493 + predicate(n->as_Vector()->length() == 2); 1.2494 + match(Set dst (ReplicateD src)); 1.2495 + format %{ "pshufd $dst,$src,0x44\t! replicate2D" %} 1.2496 + ins_encode %{ 1.2497 + __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 1.2498 + %} 1.2499 + ins_pipe( pipe_slow ); 1.2500 +%} 1.2501 + 1.2502 +instruct Repl4D(vecY dst, regD src) %{ 1.2503 + predicate(n->as_Vector()->length() == 4); 1.2504 + match(Set dst (ReplicateD src)); 1.2505 + format %{ "pshufd $dst,$src,0x44\n\t" 1.2506 + "vinsertf128h $dst,$dst,$dst\t! replicate4D" %} 1.2507 + ins_encode %{ 1.2508 + __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 1.2509 + __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 1.2510 + %} 1.2511 + ins_pipe( pipe_slow ); 1.2512 +%} 1.2513 + 1.2514 +// Replicate double (8 byte) scalar zero to be vector 1.2515 +instruct Repl2D_zero(vecX dst, immD0 zero) %{ 1.2516 + predicate(n->as_Vector()->length() == 2); 1.2517 + match(Set dst (ReplicateD zero)); 1.2518 + format %{ "xorpd $dst,$dst\t! replicate2D zero" %} 1.2519 + ins_encode %{ 1.2520 + __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 1.2521 + %} 1.2522 + ins_pipe( fpu_reg_reg ); 1.2523 +%} 1.2524 + 1.2525 +instruct Repl4D_zero(vecY dst, immD0 zero) %{ 1.2526 + predicate(n->as_Vector()->length() == 4); 1.2527 + match(Set dst (ReplicateD zero)); 1.2528 + format %{ "vxorpd $dst,$dst,$dst,vect256\t! 
replicate4D zero" %}
  ins_encode %{
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, true);
  %}
  ins_pipe(fpu_reg_reg);
%}

// ====================VECTOR ARITHMETIC=======================================

// --------------------------------- ADD --------------------------------------

// Bytes vector add

// Two-operand form: dst = dst + src over 4 packed bytes.
instruct vadd4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed4B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0): dst = src1 + src2 over 4 packed bytes; the
// assembler's vector256 argument is false.
instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);
  %}
  ins_pipe(pipe_slow);
%}

// Two-operand form: dst = dst + src over 8 packed bytes.
instruct vadd8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed8B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0): dst = src1 + src2 over 8 packed bytes.
instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! 
add packed8B" %}
  ins_encode %{
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);
  %}
  ins_pipe(pipe_slow);
%}

// Two-operand form: dst = dst + src over 16 packed bytes.
instruct vadd16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed16B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0): dst = src1 + src2 over 16 packed bytes; the
// assembler's vector256 argument is false.
instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0) with the LoadVector matched as a memory operand:
// dst = src + [mem] over 16 packed bytes; vector256 argument is false.
instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, false);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form requiring UseAVX > 1: dst = src1 + src2 over 32 packed bytes.
instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! 
add packed32B" %}
  ins_encode %{
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, true);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form requiring UseAVX > 1, LoadVector matched as a memory operand:
// dst = src + [mem] over 32 packed bytes; vector256 argument is true.
instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, true);
  %}
  ins_pipe(pipe_slow);
%}

// Shorts/Chars vector add

// Two-operand form: dst = dst + src over 2 packed shorts.
instruct vadd2S(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed2S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0): dst = src1 + src2 over 2 packed shorts; the
// assembler's vector256 argument is false.
instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);
  %}
  ins_pipe(pipe_slow);
%}

// Two-operand form: dst = dst + src over 4 packed shorts.
instruct vadd4S(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! 
add packed4S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0): dst = src1 + src2 over 4 packed shorts; the
// assembler's vector256 argument is false.
instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);
  %}
  ins_pipe(pipe_slow);
%}

// Two-operand form: dst = dst + src over 8 packed shorts.
instruct vadd8S(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed8S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0): dst = src1 + src2 over 8 packed shorts; the
// assembler's vector256 argument is false.
instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0), LoadVector matched as a memory operand:
// dst = src + [mem] over 8 packed shorts.
instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! 
add packed8S" %}
  ins_encode %{
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, false);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form requiring UseAVX > 1: dst = src1 + src2 over 16 packed shorts;
// the assembler's vector256 argument is true.
instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, true);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form requiring UseAVX > 1, LoadVector matched as a memory operand:
// dst = src + [mem] over 16 packed shorts; vector256 argument is true.
instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, true);
  %}
  ins_pipe(pipe_slow);
%}

// Integers vector add

// Two-operand form: dst = dst + src over 2 packed ints.
instruct vadd2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed2I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0): dst = src1 + src2 over 2 packed ints.
instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! 
add packed2I" %}
  ins_encode %{
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);
  %}
  ins_pipe(pipe_slow);
%}

// Two-operand form: dst = dst + src over 4 packed ints.
instruct vadd4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed4I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0): dst = src1 + src2 over 4 packed ints; the
// assembler's vector256 argument is false.
instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
  ins_encode %{
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0), LoadVector matched as a memory operand:
// dst = src + [mem] over 4 packed ints; vector256 argument is false.
instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %}
  ins_encode %{
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, false);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form requiring UseAVX > 1: dst = src1 + src2 over 8 packed ints.
instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! 
add packed8I" %}
  ins_encode %{
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, true);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form requiring UseAVX > 1, LoadVector matched as a memory operand:
// dst = src + [mem] over 8 packed ints; vector256 argument is true.
instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %}
  ins_encode %{
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, true);
  %}
  ins_pipe(pipe_slow);
%}

// Longs vector add

// Two-operand form: dst = dst + src over 2 packed longs.
instruct vadd2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL dst src));
  format %{ "paddq $dst,$src\t! add packed2L" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0): dst = src1 + src2 over 2 packed longs; the
// assembler's vector256 argument is false.
instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
  ins_encode %{
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0), LoadVector matched as a memory operand:
// dst = src + [mem] over 2 packed longs.
instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! 
add packed2L" %}
  ins_encode %{
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, false);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form requiring UseAVX > 1: dst = src1 + src2 over 4 packed longs;
// the assembler's vector256 argument is true.
instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
  ins_encode %{
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, true);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form requiring UseAVX > 1, LoadVector matched as a memory operand:
// dst = src + [mem] over 4 packed longs; vector256 argument is true.
instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %}
  ins_encode %{
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, true);
  %}
  ins_pipe(pipe_slow);
%}

// Floats vector add

// Two-operand form: dst = dst + src over 2 packed floats.
instruct vadd2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed2F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0): dst = src1 + src2 over 2 packed floats.
instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! 
add packed2F" %}
  ins_encode %{
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);
  %}
  ins_pipe(pipe_slow);
%}

// Two-operand form: dst = dst + src over 4 packed floats.
instruct vadd4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed4F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0): dst = src1 + src2 over 4 packed floats; the
// assembler's vector256 argument is false.
instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
  ins_encode %{
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0), LoadVector matched as a memory operand:
// dst = src + [mem] over 4 packed floats; vector256 argument is false.
instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed4F" %}
  ins_encode %{
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, false);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0): dst = src1 + src2 over 8 packed floats.
instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! 
add packed8F" %}
  ins_encode %{
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, true);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0), LoadVector matched as a memory operand:
// dst = src + [mem] over 8 packed floats; vector256 argument is true.
instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
  ins_encode %{
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, true);
  %}
  ins_pipe(pipe_slow);
%}

// Doubles vector add

// Two-operand form: dst = dst + src over 2 packed doubles.
instruct vadd2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packed2D" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0): dst = src1 + src2 over 2 packed doubles; the
// assembler's vector256 argument is false.
instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
  ins_encode %{
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0), LoadVector matched as a memory operand:
// dst = src + [mem] over 2 packed doubles.
instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! 
add packed2D" %}
  ins_encode %{
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, false);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0): dst = src1 + src2 over 4 packed doubles; the
// assembler's vector256 argument is true.
instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
  ins_encode %{
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, true);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0), LoadVector matched as a memory operand:
// dst = src + [mem] over 4 packed doubles; vector256 argument is true.
instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
  ins_encode %{
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, true);
  %}
  ins_pipe(pipe_slow);
%}

// --------------------------------- SUB --------------------------------------

// Bytes vector sub

// Two-operand form: dst = dst - src over 4 packed bytes.
instruct vsub4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed4B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0): dst = src1 - src2 over 4 packed bytes.
instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! 
sub packed4B" %}
  ins_encode %{
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);
  %}
  ins_pipe(pipe_slow);
%}

// Two-operand form: dst = dst - src over 8 packed bytes.
instruct vsub8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed8B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0): dst = src1 - src2 over 8 packed bytes; the
// assembler's vector256 argument is false.
instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);
  %}
  ins_pipe(pipe_slow);
%}

// Two-operand form: dst = dst - src over 16 packed bytes.
instruct vsub16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed16B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0): dst = src1 - src2 over 16 packed bytes.
instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! 
sub packed16B" %}
  ins_encode %{
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0), LoadVector matched as a memory operand:
// dst = src - [mem] over 16 packed bytes; vector256 argument is false.
instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, false);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form requiring UseAVX > 1: dst = src1 - src2 over 32 packed bytes;
// the assembler's vector256 argument is true.
instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, true);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form requiring UseAVX > 1, LoadVector matched as a memory operand:
// dst = src - [mem] over 32 packed bytes; vector256 argument is true.
instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, true);
  %}
  ins_pipe(pipe_slow);
%}

// Shorts/Chars vector sub

// Two-operand form: dst = dst - src over 2 packed shorts.
instruct vsub2S(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! 
sub packed2S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0): dst = src1 - src2 over 2 packed shorts; the
// assembler's vector256 argument is false.
instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);
  %}
  ins_pipe(pipe_slow);
%}

// Two-operand form: dst = dst - src over 4 packed shorts.
instruct vsub4S(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed4S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0): dst = src1 - src2 over 4 packed shorts; the
// assembler's vector256 argument is false.
instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);
  %}
  ins_pipe(pipe_slow);
%}

// Two-operand form: dst = dst - src over 8 packed shorts.
instruct vsub8S(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed8S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0): dst = src1 - src2 over 8 packed shorts.
instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! 
sub packed8S" %}
  ins_encode %{
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0), LoadVector matched as a memory operand:
// dst = src - [mem] over 8 packed shorts; vector256 argument is false.
instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, false);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form requiring UseAVX > 1: dst = src1 - src2 over 16 packed shorts;
// the assembler's vector256 argument is true.
instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, true);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form requiring UseAVX > 1, LoadVector matched as a memory operand:
// dst = src - [mem] over 16 packed shorts; vector256 argument is true.
instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, true);
  %}
  ins_pipe(pipe_slow);
%}

// Integers vector sub

// Two-operand form: dst = dst - src over 2 packed ints.
instruct vsub2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! 
sub packed2I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0): dst = src1 - src2 over 2 packed ints; the
// assembler's vector256 argument is false.
instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %}
  ins_encode %{
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);
  %}
  ins_pipe(pipe_slow);
%}

// Two-operand form: dst = dst - src over 4 packed ints.
instruct vsub4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed4I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0): dst = src1 - src2 over 4 packed ints; the
// assembler's vector256 argument is false.
instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %}
  ins_encode %{
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0), LoadVector matched as a memory operand:
// dst = src - [mem] over 4 packed ints.
instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! 
sub packed4I" %}
  ins_encode %{
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, false);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form requiring UseAVX > 1: dst = src1 - src2 over 8 packed ints;
// the assembler's vector256 argument is true.
instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %}
  ins_encode %{
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, true);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form requiring UseAVX > 1, LoadVector matched as a memory operand:
// dst = src - [mem] over 8 packed ints; vector256 argument is true.
instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %}
  ins_encode %{
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, true);
  %}
  ins_pipe(pipe_slow);
%}

// Longs vector sub

// Two-operand form: dst = dst - src over 2 packed longs.
instruct vsub2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packed2L" %}
  ins_encode %{
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0): dst = src1 - src2 over 2 packed longs.
instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! 
sub packed2L" %}
  ins_encode %{
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0), LoadVector matched as a memory operand:
// dst = src - [mem] over 2 packed longs; vector256 argument is false.
instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %}
  ins_encode %{
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, false);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form requiring UseAVX > 1: dst = src1 - src2 over 4 packed longs;
// the assembler's vector256 argument is true.
instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %}
  ins_encode %{
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, true);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form requiring UseAVX > 1, LoadVector matched as a memory operand:
// dst = src - [mem] over 4 packed longs; vector256 argument is true.
instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %}
  ins_encode %{
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, true);
  %}
  ins_pipe(pipe_slow);
%}

// Floats vector sub

// Two-operand form: dst = dst - src over 2 packed floats.
instruct vsub2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! 
sub packed2F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0): dst = src1 - src2 over 2 packed floats; the
// assembler's vector256 argument is false.
instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %}
  ins_encode %{
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);
  %}
  ins_pipe(pipe_slow);
%}

// Two-operand form: dst = dst - src over 4 packed floats.
instruct vsub4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed4F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0): dst = src1 - src2 over 4 packed floats; the
// assembler's vector256 argument is false.
instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %}
  ins_encode %{
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);
  %}
  ins_pipe(pipe_slow);
%}

// AVX form (UseAVX > 0), LoadVector matched as a memory operand:
// dst = src - [mem] over 4 packed floats.
instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! 
sub packed4F" %} 1.3370 + ins_encode %{ 1.3371 + bool vector256 = false; 1.3372 + __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.3373 + %} 1.3374 + ins_pipe( pipe_slow ); 1.3375 +%} 1.3376 + 1.3377 +instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{ 1.3378 + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 1.3379 + match(Set dst (SubVF src1 src2)); 1.3380 + format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %} 1.3381 + ins_encode %{ 1.3382 + bool vector256 = true; 1.3383 + __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.3384 + %} 1.3385 + ins_pipe( pipe_slow ); 1.3386 +%} 1.3387 + 1.3388 +instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{ 1.3389 + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 1.3390 + match(Set dst (SubVF src (LoadVector mem))); 1.3391 + format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %} 1.3392 + ins_encode %{ 1.3393 + bool vector256 = true; 1.3394 + __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.3395 + %} 1.3396 + ins_pipe( pipe_slow ); 1.3397 +%} 1.3398 + 1.3399 +// Doubles vector sub 1.3400 +instruct vsub2D(vecX dst, vecX src) %{ 1.3401 + predicate(n->as_Vector()->length() == 2); 1.3402 + match(Set dst (SubVD dst src)); 1.3403 + format %{ "subpd $dst,$src\t! sub packed2D" %} 1.3404 + ins_encode %{ 1.3405 + __ subpd($dst$$XMMRegister, $src$$XMMRegister); 1.3406 + %} 1.3407 + ins_pipe( pipe_slow ); 1.3408 +%} 1.3409 + 1.3410 +instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{ 1.3411 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.3412 + match(Set dst (SubVD src1 src2)); 1.3413 + format %{ "vsubpd $dst,$src1,$src2\t! 
sub packed2D" %} 1.3414 + ins_encode %{ 1.3415 + bool vector256 = false; 1.3416 + __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.3417 + %} 1.3418 + ins_pipe( pipe_slow ); 1.3419 +%} 1.3420 + 1.3421 +instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{ 1.3422 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.3423 + match(Set dst (SubVD src (LoadVector mem))); 1.3424 + format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %} 1.3425 + ins_encode %{ 1.3426 + bool vector256 = false; 1.3427 + __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.3428 + %} 1.3429 + ins_pipe( pipe_slow ); 1.3430 +%} 1.3431 + 1.3432 +instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{ 1.3433 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.3434 + match(Set dst (SubVD src1 src2)); 1.3435 + format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %} 1.3436 + ins_encode %{ 1.3437 + bool vector256 = true; 1.3438 + __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.3439 + %} 1.3440 + ins_pipe( pipe_slow ); 1.3441 +%} 1.3442 + 1.3443 +instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{ 1.3444 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.3445 + match(Set dst (SubVD src (LoadVector mem))); 1.3446 + format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %} 1.3447 + ins_encode %{ 1.3448 + bool vector256 = true; 1.3449 + __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.3450 + %} 1.3451 + ins_pipe( pipe_slow ); 1.3452 +%} 1.3453 + 1.3454 +// --------------------------------- MUL -------------------------------------- 1.3455 + 1.3456 +// Shorts/Chars vector mul 1.3457 +instruct vmul2S(vecS dst, vecS src) %{ 1.3458 + predicate(n->as_Vector()->length() == 2); 1.3459 + match(Set dst (MulVS dst src)); 1.3460 + format %{ "pmullw $dst,$src\t! 
mul packed2S" %} 1.3461 + ins_encode %{ 1.3462 + __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 1.3463 + %} 1.3464 + ins_pipe( pipe_slow ); 1.3465 +%} 1.3466 + 1.3467 +instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{ 1.3468 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.3469 + match(Set dst (MulVS src1 src2)); 1.3470 + format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 1.3471 + ins_encode %{ 1.3472 + bool vector256 = false; 1.3473 + __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.3474 + %} 1.3475 + ins_pipe( pipe_slow ); 1.3476 +%} 1.3477 + 1.3478 +instruct vmul4S(vecD dst, vecD src) %{ 1.3479 + predicate(n->as_Vector()->length() == 4); 1.3480 + match(Set dst (MulVS dst src)); 1.3481 + format %{ "pmullw $dst,$src\t! mul packed4S" %} 1.3482 + ins_encode %{ 1.3483 + __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 1.3484 + %} 1.3485 + ins_pipe( pipe_slow ); 1.3486 +%} 1.3487 + 1.3488 +instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{ 1.3489 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.3490 + match(Set dst (MulVS src1 src2)); 1.3491 + format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 1.3492 + ins_encode %{ 1.3493 + bool vector256 = false; 1.3494 + __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.3495 + %} 1.3496 + ins_pipe( pipe_slow ); 1.3497 +%} 1.3498 + 1.3499 +instruct vmul8S(vecX dst, vecX src) %{ 1.3500 + predicate(n->as_Vector()->length() == 8); 1.3501 + match(Set dst (MulVS dst src)); 1.3502 + format %{ "pmullw $dst,$src\t! mul packed8S" %} 1.3503 + ins_encode %{ 1.3504 + __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 1.3505 + %} 1.3506 + ins_pipe( pipe_slow ); 1.3507 +%} 1.3508 + 1.3509 +instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{ 1.3510 + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 1.3511 + match(Set dst (MulVS src1 src2)); 1.3512 + format %{ "vpmullw $dst,$src1,$src2\t! 
mul packed8S" %} 1.3513 + ins_encode %{ 1.3514 + bool vector256 = false; 1.3515 + __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.3516 + %} 1.3517 + ins_pipe( pipe_slow ); 1.3518 +%} 1.3519 + 1.3520 +instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{ 1.3521 + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 1.3522 + match(Set dst (MulVS src (LoadVector mem))); 1.3523 + format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 1.3524 + ins_encode %{ 1.3525 + bool vector256 = false; 1.3526 + __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.3527 + %} 1.3528 + ins_pipe( pipe_slow ); 1.3529 +%} 1.3530 + 1.3531 +instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{ 1.3532 + predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 1.3533 + match(Set dst (MulVS src1 src2)); 1.3534 + format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} 1.3535 + ins_encode %{ 1.3536 + bool vector256 = true; 1.3537 + __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.3538 + %} 1.3539 + ins_pipe( pipe_slow ); 1.3540 +%} 1.3541 + 1.3542 +instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{ 1.3543 + predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 1.3544 + match(Set dst (MulVS src (LoadVector mem))); 1.3545 + format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} 1.3546 + ins_encode %{ 1.3547 + bool vector256 = true; 1.3548 + __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.3549 + %} 1.3550 + ins_pipe( pipe_slow ); 1.3551 +%} 1.3552 + 1.3553 +// Integers vector mul (sse4_1) 1.3554 +instruct vmul2I(vecD dst, vecD src) %{ 1.3555 + predicate(UseSSE > 3 && n->as_Vector()->length() == 2); 1.3556 + match(Set dst (MulVI dst src)); 1.3557 + format %{ "pmulld $dst,$src\t! 
mul packed2I" %} 1.3558 + ins_encode %{ 1.3559 + __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 1.3560 + %} 1.3561 + ins_pipe( pipe_slow ); 1.3562 +%} 1.3563 + 1.3564 +instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{ 1.3565 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.3566 + match(Set dst (MulVI src1 src2)); 1.3567 + format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %} 1.3568 + ins_encode %{ 1.3569 + bool vector256 = false; 1.3570 + __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.3571 + %} 1.3572 + ins_pipe( pipe_slow ); 1.3573 +%} 1.3574 + 1.3575 +instruct vmul4I(vecX dst, vecX src) %{ 1.3576 + predicate(UseSSE > 3 && n->as_Vector()->length() == 4); 1.3577 + match(Set dst (MulVI dst src)); 1.3578 + format %{ "pmulld $dst,$src\t! mul packed4I" %} 1.3579 + ins_encode %{ 1.3580 + __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 1.3581 + %} 1.3582 + ins_pipe( pipe_slow ); 1.3583 +%} 1.3584 + 1.3585 +instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{ 1.3586 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.3587 + match(Set dst (MulVI src1 src2)); 1.3588 + format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %} 1.3589 + ins_encode %{ 1.3590 + bool vector256 = false; 1.3591 + __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.3592 + %} 1.3593 + ins_pipe( pipe_slow ); 1.3594 +%} 1.3595 + 1.3596 +instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{ 1.3597 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.3598 + match(Set dst (MulVI src (LoadVector mem))); 1.3599 + format %{ "vpmulld $dst,$src,$mem\t! 
mul packed4I" %} 1.3600 + ins_encode %{ 1.3601 + bool vector256 = false; 1.3602 + __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.3603 + %} 1.3604 + ins_pipe( pipe_slow ); 1.3605 +%} 1.3606 + 1.3607 +instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{ 1.3608 + predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 1.3609 + match(Set dst (MulVI src1 src2)); 1.3610 + format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %} 1.3611 + ins_encode %{ 1.3612 + bool vector256 = true; 1.3613 + __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.3614 + %} 1.3615 + ins_pipe( pipe_slow ); 1.3616 +%} 1.3617 + 1.3618 +instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{ 1.3619 + predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 1.3620 + match(Set dst (MulVI src (LoadVector mem))); 1.3621 + format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %} 1.3622 + ins_encode %{ 1.3623 + bool vector256 = true; 1.3624 + __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.3625 + %} 1.3626 + ins_pipe( pipe_slow ); 1.3627 +%} 1.3628 + 1.3629 +// Floats vector mul 1.3630 +instruct vmul2F(vecD dst, vecD src) %{ 1.3631 + predicate(n->as_Vector()->length() == 2); 1.3632 + match(Set dst (MulVF dst src)); 1.3633 + format %{ "mulps $dst,$src\t! mul packed2F" %} 1.3634 + ins_encode %{ 1.3635 + __ mulps($dst$$XMMRegister, $src$$XMMRegister); 1.3636 + %} 1.3637 + ins_pipe( pipe_slow ); 1.3638 +%} 1.3639 + 1.3640 +instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{ 1.3641 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.3642 + match(Set dst (MulVF src1 src2)); 1.3643 + format %{ "vmulps $dst,$src1,$src2\t! 
mul packed2F" %} 1.3644 + ins_encode %{ 1.3645 + bool vector256 = false; 1.3646 + __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.3647 + %} 1.3648 + ins_pipe( pipe_slow ); 1.3649 +%} 1.3650 + 1.3651 +instruct vmul4F(vecX dst, vecX src) %{ 1.3652 + predicate(n->as_Vector()->length() == 4); 1.3653 + match(Set dst (MulVF dst src)); 1.3654 + format %{ "mulps $dst,$src\t! mul packed4F" %} 1.3655 + ins_encode %{ 1.3656 + __ mulps($dst$$XMMRegister, $src$$XMMRegister); 1.3657 + %} 1.3658 + ins_pipe( pipe_slow ); 1.3659 +%} 1.3660 + 1.3661 +instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{ 1.3662 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.3663 + match(Set dst (MulVF src1 src2)); 1.3664 + format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %} 1.3665 + ins_encode %{ 1.3666 + bool vector256 = false; 1.3667 + __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.3668 + %} 1.3669 + ins_pipe( pipe_slow ); 1.3670 +%} 1.3671 + 1.3672 +instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{ 1.3673 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.3674 + match(Set dst (MulVF src (LoadVector mem))); 1.3675 + format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %} 1.3676 + ins_encode %{ 1.3677 + bool vector256 = false; 1.3678 + __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.3679 + %} 1.3680 + ins_pipe( pipe_slow ); 1.3681 +%} 1.3682 + 1.3683 +instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{ 1.3684 + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 1.3685 + match(Set dst (MulVF src1 src2)); 1.3686 + format %{ "vmulps $dst,$src1,$src2\t! 
mul packed8F" %} 1.3687 + ins_encode %{ 1.3688 + bool vector256 = true; 1.3689 + __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.3690 + %} 1.3691 + ins_pipe( pipe_slow ); 1.3692 +%} 1.3693 + 1.3694 +instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{ 1.3695 + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 1.3696 + match(Set dst (MulVF src (LoadVector mem))); 1.3697 + format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %} 1.3698 + ins_encode %{ 1.3699 + bool vector256 = true; 1.3700 + __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.3701 + %} 1.3702 + ins_pipe( pipe_slow ); 1.3703 +%} 1.3704 + 1.3705 +// Doubles vector mul 1.3706 +instruct vmul2D(vecX dst, vecX src) %{ 1.3707 + predicate(n->as_Vector()->length() == 2); 1.3708 + match(Set dst (MulVD dst src)); 1.3709 + format %{ "mulpd $dst,$src\t! mul packed2D" %} 1.3710 + ins_encode %{ 1.3711 + __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 1.3712 + %} 1.3713 + ins_pipe( pipe_slow ); 1.3714 +%} 1.3715 + 1.3716 +instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{ 1.3717 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.3718 + match(Set dst (MulVD src1 src2)); 1.3719 + format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %} 1.3720 + ins_encode %{ 1.3721 + bool vector256 = false; 1.3722 + __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.3723 + %} 1.3724 + ins_pipe( pipe_slow ); 1.3725 +%} 1.3726 + 1.3727 +instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{ 1.3728 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.3729 + match(Set dst (MulVD src (LoadVector mem))); 1.3730 + format %{ "vmulpd $dst,$src,$mem\t! 
mul packed2D" %} 1.3731 + ins_encode %{ 1.3732 + bool vector256 = false; 1.3733 + __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.3734 + %} 1.3735 + ins_pipe( pipe_slow ); 1.3736 +%} 1.3737 + 1.3738 +instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{ 1.3739 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.3740 + match(Set dst (MulVD src1 src2)); 1.3741 + format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %} 1.3742 + ins_encode %{ 1.3743 + bool vector256 = true; 1.3744 + __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.3745 + %} 1.3746 + ins_pipe( pipe_slow ); 1.3747 +%} 1.3748 + 1.3749 +instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{ 1.3750 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.3751 + match(Set dst (MulVD src (LoadVector mem))); 1.3752 + format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %} 1.3753 + ins_encode %{ 1.3754 + bool vector256 = true; 1.3755 + __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.3756 + %} 1.3757 + ins_pipe( pipe_slow ); 1.3758 +%} 1.3759 + 1.3760 +// --------------------------------- DIV -------------------------------------- 1.3761 + 1.3762 +// Floats vector div 1.3763 +instruct vdiv2F(vecD dst, vecD src) %{ 1.3764 + predicate(n->as_Vector()->length() == 2); 1.3765 + match(Set dst (DivVF dst src)); 1.3766 + format %{ "divps $dst,$src\t! div packed2F" %} 1.3767 + ins_encode %{ 1.3768 + __ divps($dst$$XMMRegister, $src$$XMMRegister); 1.3769 + %} 1.3770 + ins_pipe( pipe_slow ); 1.3771 +%} 1.3772 + 1.3773 +instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{ 1.3774 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.3775 + match(Set dst (DivVF src1 src2)); 1.3776 + format %{ "vdivps $dst,$src1,$src2\t! 
div packed2F" %} 1.3777 + ins_encode %{ 1.3778 + bool vector256 = false; 1.3779 + __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.3780 + %} 1.3781 + ins_pipe( pipe_slow ); 1.3782 +%} 1.3783 + 1.3784 +instruct vdiv4F(vecX dst, vecX src) %{ 1.3785 + predicate(n->as_Vector()->length() == 4); 1.3786 + match(Set dst (DivVF dst src)); 1.3787 + format %{ "divps $dst,$src\t! div packed4F" %} 1.3788 + ins_encode %{ 1.3789 + __ divps($dst$$XMMRegister, $src$$XMMRegister); 1.3790 + %} 1.3791 + ins_pipe( pipe_slow ); 1.3792 +%} 1.3793 + 1.3794 +instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{ 1.3795 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.3796 + match(Set dst (DivVF src1 src2)); 1.3797 + format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %} 1.3798 + ins_encode %{ 1.3799 + bool vector256 = false; 1.3800 + __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.3801 + %} 1.3802 + ins_pipe( pipe_slow ); 1.3803 +%} 1.3804 + 1.3805 +instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{ 1.3806 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.3807 + match(Set dst (DivVF src (LoadVector mem))); 1.3808 + format %{ "vdivps $dst,$src,$mem\t! div packed4F" %} 1.3809 + ins_encode %{ 1.3810 + bool vector256 = false; 1.3811 + __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.3812 + %} 1.3813 + ins_pipe( pipe_slow ); 1.3814 +%} 1.3815 + 1.3816 +instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{ 1.3817 + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 1.3818 + match(Set dst (DivVF src1 src2)); 1.3819 + format %{ "vdivps $dst,$src1,$src2\t! 
div packed8F" %} 1.3820 + ins_encode %{ 1.3821 + bool vector256 = true; 1.3822 + __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.3823 + %} 1.3824 + ins_pipe( pipe_slow ); 1.3825 +%} 1.3826 + 1.3827 +instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{ 1.3828 + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 1.3829 + match(Set dst (DivVF src (LoadVector mem))); 1.3830 + format %{ "vdivps $dst,$src,$mem\t! div packed8F" %} 1.3831 + ins_encode %{ 1.3832 + bool vector256 = true; 1.3833 + __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.3834 + %} 1.3835 + ins_pipe( pipe_slow ); 1.3836 +%} 1.3837 + 1.3838 +// Doubles vector div 1.3839 +instruct vdiv2D(vecX dst, vecX src) %{ 1.3840 + predicate(n->as_Vector()->length() == 2); 1.3841 + match(Set dst (DivVD dst src)); 1.3842 + format %{ "divpd $dst,$src\t! div packed2D" %} 1.3843 + ins_encode %{ 1.3844 + __ divpd($dst$$XMMRegister, $src$$XMMRegister); 1.3845 + %} 1.3846 + ins_pipe( pipe_slow ); 1.3847 +%} 1.3848 + 1.3849 +instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{ 1.3850 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.3851 + match(Set dst (DivVD src1 src2)); 1.3852 + format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %} 1.3853 + ins_encode %{ 1.3854 + bool vector256 = false; 1.3855 + __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.3856 + %} 1.3857 + ins_pipe( pipe_slow ); 1.3858 +%} 1.3859 + 1.3860 +instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{ 1.3861 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.3862 + match(Set dst (DivVD src (LoadVector mem))); 1.3863 + format %{ "vdivpd $dst,$src,$mem\t! 
div packed2D" %} 1.3864 + ins_encode %{ 1.3865 + bool vector256 = false; 1.3866 + __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.3867 + %} 1.3868 + ins_pipe( pipe_slow ); 1.3869 +%} 1.3870 + 1.3871 +instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{ 1.3872 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.3873 + match(Set dst (DivVD src1 src2)); 1.3874 + format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %} 1.3875 + ins_encode %{ 1.3876 + bool vector256 = true; 1.3877 + __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.3878 + %} 1.3879 + ins_pipe( pipe_slow ); 1.3880 +%} 1.3881 + 1.3882 +instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{ 1.3883 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.3884 + match(Set dst (DivVD src (LoadVector mem))); 1.3885 + format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %} 1.3886 + ins_encode %{ 1.3887 + bool vector256 = true; 1.3888 + __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.3889 + %} 1.3890 + ins_pipe( pipe_slow ); 1.3891 +%} 1.3892 + 1.3893 +// ------------------------------ Shift --------------------------------------- 1.3894 + 1.3895 +// Left and right shift count vectors are the same on x86 1.3896 +// (only lowest bits of xmm reg are used for count). 1.3897 +instruct vshiftcnt(vecS dst, rRegI cnt) %{ 1.3898 + match(Set dst (LShiftCntV cnt)); 1.3899 + match(Set dst (RShiftCntV cnt)); 1.3900 + format %{ "movd $dst,$cnt\t! load shift count" %} 1.3901 + ins_encode %{ 1.3902 + __ movdl($dst$$XMMRegister, $cnt$$Register); 1.3903 + %} 1.3904 + ins_pipe( pipe_slow ); 1.3905 +%} 1.3906 + 1.3907 +// ------------------------------ LeftShift ----------------------------------- 1.3908 + 1.3909 +// Shorts/Chars vector left shift 1.3910 +instruct vsll2S(vecS dst, vecS shift) %{ 1.3911 + predicate(n->as_Vector()->length() == 2); 1.3912 + match(Set dst (LShiftVS dst shift)); 1.3913 + format %{ "psllw $dst,$shift\t! 
left shift packed2S" %} 1.3914 + ins_encode %{ 1.3915 + __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 1.3916 + %} 1.3917 + ins_pipe( pipe_slow ); 1.3918 +%} 1.3919 + 1.3920 +instruct vsll2S_imm(vecS dst, immI8 shift) %{ 1.3921 + predicate(n->as_Vector()->length() == 2); 1.3922 + match(Set dst (LShiftVS dst shift)); 1.3923 + format %{ "psllw $dst,$shift\t! left shift packed2S" %} 1.3924 + ins_encode %{ 1.3925 + __ psllw($dst$$XMMRegister, (int)$shift$$constant); 1.3926 + %} 1.3927 + ins_pipe( pipe_slow ); 1.3928 +%} 1.3929 + 1.3930 +instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{ 1.3931 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.3932 + match(Set dst (LShiftVS src shift)); 1.3933 + format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 1.3934 + ins_encode %{ 1.3935 + bool vector256 = false; 1.3936 + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 1.3937 + %} 1.3938 + ins_pipe( pipe_slow ); 1.3939 +%} 1.3940 + 1.3941 +instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ 1.3942 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.3943 + match(Set dst (LShiftVS src shift)); 1.3944 + format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 1.3945 + ins_encode %{ 1.3946 + bool vector256 = false; 1.3947 + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 1.3948 + %} 1.3949 + ins_pipe( pipe_slow ); 1.3950 +%} 1.3951 + 1.3952 +instruct vsll4S(vecD dst, vecS shift) %{ 1.3953 + predicate(n->as_Vector()->length() == 4); 1.3954 + match(Set dst (LShiftVS dst shift)); 1.3955 + format %{ "psllw $dst,$shift\t! left shift packed4S" %} 1.3956 + ins_encode %{ 1.3957 + __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 1.3958 + %} 1.3959 + ins_pipe( pipe_slow ); 1.3960 +%} 1.3961 + 1.3962 +instruct vsll4S_imm(vecD dst, immI8 shift) %{ 1.3963 + predicate(n->as_Vector()->length() == 4); 1.3964 + match(Set dst (LShiftVS dst shift)); 1.3965 + format %{ "psllw $dst,$shift\t! 
left shift packed4S" %} 1.3966 + ins_encode %{ 1.3967 + __ psllw($dst$$XMMRegister, (int)$shift$$constant); 1.3968 + %} 1.3969 + ins_pipe( pipe_slow ); 1.3970 +%} 1.3971 + 1.3972 +instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{ 1.3973 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.3974 + match(Set dst (LShiftVS src shift)); 1.3975 + format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 1.3976 + ins_encode %{ 1.3977 + bool vector256 = false; 1.3978 + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 1.3979 + %} 1.3980 + ins_pipe( pipe_slow ); 1.3981 +%} 1.3982 + 1.3983 +instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ 1.3984 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.3985 + match(Set dst (LShiftVS src shift)); 1.3986 + format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 1.3987 + ins_encode %{ 1.3988 + bool vector256 = false; 1.3989 + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 1.3990 + %} 1.3991 + ins_pipe( pipe_slow ); 1.3992 +%} 1.3993 + 1.3994 +instruct vsll8S(vecX dst, vecS shift) %{ 1.3995 + predicate(n->as_Vector()->length() == 8); 1.3996 + match(Set dst (LShiftVS dst shift)); 1.3997 + format %{ "psllw $dst,$shift\t! left shift packed8S" %} 1.3998 + ins_encode %{ 1.3999 + __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 1.4000 + %} 1.4001 + ins_pipe( pipe_slow ); 1.4002 +%} 1.4003 + 1.4004 +instruct vsll8S_imm(vecX dst, immI8 shift) %{ 1.4005 + predicate(n->as_Vector()->length() == 8); 1.4006 + match(Set dst (LShiftVS dst shift)); 1.4007 + format %{ "psllw $dst,$shift\t! 
left shift packed8S" %} 1.4008 + ins_encode %{ 1.4009 + __ psllw($dst$$XMMRegister, (int)$shift$$constant); 1.4010 + %} 1.4011 + ins_pipe( pipe_slow ); 1.4012 +%} 1.4013 + 1.4014 +instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{ 1.4015 + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 1.4016 + match(Set dst (LShiftVS src shift)); 1.4017 + format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 1.4018 + ins_encode %{ 1.4019 + bool vector256 = false; 1.4020 + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 1.4021 + %} 1.4022 + ins_pipe( pipe_slow ); 1.4023 +%} 1.4024 + 1.4025 +instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ 1.4026 + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 1.4027 + match(Set dst (LShiftVS src shift)); 1.4028 + format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 1.4029 + ins_encode %{ 1.4030 + bool vector256 = false; 1.4031 + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 1.4032 + %} 1.4033 + ins_pipe( pipe_slow ); 1.4034 +%} 1.4035 + 1.4036 +instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{ 1.4037 + predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 1.4038 + match(Set dst (LShiftVS src shift)); 1.4039 + format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 1.4040 + ins_encode %{ 1.4041 + bool vector256 = true; 1.4042 + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 1.4043 + %} 1.4044 + ins_pipe( pipe_slow ); 1.4045 +%} 1.4046 + 1.4047 +instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ 1.4048 + predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 1.4049 + match(Set dst (LShiftVS src shift)); 1.4050 + format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed16S" %} 1.4051 + ins_encode %{ 1.4052 + bool vector256 = true; 1.4053 + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 1.4054 + %} 1.4055 + ins_pipe( pipe_slow ); 1.4056 +%} 1.4057 + 1.4058 +// Integers vector left shift 1.4059 +instruct vsll2I(vecD dst, vecS shift) %{ 1.4060 + predicate(n->as_Vector()->length() == 2); 1.4061 + match(Set dst (LShiftVI dst shift)); 1.4062 + format %{ "pslld $dst,$shift\t! left shift packed2I" %} 1.4063 + ins_encode %{ 1.4064 + __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 1.4065 + %} 1.4066 + ins_pipe( pipe_slow ); 1.4067 +%} 1.4068 + 1.4069 +instruct vsll2I_imm(vecD dst, immI8 shift) %{ 1.4070 + predicate(n->as_Vector()->length() == 2); 1.4071 + match(Set dst (LShiftVI dst shift)); 1.4072 + format %{ "pslld $dst,$shift\t! left shift packed2I" %} 1.4073 + ins_encode %{ 1.4074 + __ pslld($dst$$XMMRegister, (int)$shift$$constant); 1.4075 + %} 1.4076 + ins_pipe( pipe_slow ); 1.4077 +%} 1.4078 + 1.4079 +instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{ 1.4080 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.4081 + match(Set dst (LShiftVI src shift)); 1.4082 + format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} 1.4083 + ins_encode %{ 1.4084 + bool vector256 = false; 1.4085 + __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 1.4086 + %} 1.4087 + ins_pipe( pipe_slow ); 1.4088 +%} 1.4089 + 1.4090 +instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 1.4091 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.4092 + match(Set dst (LShiftVI src shift)); 1.4093 + format %{ "vpslld $dst,$src,$shift\t! 
left shift packed2I" %} 1.4094 + ins_encode %{ 1.4095 + bool vector256 = false; 1.4096 + __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 1.4097 + %} 1.4098 + ins_pipe( pipe_slow ); 1.4099 +%} 1.4100 + 1.4101 +instruct vsll4I(vecX dst, vecS shift) %{ 1.4102 + predicate(n->as_Vector()->length() == 4); 1.4103 + match(Set dst (LShiftVI dst shift)); 1.4104 + format %{ "pslld $dst,$shift\t! left shift packed4I" %} 1.4105 + ins_encode %{ 1.4106 + __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 1.4107 + %} 1.4108 + ins_pipe( pipe_slow ); 1.4109 +%} 1.4110 + 1.4111 +instruct vsll4I_imm(vecX dst, immI8 shift) %{ 1.4112 + predicate(n->as_Vector()->length() == 4); 1.4113 + match(Set dst (LShiftVI dst shift)); 1.4114 + format %{ "pslld $dst,$shift\t! left shift packed4I" %} 1.4115 + ins_encode %{ 1.4116 + __ pslld($dst$$XMMRegister, (int)$shift$$constant); 1.4117 + %} 1.4118 + ins_pipe( pipe_slow ); 1.4119 +%} 1.4120 + 1.4121 +instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{ 1.4122 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.4123 + match(Set dst (LShiftVI src shift)); 1.4124 + format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} 1.4125 + ins_encode %{ 1.4126 + bool vector256 = false; 1.4127 + __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 1.4128 + %} 1.4129 + ins_pipe( pipe_slow ); 1.4130 +%} 1.4131 + 1.4132 +instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 1.4133 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.4134 + match(Set dst (LShiftVI src shift)); 1.4135 + format %{ "vpslld $dst,$src,$shift\t! 
left shift packed4I" %} 1.4136 + ins_encode %{ 1.4137 + bool vector256 = false; 1.4138 + __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 1.4139 + %} 1.4140 + ins_pipe( pipe_slow ); 1.4141 +%} 1.4142 + 1.4143 +instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{ 1.4144 + predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 1.4145 + match(Set dst (LShiftVI src shift)); 1.4146 + format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} 1.4147 + ins_encode %{ 1.4148 + bool vector256 = true; 1.4149 + __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 1.4150 + %} 1.4151 + ins_pipe( pipe_slow ); 1.4152 +%} 1.4153 + 1.4154 +instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 1.4155 + predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 1.4156 + match(Set dst (LShiftVI src shift)); 1.4157 + format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} 1.4158 + ins_encode %{ 1.4159 + bool vector256 = true; 1.4160 + __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 1.4161 + %} 1.4162 + ins_pipe( pipe_slow ); 1.4163 +%} 1.4164 + 1.4165 +// Longs vector left shift 1.4166 +instruct vsll2L(vecX dst, vecS shift) %{ 1.4167 + predicate(n->as_Vector()->length() == 2); 1.4168 + match(Set dst (LShiftVL dst shift)); 1.4169 + format %{ "psllq $dst,$shift\t! left shift packed2L" %} 1.4170 + ins_encode %{ 1.4171 + __ psllq($dst$$XMMRegister, $shift$$XMMRegister); 1.4172 + %} 1.4173 + ins_pipe( pipe_slow ); 1.4174 +%} 1.4175 + 1.4176 +instruct vsll2L_imm(vecX dst, immI8 shift) %{ 1.4177 + predicate(n->as_Vector()->length() == 2); 1.4178 + match(Set dst (LShiftVL dst shift)); 1.4179 + format %{ "psllq $dst,$shift\t! 
left shift packed2L" %} 1.4180 + ins_encode %{ 1.4181 + __ psllq($dst$$XMMRegister, (int)$shift$$constant); 1.4182 + %} 1.4183 + ins_pipe( pipe_slow ); 1.4184 +%} 1.4185 + 1.4186 +instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{ 1.4187 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.4188 + match(Set dst (LShiftVL src shift)); 1.4189 + format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} 1.4190 + ins_encode %{ 1.4191 + bool vector256 = false; 1.4192 + __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 1.4193 + %} 1.4194 + ins_pipe( pipe_slow ); 1.4195 +%} 1.4196 + 1.4197 +instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ 1.4198 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.4199 + match(Set dst (LShiftVL src shift)); 1.4200 + format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} 1.4201 + ins_encode %{ 1.4202 + bool vector256 = false; 1.4203 + __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 1.4204 + %} 1.4205 + ins_pipe( pipe_slow ); 1.4206 +%} 1.4207 + 1.4208 +instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{ 1.4209 + predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 1.4210 + match(Set dst (LShiftVL src shift)); 1.4211 + format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} 1.4212 + ins_encode %{ 1.4213 + bool vector256 = true; 1.4214 + __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 1.4215 + %} 1.4216 + ins_pipe( pipe_slow ); 1.4217 +%} 1.4218 + 1.4219 +instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ 1.4220 + predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 1.4221 + match(Set dst (LShiftVL src shift)); 1.4222 + format %{ "vpsllq $dst,$src,$shift\t! 
left shift packed4L" %} 1.4223 + ins_encode %{ 1.4224 + bool vector256 = true; 1.4225 + __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 1.4226 + %} 1.4227 + ins_pipe( pipe_slow ); 1.4228 +%} 1.4229 + 1.4230 +// ----------------------- LogicalRightShift ----------------------------------- 1.4231 + 1.4232 +// Shorts vector logical right shift produces incorrect Java result 1.4233 +// for negative data because Java code converts short values into int with 1.4234 +// sign extension before a shift. But char vectors are fine since chars are 1.4235 +// unsigned values. 1.4236 + 1.4237 +instruct vsrl2S(vecS dst, vecS shift) %{ 1.4238 + predicate(n->as_Vector()->length() == 2); 1.4239 + match(Set dst (URShiftVS dst shift)); 1.4240 + format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} 1.4241 + ins_encode %{ 1.4242 + __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 1.4243 + %} 1.4244 + ins_pipe( pipe_slow ); 1.4245 +%} 1.4246 + 1.4247 +instruct vsrl2S_imm(vecS dst, immI8 shift) %{ 1.4248 + predicate(n->as_Vector()->length() == 2); 1.4249 + match(Set dst (URShiftVS dst shift)); 1.4250 + format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} 1.4251 + ins_encode %{ 1.4252 + __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 1.4253 + %} 1.4254 + ins_pipe( pipe_slow ); 1.4255 +%} 1.4256 + 1.4257 +instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{ 1.4258 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.4259 + match(Set dst (URShiftVS src shift)); 1.4260 + format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed2S" %} 1.4261 + ins_encode %{ 1.4262 + bool vector256 = false; 1.4263 + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 1.4264 + %} 1.4265 + ins_pipe( pipe_slow ); 1.4266 +%} 1.4267 + 1.4268 +instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ 1.4269 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.4270 + match(Set dst (URShiftVS src shift)); 1.4271 + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 1.4272 + ins_encode %{ 1.4273 + bool vector256 = false; 1.4274 + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 1.4275 + %} 1.4276 + ins_pipe( pipe_slow ); 1.4277 +%} 1.4278 + 1.4279 +instruct vsrl4S(vecD dst, vecS shift) %{ 1.4280 + predicate(n->as_Vector()->length() == 4); 1.4281 + match(Set dst (URShiftVS dst shift)); 1.4282 + format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} 1.4283 + ins_encode %{ 1.4284 + __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 1.4285 + %} 1.4286 + ins_pipe( pipe_slow ); 1.4287 +%} 1.4288 + 1.4289 +instruct vsrl4S_imm(vecD dst, immI8 shift) %{ 1.4290 + predicate(n->as_Vector()->length() == 4); 1.4291 + match(Set dst (URShiftVS dst shift)); 1.4292 + format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} 1.4293 + ins_encode %{ 1.4294 + __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 1.4295 + %} 1.4296 + ins_pipe( pipe_slow ); 1.4297 +%} 1.4298 + 1.4299 +instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{ 1.4300 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.4301 + match(Set dst (URShiftVS src shift)); 1.4302 + format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed4S" %} 1.4303 + ins_encode %{ 1.4304 + bool vector256 = false; 1.4305 + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 1.4306 + %} 1.4307 + ins_pipe( pipe_slow ); 1.4308 +%} 1.4309 + 1.4310 +instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ 1.4311 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.4312 + match(Set dst (URShiftVS src shift)); 1.4313 + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 1.4314 + ins_encode %{ 1.4315 + bool vector256 = false; 1.4316 + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 1.4317 + %} 1.4318 + ins_pipe( pipe_slow ); 1.4319 +%} 1.4320 + 1.4321 +instruct vsrl8S(vecX dst, vecS shift) %{ 1.4322 + predicate(n->as_Vector()->length() == 8); 1.4323 + match(Set dst (URShiftVS dst shift)); 1.4324 + format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} 1.4325 + ins_encode %{ 1.4326 + __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 1.4327 + %} 1.4328 + ins_pipe( pipe_slow ); 1.4329 +%} 1.4330 + 1.4331 +instruct vsrl8S_imm(vecX dst, immI8 shift) %{ 1.4332 + predicate(n->as_Vector()->length() == 8); 1.4333 + match(Set dst (URShiftVS dst shift)); 1.4334 + format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} 1.4335 + ins_encode %{ 1.4336 + __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 1.4337 + %} 1.4338 + ins_pipe( pipe_slow ); 1.4339 +%} 1.4340 + 1.4341 +instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{ 1.4342 + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 1.4343 + match(Set dst (URShiftVS src shift)); 1.4344 + format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed8S" %} 1.4345 + ins_encode %{ 1.4346 + bool vector256 = false; 1.4347 + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 1.4348 + %} 1.4349 + ins_pipe( pipe_slow ); 1.4350 +%} 1.4351 + 1.4352 +instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ 1.4353 + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 1.4354 + match(Set dst (URShiftVS src shift)); 1.4355 + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 1.4356 + ins_encode %{ 1.4357 + bool vector256 = false; 1.4358 + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 1.4359 + %} 1.4360 + ins_pipe( pipe_slow ); 1.4361 +%} 1.4362 + 1.4363 +instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{ 1.4364 + predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 1.4365 + match(Set dst (URShiftVS src shift)); 1.4366 + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 1.4367 + ins_encode %{ 1.4368 + bool vector256 = true; 1.4369 + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 1.4370 + %} 1.4371 + ins_pipe( pipe_slow ); 1.4372 +%} 1.4373 + 1.4374 +instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ 1.4375 + predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 1.4376 + match(Set dst (URShiftVS src shift)); 1.4377 + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 1.4378 + ins_encode %{ 1.4379 + bool vector256 = true; 1.4380 + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 1.4381 + %} 1.4382 + ins_pipe( pipe_slow ); 1.4383 +%} 1.4384 + 1.4385 +// Integers vector logical right shift 1.4386 +instruct vsrl2I(vecD dst, vecS shift) %{ 1.4387 + predicate(n->as_Vector()->length() == 2); 1.4388 + match(Set dst (URShiftVI dst shift)); 1.4389 + format %{ "psrld $dst,$shift\t! 
logical right shift packed2I" %} 1.4390 + ins_encode %{ 1.4391 + __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 1.4392 + %} 1.4393 + ins_pipe( pipe_slow ); 1.4394 +%} 1.4395 + 1.4396 +instruct vsrl2I_imm(vecD dst, immI8 shift) %{ 1.4397 + predicate(n->as_Vector()->length() == 2); 1.4398 + match(Set dst (URShiftVI dst shift)); 1.4399 + format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} 1.4400 + ins_encode %{ 1.4401 + __ psrld($dst$$XMMRegister, (int)$shift$$constant); 1.4402 + %} 1.4403 + ins_pipe( pipe_slow ); 1.4404 +%} 1.4405 + 1.4406 +instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{ 1.4407 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.4408 + match(Set dst (URShiftVI src shift)); 1.4409 + format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 1.4410 + ins_encode %{ 1.4411 + bool vector256 = false; 1.4412 + __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 1.4413 + %} 1.4414 + ins_pipe( pipe_slow ); 1.4415 +%} 1.4416 + 1.4417 +instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 1.4418 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.4419 + match(Set dst (URShiftVI src shift)); 1.4420 + format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 1.4421 + ins_encode %{ 1.4422 + bool vector256 = false; 1.4423 + __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 1.4424 + %} 1.4425 + ins_pipe( pipe_slow ); 1.4426 +%} 1.4427 + 1.4428 +instruct vsrl4I(vecX dst, vecS shift) %{ 1.4429 + predicate(n->as_Vector()->length() == 4); 1.4430 + match(Set dst (URShiftVI dst shift)); 1.4431 + format %{ "psrld $dst,$shift\t! 
logical right shift packed4I" %} 1.4432 + ins_encode %{ 1.4433 + __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 1.4434 + %} 1.4435 + ins_pipe( pipe_slow ); 1.4436 +%} 1.4437 + 1.4438 +instruct vsrl4I_imm(vecX dst, immI8 shift) %{ 1.4439 + predicate(n->as_Vector()->length() == 4); 1.4440 + match(Set dst (URShiftVI dst shift)); 1.4441 + format %{ "psrld $dst,$shift\t! logical right shift packed4I" %} 1.4442 + ins_encode %{ 1.4443 + __ psrld($dst$$XMMRegister, (int)$shift$$constant); 1.4444 + %} 1.4445 + ins_pipe( pipe_slow ); 1.4446 +%} 1.4447 + 1.4448 +instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{ 1.4449 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.4450 + match(Set dst (URShiftVI src shift)); 1.4451 + format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} 1.4452 + ins_encode %{ 1.4453 + bool vector256 = false; 1.4454 + __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 1.4455 + %} 1.4456 + ins_pipe( pipe_slow ); 1.4457 +%} 1.4458 + 1.4459 +instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 1.4460 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.4461 + match(Set dst (URShiftVI src shift)); 1.4462 + format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} 1.4463 + ins_encode %{ 1.4464 + bool vector256 = false; 1.4465 + __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 1.4466 + %} 1.4467 + ins_pipe( pipe_slow ); 1.4468 +%} 1.4469 + 1.4470 +instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{ 1.4471 + predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 1.4472 + match(Set dst (URShiftVI src shift)); 1.4473 + format %{ "vpsrld $dst,$src,$shift\t! 
logical right shift packed8I" %} 1.4474 + ins_encode %{ 1.4475 + bool vector256 = true; 1.4476 + __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 1.4477 + %} 1.4478 + ins_pipe( pipe_slow ); 1.4479 +%} 1.4480 + 1.4481 +instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 1.4482 + predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 1.4483 + match(Set dst (URShiftVI src shift)); 1.4484 + format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} 1.4485 + ins_encode %{ 1.4486 + bool vector256 = true; 1.4487 + __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 1.4488 + %} 1.4489 + ins_pipe( pipe_slow ); 1.4490 +%} 1.4491 + 1.4492 +// Longs vector logical right shift 1.4493 +instruct vsrl2L(vecX dst, vecS shift) %{ 1.4494 + predicate(n->as_Vector()->length() == 2); 1.4495 + match(Set dst (URShiftVL dst shift)); 1.4496 + format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %} 1.4497 + ins_encode %{ 1.4498 + __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 1.4499 + %} 1.4500 + ins_pipe( pipe_slow ); 1.4501 +%} 1.4502 + 1.4503 +instruct vsrl2L_imm(vecX dst, immI8 shift) %{ 1.4504 + predicate(n->as_Vector()->length() == 2); 1.4505 + match(Set dst (URShiftVL dst shift)); 1.4506 + format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %} 1.4507 + ins_encode %{ 1.4508 + __ psrlq($dst$$XMMRegister, (int)$shift$$constant); 1.4509 + %} 1.4510 + ins_pipe( pipe_slow ); 1.4511 +%} 1.4512 + 1.4513 +instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{ 1.4514 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.4515 + match(Set dst (URShiftVL src shift)); 1.4516 + format %{ "vpsrlq $dst,$src,$shift\t! 
logical right shift packed2L" %} 1.4517 + ins_encode %{ 1.4518 + bool vector256 = false; 1.4519 + __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 1.4520 + %} 1.4521 + ins_pipe( pipe_slow ); 1.4522 +%} 1.4523 + 1.4524 +instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ 1.4525 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.4526 + match(Set dst (URShiftVL src shift)); 1.4527 + format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} 1.4528 + ins_encode %{ 1.4529 + bool vector256 = false; 1.4530 + __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 1.4531 + %} 1.4532 + ins_pipe( pipe_slow ); 1.4533 +%} 1.4534 + 1.4535 +instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{ 1.4536 + predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 1.4537 + match(Set dst (URShiftVL src shift)); 1.4538 + format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} 1.4539 + ins_encode %{ 1.4540 + bool vector256 = true; 1.4541 + __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 1.4542 + %} 1.4543 + ins_pipe( pipe_slow ); 1.4544 +%} 1.4545 + 1.4546 +instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ 1.4547 + predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 1.4548 + match(Set dst (URShiftVL src shift)); 1.4549 + format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} 1.4550 + ins_encode %{ 1.4551 + bool vector256 = true; 1.4552 + __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 1.4553 + %} 1.4554 + ins_pipe( pipe_slow ); 1.4555 +%} 1.4556 + 1.4557 +// ------------------- ArithmeticRightShift ----------------------------------- 1.4558 + 1.4559 +// Shorts/Chars vector arithmetic right shift 1.4560 +instruct vsra2S(vecS dst, vecS shift) %{ 1.4561 + predicate(n->as_Vector()->length() == 2); 1.4562 + match(Set dst (RShiftVS dst shift)); 1.4563 + format %{ "psraw $dst,$shift\t! 
arithmetic right shift packed2S" %} 1.4564 + ins_encode %{ 1.4565 + __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 1.4566 + %} 1.4567 + ins_pipe( pipe_slow ); 1.4568 +%} 1.4569 + 1.4570 +instruct vsra2S_imm(vecS dst, immI8 shift) %{ 1.4571 + predicate(n->as_Vector()->length() == 2); 1.4572 + match(Set dst (RShiftVS dst shift)); 1.4573 + format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %} 1.4574 + ins_encode %{ 1.4575 + __ psraw($dst$$XMMRegister, (int)$shift$$constant); 1.4576 + %} 1.4577 + ins_pipe( pipe_slow ); 1.4578 +%} 1.4579 + 1.4580 +instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{ 1.4581 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.4582 + match(Set dst (RShiftVS src shift)); 1.4583 + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 1.4584 + ins_encode %{ 1.4585 + bool vector256 = false; 1.4586 + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 1.4587 + %} 1.4588 + ins_pipe( pipe_slow ); 1.4589 +%} 1.4590 + 1.4591 +instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ 1.4592 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.4593 + match(Set dst (RShiftVS src shift)); 1.4594 + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 1.4595 + ins_encode %{ 1.4596 + bool vector256 = false; 1.4597 + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 1.4598 + %} 1.4599 + ins_pipe( pipe_slow ); 1.4600 +%} 1.4601 + 1.4602 +instruct vsra4S(vecD dst, vecS shift) %{ 1.4603 + predicate(n->as_Vector()->length() == 4); 1.4604 + match(Set dst (RShiftVS dst shift)); 1.4605 + format %{ "psraw $dst,$shift\t! 
arithmetic right shift packed4S" %} 1.4606 + ins_encode %{ 1.4607 + __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 1.4608 + %} 1.4609 + ins_pipe( pipe_slow ); 1.4610 +%} 1.4611 + 1.4612 +instruct vsra4S_imm(vecD dst, immI8 shift) %{ 1.4613 + predicate(n->as_Vector()->length() == 4); 1.4614 + match(Set dst (RShiftVS dst shift)); 1.4615 + format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} 1.4616 + ins_encode %{ 1.4617 + __ psraw($dst$$XMMRegister, (int)$shift$$constant); 1.4618 + %} 1.4619 + ins_pipe( pipe_slow ); 1.4620 +%} 1.4621 + 1.4622 +instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{ 1.4623 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.4624 + match(Set dst (RShiftVS src shift)); 1.4625 + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 1.4626 + ins_encode %{ 1.4627 + bool vector256 = false; 1.4628 + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 1.4629 + %} 1.4630 + ins_pipe( pipe_slow ); 1.4631 +%} 1.4632 + 1.4633 +instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ 1.4634 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.4635 + match(Set dst (RShiftVS src shift)); 1.4636 + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 1.4637 + ins_encode %{ 1.4638 + bool vector256 = false; 1.4639 + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 1.4640 + %} 1.4641 + ins_pipe( pipe_slow ); 1.4642 +%} 1.4643 + 1.4644 +instruct vsra8S(vecX dst, vecS shift) %{ 1.4645 + predicate(n->as_Vector()->length() == 8); 1.4646 + match(Set dst (RShiftVS dst shift)); 1.4647 + format %{ "psraw $dst,$shift\t! 
arithmetic right shift packed8S" %} 1.4648 + ins_encode %{ 1.4649 + __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 1.4650 + %} 1.4651 + ins_pipe( pipe_slow ); 1.4652 +%} 1.4653 + 1.4654 +instruct vsra8S_imm(vecX dst, immI8 shift) %{ 1.4655 + predicate(n->as_Vector()->length() == 8); 1.4656 + match(Set dst (RShiftVS dst shift)); 1.4657 + format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} 1.4658 + ins_encode %{ 1.4659 + __ psraw($dst$$XMMRegister, (int)$shift$$constant); 1.4660 + %} 1.4661 + ins_pipe( pipe_slow ); 1.4662 +%} 1.4663 + 1.4664 +instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{ 1.4665 + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 1.4666 + match(Set dst (RShiftVS src shift)); 1.4667 + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 1.4668 + ins_encode %{ 1.4669 + bool vector256 = false; 1.4670 + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 1.4671 + %} 1.4672 + ins_pipe( pipe_slow ); 1.4673 +%} 1.4674 + 1.4675 +instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ 1.4676 + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 1.4677 + match(Set dst (RShiftVS src shift)); 1.4678 + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 1.4679 + ins_encode %{ 1.4680 + bool vector256 = false; 1.4681 + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 1.4682 + %} 1.4683 + ins_pipe( pipe_slow ); 1.4684 +%} 1.4685 + 1.4686 +instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{ 1.4687 + predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 1.4688 + match(Set dst (RShiftVS src shift)); 1.4689 + format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed16S" %} 1.4690 + ins_encode %{ 1.4691 + bool vector256 = true; 1.4692 + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 1.4693 + %} 1.4694 + ins_pipe( pipe_slow ); 1.4695 +%} 1.4696 + 1.4697 +instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ 1.4698 + predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 1.4699 + match(Set dst (RShiftVS src shift)); 1.4700 + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} 1.4701 + ins_encode %{ 1.4702 + bool vector256 = true; 1.4703 + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 1.4704 + %} 1.4705 + ins_pipe( pipe_slow ); 1.4706 +%} 1.4707 + 1.4708 +// Integers vector arithmetic right shift 1.4709 +instruct vsra2I(vecD dst, vecS shift) %{ 1.4710 + predicate(n->as_Vector()->length() == 2); 1.4711 + match(Set dst (RShiftVI dst shift)); 1.4712 + format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %} 1.4713 + ins_encode %{ 1.4714 + __ psrad($dst$$XMMRegister, $shift$$XMMRegister); 1.4715 + %} 1.4716 + ins_pipe( pipe_slow ); 1.4717 +%} 1.4718 + 1.4719 +instruct vsra2I_imm(vecD dst, immI8 shift) %{ 1.4720 + predicate(n->as_Vector()->length() == 2); 1.4721 + match(Set dst (RShiftVI dst shift)); 1.4722 + format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %} 1.4723 + ins_encode %{ 1.4724 + __ psrad($dst$$XMMRegister, (int)$shift$$constant); 1.4725 + %} 1.4726 + ins_pipe( pipe_slow ); 1.4727 +%} 1.4728 + 1.4729 +instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{ 1.4730 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.4731 + match(Set dst (RShiftVI src shift)); 1.4732 + format %{ "vpsrad $dst,$src,$shift\t! 
arithmetic right shift packed2I" %} 1.4733 + ins_encode %{ 1.4734 + bool vector256 = false; 1.4735 + __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 1.4736 + %} 1.4737 + ins_pipe( pipe_slow ); 1.4738 +%} 1.4739 + 1.4740 +instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 1.4741 + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 1.4742 + match(Set dst (RShiftVI src shift)); 1.4743 + format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} 1.4744 + ins_encode %{ 1.4745 + bool vector256 = false; 1.4746 + __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 1.4747 + %} 1.4748 + ins_pipe( pipe_slow ); 1.4749 +%} 1.4750 + 1.4751 +instruct vsra4I(vecX dst, vecS shift) %{ 1.4752 + predicate(n->as_Vector()->length() == 4); 1.4753 + match(Set dst (RShiftVI dst shift)); 1.4754 + format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %} 1.4755 + ins_encode %{ 1.4756 + __ psrad($dst$$XMMRegister, $shift$$XMMRegister); 1.4757 + %} 1.4758 + ins_pipe( pipe_slow ); 1.4759 +%} 1.4760 + 1.4761 +instruct vsra4I_imm(vecX dst, immI8 shift) %{ 1.4762 + predicate(n->as_Vector()->length() == 4); 1.4763 + match(Set dst (RShiftVI dst shift)); 1.4764 + format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %} 1.4765 + ins_encode %{ 1.4766 + __ psrad($dst$$XMMRegister, (int)$shift$$constant); 1.4767 + %} 1.4768 + ins_pipe( pipe_slow ); 1.4769 +%} 1.4770 + 1.4771 +instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{ 1.4772 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.4773 + match(Set dst (RShiftVI src shift)); 1.4774 + format %{ "vpsrad $dst,$src,$shift\t! 
arithmetic right shift packed4I" %} 1.4775 + ins_encode %{ 1.4776 + bool vector256 = false; 1.4777 + __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 1.4778 + %} 1.4779 + ins_pipe( pipe_slow ); 1.4780 +%} 1.4781 + 1.4782 +instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 1.4783 + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 1.4784 + match(Set dst (RShiftVI src shift)); 1.4785 + format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %} 1.4786 + ins_encode %{ 1.4787 + bool vector256 = false; 1.4788 + __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 1.4789 + %} 1.4790 + ins_pipe( pipe_slow ); 1.4791 +%} 1.4792 + 1.4793 +instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{ 1.4794 + predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 1.4795 + match(Set dst (RShiftVI src shift)); 1.4796 + format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %} 1.4797 + ins_encode %{ 1.4798 + bool vector256 = true; 1.4799 + __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 1.4800 + %} 1.4801 + ins_pipe( pipe_slow ); 1.4802 +%} 1.4803 + 1.4804 +instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 1.4805 + predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 1.4806 + match(Set dst (RShiftVI src shift)); 1.4807 + format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %} 1.4808 + ins_encode %{ 1.4809 + bool vector256 = true; 1.4810 + __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 1.4811 + %} 1.4812 + ins_pipe( pipe_slow ); 1.4813 +%} 1.4814 + 1.4815 +// There are no longs vector arithmetic right shift instructions. 
1.4816 + 1.4817 + 1.4818 +// --------------------------------- AND -------------------------------------- 1.4819 + 1.4820 +instruct vand4B(vecS dst, vecS src) %{ 1.4821 + predicate(n->as_Vector()->length_in_bytes() == 4); 1.4822 + match(Set dst (AndV dst src)); 1.4823 + format %{ "pand $dst,$src\t! and vectors (4 bytes)" %} 1.4824 + ins_encode %{ 1.4825 + __ pand($dst$$XMMRegister, $src$$XMMRegister); 1.4826 + %} 1.4827 + ins_pipe( pipe_slow ); 1.4828 +%} 1.4829 + 1.4830 +instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{ 1.4831 + predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 1.4832 + match(Set dst (AndV src1 src2)); 1.4833 + format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %} 1.4834 + ins_encode %{ 1.4835 + bool vector256 = false; 1.4836 + __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.4837 + %} 1.4838 + ins_pipe( pipe_slow ); 1.4839 +%} 1.4840 + 1.4841 +instruct vand8B(vecD dst, vecD src) %{ 1.4842 + predicate(n->as_Vector()->length_in_bytes() == 8); 1.4843 + match(Set dst (AndV dst src)); 1.4844 + format %{ "pand $dst,$src\t! and vectors (8 bytes)" %} 1.4845 + ins_encode %{ 1.4846 + __ pand($dst$$XMMRegister, $src$$XMMRegister); 1.4847 + %} 1.4848 + ins_pipe( pipe_slow ); 1.4849 +%} 1.4850 + 1.4851 +instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{ 1.4852 + predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 1.4853 + match(Set dst (AndV src1 src2)); 1.4854 + format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %} 1.4855 + ins_encode %{ 1.4856 + bool vector256 = false; 1.4857 + __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.4858 + %} 1.4859 + ins_pipe( pipe_slow ); 1.4860 +%} 1.4861 + 1.4862 +instruct vand16B(vecX dst, vecX src) %{ 1.4863 + predicate(n->as_Vector()->length_in_bytes() == 16); 1.4864 + match(Set dst (AndV dst src)); 1.4865 + format %{ "pand $dst,$src\t! 
and vectors (16 bytes)" %} 1.4866 + ins_encode %{ 1.4867 + __ pand($dst$$XMMRegister, $src$$XMMRegister); 1.4868 + %} 1.4869 + ins_pipe( pipe_slow ); 1.4870 +%} 1.4871 + 1.4872 +instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{ 1.4873 + predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 1.4874 + match(Set dst (AndV src1 src2)); 1.4875 + format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %} 1.4876 + ins_encode %{ 1.4877 + bool vector256 = false; 1.4878 + __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.4879 + %} 1.4880 + ins_pipe( pipe_slow ); 1.4881 +%} 1.4882 + 1.4883 +instruct vand16B_mem(vecX dst, vecX src, memory mem) %{ 1.4884 + predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 1.4885 + match(Set dst (AndV src (LoadVector mem))); 1.4886 + format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %} 1.4887 + ins_encode %{ 1.4888 + bool vector256 = false; 1.4889 + __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.4890 + %} 1.4891 + ins_pipe( pipe_slow ); 1.4892 +%} 1.4893 + 1.4894 +instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{ 1.4895 + predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 1.4896 + match(Set dst (AndV src1 src2)); 1.4897 + format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %} 1.4898 + ins_encode %{ 1.4899 + bool vector256 = true; 1.4900 + __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.4901 + %} 1.4902 + ins_pipe( pipe_slow ); 1.4903 +%} 1.4904 + 1.4905 +instruct vand32B_mem(vecY dst, vecY src, memory mem) %{ 1.4906 + predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 1.4907 + match(Set dst (AndV src (LoadVector mem))); 1.4908 + format %{ "vpand $dst,$src,$mem\t! 
and vectors (32 bytes)" %} 1.4909 + ins_encode %{ 1.4910 + bool vector256 = true; 1.4911 + __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 1.4912 + %} 1.4913 + ins_pipe( pipe_slow ); 1.4914 +%} 1.4915 + 1.4916 +// --------------------------------- OR --------------------------------------- 1.4917 + 1.4918 +instruct vor4B(vecS dst, vecS src) %{ 1.4919 + predicate(n->as_Vector()->length_in_bytes() == 4); 1.4920 + match(Set dst (OrV dst src)); 1.4921 + format %{ "por $dst,$src\t! or vectors (4 bytes)" %} 1.4922 + ins_encode %{ 1.4923 + __ por($dst$$XMMRegister, $src$$XMMRegister); 1.4924 + %} 1.4925 + ins_pipe( pipe_slow ); 1.4926 +%} 1.4927 + 1.4928 +instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{ 1.4929 + predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 1.4930 + match(Set dst (OrV src1 src2)); 1.4931 + format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %} 1.4932 + ins_encode %{ 1.4933 + bool vector256 = false; 1.4934 + __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.4935 + %} 1.4936 + ins_pipe( pipe_slow ); 1.4937 +%} 1.4938 + 1.4939 +instruct vor8B(vecD dst, vecD src) %{ 1.4940 + predicate(n->as_Vector()->length_in_bytes() == 8); 1.4941 + match(Set dst (OrV dst src)); 1.4942 + format %{ "por $dst,$src\t! or vectors (8 bytes)" %} 1.4943 + ins_encode %{ 1.4944 + __ por($dst$$XMMRegister, $src$$XMMRegister); 1.4945 + %} 1.4946 + ins_pipe( pipe_slow ); 1.4947 +%} 1.4948 + 1.4949 +instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{ 1.4950 + predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 1.4951 + match(Set dst (OrV src1 src2)); 1.4952 + format %{ "vpor $dst,$src1,$src2\t! 
or vectors (8 bytes)" %} 1.4953 + ins_encode %{ 1.4954 + bool vector256 = false; 1.4955 + __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 1.4956 + %} 1.4957 + ins_pipe( pipe_slow ); 1.4958 +%}

// OrV on 16-byte vectors, two-operand form: dst is both an input and the
// result (dst = dst | src). The predicate does not test UseAVX.
instruct vor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// OrV on 16-byte vectors, three-operand form (dst = src1 | src2).
// Only selected when UseAVX > 0.
instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  ins_encode %{
    // 16-byte operands: the assembler's vector256 flag stays off.
    const bool is_256bit = false;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// OrV on 16-byte vectors where the second input is a LoadVector; the load is
// matched into the instruction as a memory operand. Only selected when
// UseAVX > 0.
instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
  ins_encode %{
    // 16-byte operands: the assembler's vector256 flag stays off.
    const bool is_256bit = false;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// OrV on 32-byte vectors, three-operand form (dst = src1 | src2).
// Only selected when UseAVX > 1.
instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  ins_encode %{
    // 32-byte operands: request the 256-bit form from the assembler.
    const bool is_256bit = true;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// OrV on 32-byte vectors where the second input is a LoadVector matched as a
// memory operand. Only selected when UseAVX > 1.
instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
  ins_encode %{
    // 32-byte operands: request the 256-bit form from the assembler.
    const bool is_256bit = true;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// --------------------------------- XOR --------------------------------------

// XorV on 4-byte vectors, two-operand form: dst is both an input and the
// result (dst = dst ^ src). The predicate does not test UseAVX.
instruct vxor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// XorV on 4-byte vectors, three-operand form (dst = src1 ^ src2).
// Only selected when UseAVX > 0.
instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  ins_encode %{
    // 4-byte operands: the assembler's vector256 flag stays off.
    const bool is_256bit = false;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// XorV on 8-byte vectors, two-operand form: dst is both an input and the
// result (dst = dst ^ src). The predicate does not test UseAVX.
instruct vxor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// XorV on 8-byte vectors, three-operand form (dst = src1 ^ src2).
// Only selected when UseAVX > 0.
instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  ins_encode %{
    // 8-byte operands: the assembler's vector256 flag stays off.
    const bool is_256bit = false;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// XorV on 16-byte vectors, two-operand form: dst is both an input and the
// result (dst = dst ^ src). The predicate does not test UseAVX.
instruct vxor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// XorV on 16-byte vectors, three-operand form (dst = src1 ^ src2).
// Only selected when UseAVX > 0.
instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  ins_encode %{
    // 16-byte operands: the assembler's vector256 flag stays off.
    const bool is_256bit = false;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// XorV on 16-byte vectors where the second input is a LoadVector matched as a
// memory operand. Only selected when UseAVX > 0.
instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  ins_encode %{
    // 16-byte operands: the assembler's vector256 flag stays off.
    const bool is_256bit = false;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// XorV on 32-byte vectors, three-operand form (dst = src1 ^ src2).
// Only selected when UseAVX > 1.
instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  ins_encode %{
    // 32-byte operands: request the 256-bit form from the assembler.
    const bool is_256bit = true;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}

// XorV on 32-byte vectors where the second input is a LoadVector matched as a
// memory operand. Only selected when UseAVX > 1.
instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  ins_encode %{
    // 32-byte operands: request the 256-bit form from the assembler.
    const bool is_256bit = true;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, is_256bit);
  %}
  ins_pipe(pipe_slow);
%}