1.1 --- a/src/cpu/x86/vm/assembler_x86.hpp Wed Aug 15 16:49:38 2012 -0700 1.2 +++ b/src/cpu/x86/vm/assembler_x86.hpp Mon Aug 20 09:07:21 2012 -0700 1.3 @@ -617,6 +617,7 @@ 1.4 VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) { 1.5 simd_prefix(dst, xnoreg, src, pre, opc); 1.6 } 1.7 + 1.8 void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre) { 1.9 simd_prefix(src, dst, pre); 1.10 } 1.11 @@ -626,16 +627,10 @@ 1.12 simd_prefix(dst, nds, src, pre, VEX_OPCODE_0F, rex_w); 1.13 } 1.14 1.15 - 1.16 int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, 1.17 VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F, 1.18 bool rex_w = false, bool vector256 = false); 1.19 1.20 - int simd_prefix_and_encode(XMMRegister dst, XMMRegister src, 1.21 - VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) { 1.22 - return simd_prefix_and_encode(dst, xnoreg, src, pre, opc); 1.23 - } 1.24 - 1.25 // Move/convert 32-bit integer value. 1.26 int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src, 1.27 VexSimdPrefix pre) { 1.28 @@ -677,6 +672,15 @@ 1.29 void emit_arith(int op1, int op2, Register dst, jobject obj); 1.30 void emit_arith(int op1, int op2, Register dst, Register src); 1.31 1.32 + void emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre); 1.33 + void emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre); 1.34 + void emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre); 1.35 + void emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre); 1.36 + void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, 1.37 + Address src, VexSimdPrefix pre, bool vector256); 1.38 + void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, 1.39 + XMMRegister src, VexSimdPrefix pre, bool vector256); 1.40 + 1.41 void emit_operand(Register reg, 1.42 Register base, Register index, Address::ScaleFactor scale, 1.43 int disp, 1.44 @@ -891,12 +895,6 @@ 1.45 void andq(Register dst, Address src); 1.46 void andq(Register dst, Register src); 1.47 1.48 - // Bitwise Logical AND of Packed Double-Precision Floating-Point Values 1.49 - void andpd(XMMRegister dst, XMMRegister src); 1.50 - 1.51 - // Bitwise Logical AND of Packed Single-Precision Floating-Point Values 1.52 - void andps(XMMRegister dst, XMMRegister src); 1.53 - 1.54 void bsfl(Register dst, Register src); 1.55 void bsrl(Register dst, Register src); 1.56 1.57 @@ -1436,10 +1434,6 @@ 1.58 void prefetcht2(Address src); 1.59 void prefetchw(Address src); 1.60 1.61 - // POR - Bitwise logical OR 1.62 - void por(XMMRegister dst, XMMRegister src); 1.63 - void por(XMMRegister dst, Address src); 1.64 - 1.65 // Shuffle Packed Doublewords 1.66 void pshufd(XMMRegister dst, XMMRegister src, int mode); 1.67 void pshufd(XMMRegister dst, Address src, int mode); 1.68 @@ -1448,9 +1442,6 @@ 1.69 void pshuflw(XMMRegister dst, XMMRegister src, int mode); 1.70 void pshuflw(XMMRegister dst, Address src, int mode); 1.71 1.72 - // Shift Right by bits Logical Quadword Immediate 1.73 - void psrlq(XMMRegister dst, int shift); 1.74 - 1.75 // Shift Right by bytes Logical DoubleQuadword Immediate 1.76 void psrldq(XMMRegister dst, int shift); 1.77 1.78 @@ -1475,10 +1466,6 @@ 1.79 1.80 void pushq(Address src); 1.81 1.82 - // Xor Packed Byte Integer Values 1.83 - void pxor(XMMRegister dst, Address src); 1.84 - void pxor(XMMRegister dst, XMMRegister src); 1.85 - 1.86 void rcll(Register dst, int imm8); 1.87 1.88 void rclq(Register dst, int imm8); 1.89 @@ -1601,15 +1588,10 @@ 1.90 void xorq(Register dst, Address src); 1.91 void xorq(Register dst, Register src); 1.92 1.93 - // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values 1.94 - void xorpd(XMMRegister dst, XMMRegister src); 1.95 - 1.96 - // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values 1.97 - void xorps(XMMRegister dst, XMMRegister src); 1.98 - 1.99 void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0 1.100 1.101 // AVX 3-operands scalar instructions (encoded with VEX prefix) 1.102 + 1.103 void vaddsd(XMMRegister dst, XMMRegister nds, Address src); 1.104 void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src); 1.105 void vaddss(XMMRegister dst, XMMRegister nds, Address src); 1.106 @@ -1627,14 +1609,147 @@ 1.107 void vsubss(XMMRegister dst, XMMRegister nds, Address src); 1.108 void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src); 1.109 1.110 - // AVX Vector instrucitons. 1.111 - void vandpd(XMMRegister dst, XMMRegister nds, Address src); 1.112 - void vandps(XMMRegister dst, XMMRegister nds, Address src); 1.113 - void vxorpd(XMMRegister dst, XMMRegister nds, Address src); 1.114 - void vxorps(XMMRegister dst, XMMRegister nds, Address src); 1.115 + 1.116 + //====================VECTOR ARITHMETIC===================================== 1.117 + 1.118 + // Add Packed Floating-Point Values 1.119 + void addpd(XMMRegister dst, XMMRegister src); 1.120 + void addps(XMMRegister dst, XMMRegister src); 1.121 + void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.122 + void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.123 + void vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.124 + void vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.125 + 1.126 + // Subtract Packed Floating-Point Values 1.127 + void subpd(XMMRegister dst, XMMRegister src); 1.128 + void subps(XMMRegister dst, XMMRegister src); 1.129 + void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.130 + void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.131 + void vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.132 + void vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.133 + 1.134 + // Multiply Packed Floating-Point Values 1.135 + void mulpd(XMMRegister dst, XMMRegister src); 1.136 + void mulps(XMMRegister dst, XMMRegister src); 1.137 + void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.138 + void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.139 + void vmulpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.140 + void vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.141 + 1.142 + // Divide Packed Floating-Point Values 1.143 + void divpd(XMMRegister dst, XMMRegister src); 1.144 + void divps(XMMRegister dst, XMMRegister src); 1.145 + void vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.146 + void vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.147 + void vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.148 + void vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.149 + 1.150 + // Bitwise Logical AND of Packed Floating-Point Values 1.151 + void andpd(XMMRegister dst, XMMRegister src); 1.152 + void andps(XMMRegister dst, XMMRegister src); 1.153 + void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.154 + void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.155 + void vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.156 + void vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.157 + 1.158 + // Bitwise Logical XOR of Packed Floating-Point Values 1.159 + void xorpd(XMMRegister dst, XMMRegister src); 1.160 + void xorps(XMMRegister dst, XMMRegister src); 1.161 void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.162 void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.163 + void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.164 + void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.165 + 1.166 + // Add packed integers 1.167 + void paddb(XMMRegister dst, XMMRegister src); 1.168 + void paddw(XMMRegister dst, XMMRegister src); 1.169 + void paddd(XMMRegister dst, XMMRegister src); 1.170 + void paddq(XMMRegister dst, XMMRegister src); 1.171 + void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.172 + void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.173 + void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.174 + void vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.175 + void vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.176 + void vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.177 + void vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.178 + void vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.179 + 1.180 + // Sub packed integers 1.181 + void psubb(XMMRegister dst, XMMRegister src); 1.182 + void psubw(XMMRegister dst, XMMRegister src); 1.183 + void psubd(XMMRegister dst, XMMRegister src); 1.184 + void psubq(XMMRegister dst, XMMRegister src); 1.185 + void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.186 + void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.187 + void vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.188 + void vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.189 + void vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.190 + void vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.191 + void vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.192 + void vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.193 + 1.194 + // Multiply packed integers (only shorts and ints) 1.195 + void pmullw(XMMRegister dst, XMMRegister src); 1.196 + void pmulld(XMMRegister dst, XMMRegister src); 1.197 + void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.198 + void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.199 + void vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.200 + void vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.201 + 1.202 + // Shift left packed integers 1.203 + void psllw(XMMRegister dst, int shift); 1.204 + void pslld(XMMRegister dst, int shift); 1.205 + void psllq(XMMRegister dst, int shift); 1.206 + void psllw(XMMRegister dst, XMMRegister shift); 1.207 + void pslld(XMMRegister dst, XMMRegister shift); 1.208 + void psllq(XMMRegister dst, XMMRegister shift); 1.209 + void vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256); 1.210 + void vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256); 1.211 + void vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256); 1.212 + void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); 1.213 + void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); 1.214 + void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); 1.215 + 1.216 + // Logical shift right packed integers 1.217 + void psrlw(XMMRegister dst, int shift); 1.218 + void psrld(XMMRegister dst, int shift); 1.219 + void psrlq(XMMRegister dst, int shift); 1.220 + void psrlw(XMMRegister dst, XMMRegister shift); 1.221 + void psrld(XMMRegister dst, XMMRegister shift); 1.222 + void psrlq(XMMRegister dst, XMMRegister shift); 1.223 + void vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256); 1.224 + void vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256); 1.225 + void vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256); 1.226 + void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); 1.227 + void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); 1.228 + void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); 1.229 + 1.230 + // Arithmetic shift right packed integers (only shorts and ints, no instructions for longs) 1.231 + void psraw(XMMRegister dst, int shift); 1.232 + void psrad(XMMRegister dst, int shift); 1.233 + void psraw(XMMRegister dst, XMMRegister shift); 1.234 + void psrad(XMMRegister dst, XMMRegister shift); 1.235 + void vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256); 1.236 + void vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256); 1.237 + void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); 1.238 + void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); 1.239 + 1.240 + // And packed integers 1.241 + void pand(XMMRegister dst, XMMRegister src); 1.242 + void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.243 + void vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.244 + 1.245 + // Or packed integers 1.246 + void por(XMMRegister dst, XMMRegister src); 1.247 + void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.248 + void vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.249 + 1.250 + // Xor packed integers 1.251 + void pxor(XMMRegister dst, XMMRegister src); 1.252 void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); 1.253 + void vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256); 1.254 + 1.255 + // Copy low 128bit into high 128bit of YMM registers. 1.256 void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src); 1.257 void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src); 1.258 1.259 @@ -2532,11 +2647,13 @@ 1.260 void vaddss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddss(dst, nds, src); } 1.261 void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src); 1.262 1.263 - void vandpd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vandpd(dst, nds, src); } 1.264 - void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src); 1.265 - 1.266 - void vandps(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vandps(dst, nds, src); } 1.267 - void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src); 1.268 + void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vandpd(dst, nds, src, vector256); } 1.269 + void vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vandpd(dst, nds, src, vector256); } 1.270 + void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256); 1.271 + 1.272 + void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vandps(dst, nds, src, vector256); } 1.273 + void vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vandps(dst, nds, src, vector256); } 1.274 + void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256); 1.275 1.276 void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); } 1.277 void vdivsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivsd(dst, nds, src); } 1.278 @@ -2565,12 +2682,12 @@ 1.279 // AVX Vector instructions 1.280 1.281 void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vxorpd(dst, nds, src, vector256); } 1.282 - void vxorpd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorpd(dst, nds, src); } 1.283 - void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src); 1.284 + void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vxorpd(dst, nds, src, vector256); } 1.285 + void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256); 1.286 1.287 void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vxorps(dst, nds, src, vector256); } 1.288 - void vxorps(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorps(dst, nds, src); } 1.289 - void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src); 1.290 + void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vxorps(dst, nds, src, vector256); } 1.291 + void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256); 1.292 1.293 void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 1.294 if (UseAVX > 1 || !vector256) // vpxor 256 bit is available only in AVX2 1.295 @@ -2578,6 +2695,12 @@ 1.296 else 1.297 Assembler::vxorpd(dst, nds, src, vector256); 1.298 } 1.299 + void vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 1.300 + if (UseAVX > 1 || !vector256) // vpxor 256 bit is available only in AVX2 1.301 + Assembler::vpxor(dst, nds, src, vector256); 1.302 + else 1.303 + Assembler::vxorpd(dst, nds, src, vector256); 1.304 + } 1.305 1.306 // Move packed integer values from low 128 bit to hign 128 bit in 256 bit vector. 1.307 void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {