src/cpu/x86/vm/assembler_x86.hpp

changeset 4001: 006050192a5a
parent    3969: 1d7922586cf6
child     4037: da91efe96a93
     1.1 --- a/src/cpu/x86/vm/assembler_x86.hpp	Wed Aug 15 16:49:38 2012 -0700
     1.2 +++ b/src/cpu/x86/vm/assembler_x86.hpp	Mon Aug 20 09:07:21 2012 -0700
     1.3 @@ -617,6 +617,7 @@
     1.4                     VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
     1.5      simd_prefix(dst, xnoreg, src, pre, opc);
     1.6    }
     1.7 +
     1.8    void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre) {
     1.9      simd_prefix(src, dst, pre);
    1.10    }
    1.11 @@ -626,16 +627,10 @@
    1.12      simd_prefix(dst, nds, src, pre, VEX_OPCODE_0F, rex_w);
    1.13    }
    1.14  
    1.15 -
    1.16    int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
    1.17                               VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
    1.18                               bool rex_w = false, bool vector256 = false);
    1.19  
    1.20 -  int simd_prefix_and_encode(XMMRegister dst, XMMRegister src,
    1.21 -                             VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
    1.22 -    return simd_prefix_and_encode(dst, xnoreg, src, pre, opc);
    1.23 -  }
    1.24 -
    1.25    // Move/convert 32-bit integer value.
    1.26    int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src,
    1.27                               VexSimdPrefix pre) {
    1.28 @@ -677,6 +672,15 @@
    1.29    void emit_arith(int op1, int op2, Register dst, jobject obj);
    1.30    void emit_arith(int op1, int op2, Register dst, Register src);
    1.31  
    1.32 +  void emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre);
    1.33 +  void emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre);
    1.34 +  void emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre);
    1.35 +  void emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre);
    1.36 +  void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
    1.37 +                      Address src, VexSimdPrefix pre, bool vector256);
    1.38 +  void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
    1.39 +                      XMMRegister src, VexSimdPrefix pre, bool vector256);
    1.40 +
    1.41    void emit_operand(Register reg,
    1.42                      Register base, Register index, Address::ScaleFactor scale,
    1.43                      int disp,
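The emit_simd_arith*/emit_vex_arith helpers declared in the hunk above factor out the prefix-plus-opcode encoding that every packed arithmetic instruction added further down repeats; their definitions live in assembler_x86.cpp and are not part of this changeset. A minimal sketch of what the register-register variant could look like, assuming the existing simd_prefix_and_encode and emit_byte helpers (illustrative only, not the committed implementation):

    void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src,
                                    VexSimdPrefix pre) {
      // The SSE form is destructive, so dst doubles as the first source (nds) operand.
      int encode = simd_prefix_and_encode(dst, dst, src, pre);
      emit_byte(opcode);
      emit_byte(0xC0 | encode);   // ModRM byte selecting the register-register form
    }
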
    1.44 @@ -891,12 +895,6 @@
    1.45    void andq(Register dst, Address src);
    1.46    void andq(Register dst, Register src);
    1.47  
    1.48 -  // Bitwise Logical AND of Packed Double-Precision Floating-Point Values
    1.49 -  void andpd(XMMRegister dst, XMMRegister src);
    1.50 -
    1.51 -  // Bitwise Logical AND of Packed Single-Precision Floating-Point Values
    1.52 -  void andps(XMMRegister dst, XMMRegister src);
    1.53 -
    1.54    void bsfl(Register dst, Register src);
    1.55    void bsrl(Register dst, Register src);
    1.56  
    1.57 @@ -1436,10 +1434,6 @@
    1.58    void prefetcht2(Address src);
    1.59    void prefetchw(Address src);
    1.60  
    1.61 -  // POR - Bitwise logical OR
    1.62 -  void por(XMMRegister dst, XMMRegister src);
    1.63 -  void por(XMMRegister dst, Address src);
    1.64 -
    1.65    // Shuffle Packed Doublewords
    1.66    void pshufd(XMMRegister dst, XMMRegister src, int mode);
    1.67    void pshufd(XMMRegister dst, Address src,     int mode);
    1.68 @@ -1448,9 +1442,6 @@
    1.69    void pshuflw(XMMRegister dst, XMMRegister src, int mode);
    1.70    void pshuflw(XMMRegister dst, Address src,     int mode);
    1.71  
    1.72 -  // Shift Right by bits Logical Quadword Immediate
    1.73 -  void psrlq(XMMRegister dst, int shift);
    1.74 -
    1.75    // Shift Right by bytes Logical DoubleQuadword Immediate
    1.76    void psrldq(XMMRegister dst, int shift);
    1.77  
    1.78 @@ -1475,10 +1466,6 @@
    1.79  
    1.80    void pushq(Address src);
    1.81  
    1.82 -  // Xor Packed Byte Integer Values
    1.83 -  void pxor(XMMRegister dst, Address src);
    1.84 -  void pxor(XMMRegister dst, XMMRegister src);
    1.85 -
    1.86    void rcll(Register dst, int imm8);
    1.87  
    1.88    void rclq(Register dst, int imm8);
    1.89 @@ -1601,15 +1588,10 @@
    1.90    void xorq(Register dst, Address src);
    1.91    void xorq(Register dst, Register src);
    1.92  
    1.93 -  // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
    1.94 -  void xorpd(XMMRegister dst, XMMRegister src);
    1.95 -
    1.96 -  // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
    1.97 -  void xorps(XMMRegister dst, XMMRegister src);
    1.98 -
    1.99    void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0
   1.100  
   1.101    // AVX 3-operands scalar instructions (encoded with VEX prefix)
   1.102 +
   1.103    void vaddsd(XMMRegister dst, XMMRegister nds, Address src);
   1.104    void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
   1.105    void vaddss(XMMRegister dst, XMMRegister nds, Address src);
   1.106 @@ -1627,14 +1609,147 @@
   1.107    void vsubss(XMMRegister dst, XMMRegister nds, Address src);
   1.108    void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);
   1.109  
   1.110 -  // AVX Vector instrucitons.
   1.111 -  void vandpd(XMMRegister dst, XMMRegister nds, Address src);
   1.112 -  void vandps(XMMRegister dst, XMMRegister nds, Address src);
   1.113 -  void vxorpd(XMMRegister dst, XMMRegister nds, Address src);
   1.114 -  void vxorps(XMMRegister dst, XMMRegister nds, Address src);
   1.115 +
   1.116 +  //====================VECTOR ARITHMETIC=====================================
   1.117 +
   1.118 +  // Add Packed Floating-Point Values
   1.119 +  void addpd(XMMRegister dst, XMMRegister src);
   1.120 +  void addps(XMMRegister dst, XMMRegister src);
   1.121 +  void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
   1.122 +  void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
   1.123 +  void vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
   1.124 +  void vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
   1.125 +
   1.126 +  // Subtract Packed Floating-Point Values
   1.127 +  void subpd(XMMRegister dst, XMMRegister src);
   1.128 +  void subps(XMMRegister dst, XMMRegister src);
   1.129 +  void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
   1.130 +  void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
   1.131 +  void vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
   1.132 +  void vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
   1.133 +
   1.134 +  // Multiply Packed Floating-Point Values
   1.135 +  void mulpd(XMMRegister dst, XMMRegister src);
   1.136 +  void mulps(XMMRegister dst, XMMRegister src);
   1.137 +  void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
   1.138 +  void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
   1.139 +  void vmulpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
   1.140 +  void vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
   1.141 +
   1.142 +  // Divide Packed Floating-Point Values
   1.143 +  void divpd(XMMRegister dst, XMMRegister src);
   1.144 +  void divps(XMMRegister dst, XMMRegister src);
   1.145 +  void vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
   1.146 +  void vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
   1.147 +  void vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
   1.148 +  void vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
   1.149 +
   1.150 +  // Bitwise Logical AND of Packed Floating-Point Values
   1.151 +  void andpd(XMMRegister dst, XMMRegister src);
   1.152 +  void andps(XMMRegister dst, XMMRegister src);
   1.153 +  void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
   1.154 +  void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
   1.155 +  void vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
   1.156 +  void vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
   1.157 +
   1.158 +  // Bitwise Logical XOR of Packed Floating-Point Values
   1.159 +  void xorpd(XMMRegister dst, XMMRegister src);
   1.160 +  void xorps(XMMRegister dst, XMMRegister src);
   1.161    void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
   1.162    void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
   1.163 +  void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
   1.164 +  void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
   1.165 +
   1.166 +  // Add packed integers
   1.167 +  void paddb(XMMRegister dst, XMMRegister src);
   1.168 +  void paddw(XMMRegister dst, XMMRegister src);
   1.169 +  void paddd(XMMRegister dst, XMMRegister src);
   1.170 +  void paddq(XMMRegister dst, XMMRegister src);
   1.171 +  void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
   1.172 +  void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
   1.173 +  void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
   1.174 +  void vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
   1.175 +  void vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
   1.176 +  void vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
   1.177 +  void vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
   1.178 +  void vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
   1.179 +
   1.180 +  // Sub packed integers
   1.181 +  void psubb(XMMRegister dst, XMMRegister src);
   1.182 +  void psubw(XMMRegister dst, XMMRegister src);
   1.183 +  void psubd(XMMRegister dst, XMMRegister src);
   1.184 +  void psubq(XMMRegister dst, XMMRegister src);
   1.185 +  void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
   1.186 +  void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
   1.187 +  void vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
   1.188 +  void vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
   1.189 +  void vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
   1.190 +  void vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
   1.191 +  void vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
   1.192 +  void vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
   1.193 +
   1.194 +  // Multiply packed integers (only shorts and ints)
   1.195 +  void pmullw(XMMRegister dst, XMMRegister src);
   1.196 +  void pmulld(XMMRegister dst, XMMRegister src);
   1.197 +  void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
   1.198 +  void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
   1.199 +  void vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
   1.200 +  void vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
   1.201 +
   1.202 +  // Shift left packed integers
   1.203 +  void psllw(XMMRegister dst, int shift);
   1.204 +  void pslld(XMMRegister dst, int shift);
   1.205 +  void psllq(XMMRegister dst, int shift);
   1.206 +  void psllw(XMMRegister dst, XMMRegister shift);
   1.207 +  void pslld(XMMRegister dst, XMMRegister shift);
   1.208 +  void psllq(XMMRegister dst, XMMRegister shift);
   1.209 +  void vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256);
   1.210 +  void vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256);
   1.211 +  void vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256);
   1.212 +  void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
   1.213 +  void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
   1.214 +  void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
   1.215 +
   1.216 +  // Logical shift right packed integers
   1.217 +  void psrlw(XMMRegister dst, int shift);
   1.218 +  void psrld(XMMRegister dst, int shift);
   1.219 +  void psrlq(XMMRegister dst, int shift);
   1.220 +  void psrlw(XMMRegister dst, XMMRegister shift);
   1.221 +  void psrld(XMMRegister dst, XMMRegister shift);
   1.222 +  void psrlq(XMMRegister dst, XMMRegister shift);
   1.223 +  void vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256);
   1.224 +  void vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256);
   1.225 +  void vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256);
   1.226 +  void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
   1.227 +  void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
   1.228 +  void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
   1.229 +
   1.230 +  // Arithmetic shift right packed integers (only shorts and ints, no instructions for longs)
   1.231 +  void psraw(XMMRegister dst, int shift);
   1.232 +  void psrad(XMMRegister dst, int shift);
   1.233 +  void psraw(XMMRegister dst, XMMRegister shift);
   1.234 +  void psrad(XMMRegister dst, XMMRegister shift);
   1.235 +  void vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256);
   1.236 +  void vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256);
   1.237 +  void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
   1.238 +  void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
   1.239 +
   1.240 +  // And packed integers
   1.241 +  void pand(XMMRegister dst, XMMRegister src);
   1.242 +  void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
   1.243 +  void vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
   1.244 +
   1.245 +  // Or packed integers
   1.246 +  void por(XMMRegister dst, XMMRegister src);
   1.247 +  void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
   1.248 +  void vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
   1.249 +
   1.250 +  // Xor packed integers
   1.251 +  void pxor(XMMRegister dst, XMMRegister src);
   1.252    void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
   1.253 +  void vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
   1.254 +
   1.255 +  // Copy low 128bit into high 128bit of YMM registers.
   1.256    void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
   1.257    void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
   1.258  
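The vector-arithmetic block above follows one pattern throughout: each operation gets a legacy two-operand SSE form (destructive, always 128 bits) plus AVX 3-operand forms that take a non-destructive source nds and a vector256 flag selecting 128-bit (xmm) or 256-bit (ymm) width. An illustrative caller, not taken from this changeset (the masm pointer, register choices, and the UseAVX test are assumptions), showing how the two families differ:

    // Compute dst = a + b over packed floats, preferring the AVX encoding.
    void add_float_vectors(MacroAssembler* masm, XMMRegister dst,
                           XMMRegister a, XMMRegister b, bool wide) {
      if (UseAVX > 0) {
        // Non-destructive 3-operand form; 'wide' picks 256-bit ymm operands.
        masm->vaddps(dst, a, b, /*vector256=*/wide);
      } else {
        // Legacy SSE form is destructive and always 128-bit.
        if (dst != a) masm->movaps(dst, a);
        masm->addps(dst, b);
      }
    }
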
   1.259 @@ -2532,11 +2647,13 @@
   1.260    void vaddss(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vaddss(dst, nds, src); }
   1.261    void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
   1.262  
   1.263 -  void vandpd(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vandpd(dst, nds, src); }
   1.264 -  void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
   1.265 -
   1.266 -  void vandps(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vandps(dst, nds, src); }
   1.267 -  void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src);
   1.268 +  void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vandpd(dst, nds, src, vector256); }
   1.269 +  void vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256)     { Assembler::vandpd(dst, nds, src, vector256); }
   1.270 +  void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
   1.271 +
   1.272 +  void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vandps(dst, nds, src, vector256); }
   1.273 +  void vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256)     { Assembler::vandps(dst, nds, src, vector256); }
   1.274 +  void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
   1.275  
   1.276    void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); }
   1.277    void vdivsd(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vdivsd(dst, nds, src); }
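The AddressLiteral overloads of vandpd/vandps declared in this hunk (and of vxorpd/vxorps in the next one) are prototypes only; their bodies are elsewhere. A sketch of the usual MacroAssembler shape for such overloads, assuming the existing reachable()/as_Address() helpers and the x86_64 rscratch1 convention (illustrative, not the committed code):

    void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds,
                                AddressLiteral src, bool vector256) {
      if (reachable(src)) {
        // The literal is RIP-addressable: use it directly as a memory operand.
        vandpd(dst, nds, as_Address(src), vector256);
      } else {
        // Otherwise materialize the address in a scratch register first.
        lea(rscratch1, src);
        vandpd(dst, nds, Address(rscratch1, 0), vector256);
      }
    }
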
   1.278 @@ -2565,12 +2682,12 @@
   1.279    // AVX Vector instructions
   1.280  
   1.281    void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vxorpd(dst, nds, src, vector256); }
   1.282 -  void vxorpd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorpd(dst, nds, src); }
   1.283 -  void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
   1.284 +  void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vxorpd(dst, nds, src, vector256); }
   1.285 +  void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
   1.286  
   1.287    void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vxorps(dst, nds, src, vector256); }
   1.288 -  void vxorps(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorps(dst, nds, src); }
   1.289 -  void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src);
   1.290 +  void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vxorps(dst, nds, src, vector256); }
   1.291 +  void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
   1.292  
   1.293    void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
   1.294      if (UseAVX > 1 || !vector256) // vpxor 256 bit is available only in AVX2
   1.295 @@ -2578,6 +2695,12 @@
   1.296      else
   1.297        Assembler::vxorpd(dst, nds, src, vector256);
   1.298    }
   1.299 +  void vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
   1.300 +    if (UseAVX > 1 || !vector256) // vpxor 256 bit is available only in AVX2
   1.301 +      Assembler::vpxor(dst, nds, src, vector256);
   1.302 +    else
   1.303 +      Assembler::vxorpd(dst, nds, src, vector256);
   1.304 +  }
   1.305  
    1.306    // Move packed integer values from low 128 bit to high 128 bit in 256 bit vector.
   1.307    void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
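The vpxor wrapper above, together with its new Address overload, hides an ISA gap: 256-bit integer instructions such as vpxor require AVX2, so on AVX-only hardware the wrapper substitutes the floating-point vxorpd, which yields the identical bitwise result. A small illustrative use (names are assumptions, not from this changeset):

    // Zero a full 256-bit register through the wrapper: AVX2 emits vpxor, plain
    // AVX falls back to vxorpd; either way the register ends up all zeroes.
    static void zero_ymm(MacroAssembler* masm, XMMRegister reg) {
      masm->vpxor(reg, reg, reg, /*vector256=*/true);
    }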
