src/cpu/x86/vm/macroAssembler_x86.cpp

changeset 7025
b1bc1af04c6e
parent 6723
0bf37f737702
child 7152
166d744df0de
     1.1 --- a/src/cpu/x86/vm/macroAssembler_x86.cpp	Tue Aug 12 15:17:46 2014 +0000
     1.2 +++ b/src/cpu/x86/vm/macroAssembler_x86.cpp	Tue Aug 05 15:02:10 2014 -0700
     1.3 @@ -7316,17 +7316,34 @@
     1.4   * Fold 128-bit data chunk
     1.5   */
     1.6  void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset) {
     1.7 -  vpclmulhdq(xtmp, xK, xcrc); // [123:64]
     1.8 -  vpclmulldq(xcrc, xK, xcrc); // [63:0]
     1.9 -  vpxor(xcrc, xcrc, Address(buf, offset), false /* vector256 */);
    1.10 -  pxor(xcrc, xtmp);
    1.11 +  if (UseAVX > 0) {
    1.12 +    vpclmulhdq(xtmp, xK, xcrc); // [127:64]
    1.13 +    vpclmulldq(xcrc, xK, xcrc); // [63:0]
    1.14 +    vpxor(xcrc, xcrc, Address(buf, offset), false /* vector256 */);
    1.15 +    pxor(xcrc, xtmp);
    1.16 +  } else {
    1.17 +    movdqa(xtmp, xcrc);
    1.18 +    pclmulhdq(xtmp, xK);   // [127:64]
    1.19 +    pclmulldq(xcrc, xK);   // [63:0]
    1.20 +    pxor(xcrc, xtmp);
    1.21 +    movdqu(xtmp, Address(buf, offset));
    1.22 +    pxor(xcrc, xtmp);
    1.23 +  }
    1.24  }
    1.25  
    1.26  void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf) {
    1.27 -  vpclmulhdq(xtmp, xK, xcrc);
    1.28 -  vpclmulldq(xcrc, xK, xcrc);
    1.29 -  pxor(xcrc, xbuf);
    1.30 -  pxor(xcrc, xtmp);
    1.31 +  if (UseAVX > 0) {
    1.32 +    vpclmulhdq(xtmp, xK, xcrc);
    1.33 +    vpclmulldq(xcrc, xK, xcrc);
    1.34 +    pxor(xcrc, xbuf);
    1.35 +    pxor(xcrc, xtmp);
    1.36 +  } else {
    1.37 +    movdqa(xtmp, xcrc);
    1.38 +    pclmulhdq(xtmp, xK);
    1.39 +    pclmulldq(xcrc, xK);
    1.40 +    pxor(xcrc, xbuf);
    1.41 +    pxor(xcrc, xtmp);
    1.42 +  }
    1.43  }
    1.44  
    1.45  /**
    1.46 @@ -7444,9 +7461,17 @@
    1.47    // Fold 128 bits in xmm1 down into 32 bits in crc register.
    1.48    BIND(L_fold_128b);
    1.49    movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr()));
    1.50 -  vpclmulqdq(xmm2, xmm0, xmm1, 0x1);
    1.51 -  vpand(xmm3, xmm0, xmm2, false /* vector256 */);
    1.52 -  vpclmulqdq(xmm0, xmm0, xmm3, 0x1);
    1.53 +  if (UseAVX > 0) {
    1.54 +    vpclmulqdq(xmm2, xmm0, xmm1, 0x1);
    1.55 +    vpand(xmm3, xmm0, xmm2, false /* vector256 */);
    1.56 +    vpclmulqdq(xmm0, xmm0, xmm3, 0x1);
    1.57 +  } else {
    1.58 +    movdqa(xmm2, xmm0);
    1.59 +    pclmulqdq(xmm2, xmm1, 0x1);
    1.60 +    movdqa(xmm3, xmm0);
    1.61 +    pand(xmm3, xmm2);
    1.62 +    pclmulqdq(xmm0, xmm3, 0x1);
    1.63 +  }
    1.64    psrldq(xmm1, 8);
    1.65    psrldq(xmm2, 4);
    1.66    pxor(xmm0, xmm1);

mercurial