--- a/src/cpu/x86/vm/macroAssembler_x86.cpp	Tue Aug 12 15:17:46 2014 +0000
+++ b/src/cpu/x86/vm/macroAssembler_x86.cpp	Tue Aug 05 15:02:10 2014 -0700
@@ -7316,17 +7316,34 @@
  * Fold 128-bit data chunk
  */
 void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset) {
-  vpclmulhdq(xtmp, xK, xcrc); // [123:64]
-  vpclmulldq(xcrc, xK, xcrc); // [63:0]
-  vpxor(xcrc, xcrc, Address(buf, offset), false /* vector256 */);
-  pxor(xcrc, xtmp);
+  if (UseAVX > 0) {
+    vpclmulhdq(xtmp, xK, xcrc); // [123:64]
+    vpclmulldq(xcrc, xK, xcrc); // [63:0]
+    vpxor(xcrc, xcrc, Address(buf, offset), false /* vector256 */);
+    pxor(xcrc, xtmp);
+  } else {
+    movdqa(xtmp, xcrc);
+    pclmulhdq(xtmp, xK); // [123:64]
+    pclmulldq(xcrc, xK); // [63:0]
+    pxor(xcrc, xtmp);
+    movdqu(xtmp, Address(buf, offset));
+    pxor(xcrc, xtmp);
+  }
 }
 
 void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf) {
-  vpclmulhdq(xtmp, xK, xcrc);
-  vpclmulldq(xcrc, xK, xcrc);
-  pxor(xcrc, xbuf);
-  pxor(xcrc, xtmp);
+  if (UseAVX > 0) {
+    vpclmulhdq(xtmp, xK, xcrc);
+    vpclmulldq(xcrc, xK, xcrc);
+    pxor(xcrc, xbuf);
+    pxor(xcrc, xtmp);
+  } else {
+    movdqa(xtmp, xcrc);
+    pclmulhdq(xtmp, xK);
+    pclmulldq(xcrc, xK);
+    pxor(xcrc, xbuf);
+    pxor(xcrc, xtmp);
+  }
 }
 
 /**
@@ -7444,9 +7461,17 @@
   // Fold 128 bits in xmm1 down into 32 bits in crc register.
   BIND(L_fold_128b);
   movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr()));
-  vpclmulqdq(xmm2, xmm0, xmm1, 0x1);
-  vpand(xmm3, xmm0, xmm2, false /* vector256 */);
-  vpclmulqdq(xmm0, xmm0, xmm3, 0x1);
+  if (UseAVX > 0) {
+    vpclmulqdq(xmm2, xmm0, xmm1, 0x1);
+    vpand(xmm3, xmm0, xmm2, false /* vector256 */);
+    vpclmulqdq(xmm0, xmm0, xmm3, 0x1);
+  } else {
+    movdqa(xmm2, xmm0);
+    pclmulqdq(xmm2, xmm1, 0x1);
+    movdqa(xmm3, xmm0);
+    pand(xmm3, xmm2);
+    pclmulqdq(xmm0, xmm3, 0x1);
+  }
   psrldq(xmm1, 8);
   psrldq(xmm2, 4);
   pxor(xmm0, xmm1);
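
For reference, the 128-bit fold that both branches of fold_128bit_crc32 implement can be written with compiler intrinsics. The sketch below is a hypothetical standalone illustration, not HotSpot code: the names fold_128bit_crc32_step, k, and buf are invented for the example, and it assumes a CLMUL-capable CPU and compilation with -msse2 -mpclmul. HotSpot's pclmulhdq/vpclmulhdq are the carry-less multiply of the upper quadwords (PCLMULQDQ imm8 0x11) and pclmulldq/vpclmulldq of the lower quadwords (imm8 0x00); the AVX branch differs only in using the non-destructive three-operand encodings, which is why the non-AVX path needs the extra movdqa copies.

    #include <emmintrin.h>   // SSE2: _mm_loadu_si128, _mm_xor_si128
    #include <wmmintrin.h>   // PCLMULQDQ: _mm_clmulepi64_si128

    // One folding step: carry-less multiply the current 128-bit CRC state
    // by the precomputed constants K_hi:K_lo in GF(2), then XOR in the next
    // 16-byte chunk of input. Mirrors the non-AVX branch added above.
    static inline __m128i fold_128bit_crc32_step(__m128i xcrc, __m128i k,
                                                 const void* buf) {
      __m128i hi = _mm_clmulepi64_si128(xcrc, k, 0x11); // upper qwords, [123:64]
      __m128i lo = _mm_clmulepi64_si128(xcrc, k, 0x00); // lower qwords, [63:0]
      __m128i chunk = _mm_loadu_si128((const __m128i*)buf);
      return _mm_xor_si128(_mm_xor_si128(lo, hi), chunk);
    }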