src/cpu/x86/vm/stubGenerator_x86_64.cpp

changeset 9788:44ef77ad417c
parent    8877:f04097176542
child     9806:758c07667682
    __ jmp(L_exit);

    return start;
  }

  // byte swap x86 long
  address generate_ghash_long_swap_mask() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask");
    address start = __ pc();
    __ emit_data64(0x0f0e0d0c0b0a0908, relocInfo::none);
    __ emit_data64(0x0706050403020100, relocInfo::none);
    return start;
  }

  // byte swap x86 byte array
  address generate_ghash_byte_swap_mask() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask");
    address start = __ pc();
    __ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none);
    __ emit_data64(0x0001020304050607, relocInfo::none);
    return start;
  }
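
  // A minimal sketch (documentation only, not compiled) of what pshufb
  // computes with the two masks above; the helper name pshufb_sketch is
  // illustrative. ghash_long_swap_mask swaps the two 8-byte halves of a
  // 16-byte value, while ghash_byte_swap_mask reverses all 16 bytes.
  //
  //   static void pshufb_sketch(jubyte dst[16], const jubyte src[16],
  //                             const jubyte mask[16]) {
  //     for (int i = 0; i < 16; i++) {
  //       dst[i] = src[mask[i]];  // each mask byte selects a source byte
  //     }
  //   }
  //
  // (Real pshufb also zeroes dst[i] when mask[i] has its top bit set; the
  // masks above never do.) With the long swap mask, mask bytes 0..7 are
  // 0x08..0x0f and bytes 8..15 are 0x00..0x07, so the two qwords trade
  // places; with the byte swap mask, dst[i] = src[15 - i].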

  /* Single and multi-block ghash operations */
  address generate_ghash_processBlocks() {
    __ align(CodeEntryAlignment);
    Label L_ghash_loop, L_exit;
    StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
    address start = __ pc();

    const Register state   = c_rarg0;
    const Register subkeyH = c_rarg1;
    const Register data    = c_rarg2;
    const Register blocks  = c_rarg3;

#ifdef _WIN64
    const int XMM_REG_LAST = 10;
#endif

    const XMMRegister xmm_temp0  = xmm0;
    const XMMRegister xmm_temp1  = xmm1;
    const XMMRegister xmm_temp2  = xmm2;
    const XMMRegister xmm_temp3  = xmm3;
    const XMMRegister xmm_temp4  = xmm4;
    const XMMRegister xmm_temp5  = xmm5;
    const XMMRegister xmm_temp6  = xmm6;
    const XMMRegister xmm_temp7  = xmm7;
    const XMMRegister xmm_temp8  = xmm8;
    const XMMRegister xmm_temp9  = xmm9;
    const XMMRegister xmm_temp10 = xmm10;

    __ enter();

#ifdef _WIN64
    // save xmm6-xmm10: the Win64 ABI treats xmm6-xmm15 as callee-saved,
    // and this stub clobbers xmm6 through xmm10
    __ subptr(rsp, -rsp_after_call_off * wordSize);
    for (int i = 6; i <= XMM_REG_LAST; i++) {
      __ movdqu(xmm_save(i), as_XMMRegister(i));
    }
#endif

    // the long swap mask stays resident in xmm10 for the whole stub
    __ movdqu(xmm_temp10, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));

    __ movdqu(xmm_temp0, Address(state, 0));
    __ pshufb(xmm_temp0, xmm_temp10);

    __ BIND(L_ghash_loop);
    __ movdqu(xmm_temp2, Address(data, 0));
    __ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr()));

    __ movdqu(xmm_temp1, Address(subkeyH, 0));
    __ pshufb(xmm_temp1, xmm_temp10);

    __ pxor(xmm_temp0, xmm_temp2);              // xor the state with the data block

    //
    // Multiply with the hash key
    //
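    // Writing the 128-bit inputs as a = a1*x^64 + a0 and b = b1*x^64 + b0,
    // the 256-bit carry-less product is
    //   a*b = (a1*b1)*x^128 + (a0*b1 + a1*b0)*x^64 + (a0*b0),
    // so the four pclmulqdq results below are combined by xor-ing the two
    // cross terms and splitting the sum across the two 128-bit halves.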
    __ movdqu(xmm_temp3, xmm_temp0);
    __ pclmulqdq(xmm_temp3, xmm_temp1, 0);      // xmm3 holds a0*b0
    __ movdqu(xmm_temp4, xmm_temp0);
    __ pclmulqdq(xmm_temp4, xmm_temp1, 16);     // xmm4 holds a0*b1

    __ movdqu(xmm_temp5, xmm_temp0);
    __ pclmulqdq(xmm_temp5, xmm_temp1, 1);      // xmm5 holds a1*b0
    __ movdqu(xmm_temp6, xmm_temp0);
    __ pclmulqdq(xmm_temp6, xmm_temp1, 17);     // xmm6 holds a1*b1

    __ pxor(xmm_temp4, xmm_temp5);              // xmm4 holds a0*b1 + a1*b0

    __ movdqu(xmm_temp5, xmm_temp4);            // copy the contents of xmm4 to xmm5
    __ psrldq(xmm_temp4, 8);                    // shift xmm4 right by 64 bits
    __ pslldq(xmm_temp5, 8);                    // shift xmm5 left by 64 bits
    __ pxor(xmm_temp3, xmm_temp5);
    __ pxor(xmm_temp6, xmm_temp4);              // register pair <xmm6:xmm3> holds the
                                                // 256-bit carry-less product of
                                                // xmm0 and xmm1

    // We shift the result of the multiplication left by one bit position
    // to compensate for the fact that the bits are reversed.
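    // (GHASH treats each block as a bit-reflected polynomial: the most
    // significant bit of the first byte is the coefficient of x^0. A
    // carry-less multiply of two reflected 128-bit operands therefore
    // yields the reflected 255-bit product sitting one bit low in its
    // 256-bit slot, which the 1-bit left shift below corrects.)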
    __ movdqu(xmm_temp7, xmm_temp3);
    __ movdqu(xmm_temp8, xmm_temp6);
    __ pslld(xmm_temp3, 1);                     // shift each dword left by 1
    __ pslld(xmm_temp6, 1);
    __ psrld(xmm_temp7, 31);                    // collect the bit shifted out
    __ psrld(xmm_temp8, 31);                    //   of each dword
    __ movdqu(xmm_temp9, xmm_temp7);
    __ pslldq(xmm_temp8, 4);                    // realign the carry bits with
    __ pslldq(xmm_temp7, 4);                    //   the dwords they carry into
    __ psrldq(xmm_temp9, 12);                   // carry from the low register's
                                                //   top dword into xmm6
    __ por(xmm_temp3, xmm_temp7);
    __ por(xmm_temp6, xmm_temp8);
    __ por(xmm_temp6, xmm_temp9);

    //
    // First phase of the reduction
    //
    // Move xmm3 into xmm7, xmm8, xmm9 in order to perform the shifts
    // independently.
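    // (The reduction folds the upper 128 bits of the product back into the
    // lower 128 bits modulo g(x) = x^128 + x^7 + x^2 + x + 1; in the
    // bit-reflected representation the dword shifts by 31, 30 and 25 below
    // realize the x, x^2 and x^7 terms of g(x).)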
    __ movdqu(xmm_temp7, xmm_temp3);
    __ movdqu(xmm_temp8, xmm_temp3);
    __ movdqu(xmm_temp9, xmm_temp3);
    __ pslld(xmm_temp7, 31);                    // packed left shift by 31
    __ pslld(xmm_temp8, 30);                    // packed left shift by 30
    __ pslld(xmm_temp9, 25);                    // packed left shift by 25
    __ pxor(xmm_temp7, xmm_temp8);              // xor the shifted versions
    __ pxor(xmm_temp7, xmm_temp9);
    __ movdqu(xmm_temp8, xmm_temp7);
    __ pslldq(xmm_temp7, 12);
    __ psrldq(xmm_temp8, 4);
    __ pxor(xmm_temp3, xmm_temp7);              // first phase of the reduction complete

    //
    // Second phase of the reduction
    //
    // Make 3 copies of xmm3 in xmm2, xmm4, xmm5 for doing these
    // shift operations.
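    // (As in the first phase, the right shifts by 1, 2 and 7 below
    // correspond to the x, x^2 and x^7 terms of g(x).)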
    __ movdqu(xmm_temp2, xmm_temp3);
    __ movdqu(xmm_temp4, xmm_temp3);
    __ movdqu(xmm_temp5, xmm_temp3);
    __ psrld(xmm_temp2, 1);                     // packed right shift by 1
    __ psrld(xmm_temp4, 2);                     // packed right shift by 2
    __ psrld(xmm_temp5, 7);                     // packed right shift by 7
    __ pxor(xmm_temp2, xmm_temp4);              // xor the shifted versions
    __ pxor(xmm_temp2, xmm_temp5);
    __ pxor(xmm_temp2, xmm_temp8);
    __ pxor(xmm_temp3, xmm_temp2);
    __ pxor(xmm_temp6, xmm_temp3);              // the result is in xmm6

    __ decrement(blocks);
    __ jcc(Assembler::zero, L_exit);
    __ movdqu(xmm_temp0, xmm_temp6);            // carry the product over as the
                                                // state for the next block
    __ addptr(data, 16);
    __ jmp(L_ghash_loop);

    __ BIND(L_exit);
    __ pshufb(xmm_temp6, xmm_temp10);           // byte swap the 16-byte result
    __ movdqu(Address(state, 0), xmm_temp6);    // store the result

#ifdef _WIN64
    // restore xmm registers belonging to the calling function
    for (int i = 6; i <= XMM_REG_LAST; i++) {
      __ movdqu(as_XMMRegister(i), xmm_save(i));
    }
#endif
    __ leave();
    __ ret(0);
    return start;
  }
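
  // For reference (documentation only, not compiled): a bit-serial C++
  // model of the multiplication performed by one iteration of the stub
  // above, following the algorithm in NIST SP 800-38D. The helper name
  // ghash_block_mul_sketch is illustrative. Inputs and output are
  // big-endian 128-bit values split into 64-bit halves (julong is
  // HotSpot's unsigned 64-bit type).
  //
  //   static void ghash_block_mul_sketch(julong xh, julong xl,  // state ^ data
  //                                      julong hh, julong hl,  // subkey H
  //                                      julong* zh, julong* zl) {
  //     julong vh = hh, vl = hl;   // V starts as H
  //     julong rh = 0,  rl = 0;    // accumulator for the product
  //     for (int i = 0; i < 128; i++) {
  //       // walk the bits of X most-significant bit first
  //       julong bit = (i < 64) ? (xh >> (63 - i)) & 1 : (xl >> (127 - i)) & 1;
  //       if (bit) { rh ^= vh; rl ^= vl; }
  //       // V = V * x modulo x^128 + x^7 + x^2 + x + 1; in this byte order
  //       // that is a right shift, folding the dropped bit into 0xE1 << 120
  //       julong carry = vl & 1;
  //       vl = (vl >> 1) | (vh << 63);
  //       vh >>= 1;
  //       if (carry) vh ^= 0xE100000000000000ULL;
  //     }
  //     *zh = rh; *zl = rl;
  //   }
  //
  // The stub computes the same product with four pclmulqdq instructions and
  // the two-phase shift/xor reduction instead of 128 serial steps.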

  /**
   * Arguments:
   *
   * Inputs:
   *   c_rarg0   - int crc
      StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
      StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
      StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
    }

    // Generate GHASH intrinsics code
    if (UseGHASHIntrinsics) {
      StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
      StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
      StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
    }
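    // Note that the two mask stubs are generated before
    // generate_ghash_processBlocks(), which reads their addresses via
    // StubRoutines::x86::ghash_long_swap_mask_addr() and
    // ghash_byte_swap_mask_addr().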

    // Safefetch stubs.
    generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
                       &StubRoutines::_safefetch32_fault_pc,
                       &StubRoutines::_safefetch32_continuation_pc);
    generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
