3637 __ jmp(L_exit); |
3637 __ jmp(L_exit); |
3638 |
3638 |
3639 return start; |
3639 return start; |
3640 } |
3640 } |
3641 |
3641 |
|
3642 |
|
  // byte swap x86 long
  // Emits a 16-byte PSHUFB control mask used by generate_ghash_processBlocks
  // (loaded into xmm10 there) to reorder the GHASH state and subkey after
  // loading them from memory.
  // Memory layout of the constant (low address first) is
  //   08 09 0a 0b 0c 0d 0e 0f 00 01 02 03 04 05 06 07
  // i.e. the shuffle exchanges the two 64-bit halves of the register while
  // keeping the byte order within each half.
  address generate_ghash_long_swap_mask() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask");
    address start = __ pc();
    __ emit_data64(0x0f0e0d0c0b0a0908, relocInfo::none );  // low 8 bytes of the mask
    __ emit_data64(0x0706050403020100, relocInfo::none );  // high 8 bytes of the mask
    return start;
  }
|
3652 |
|
  // byte swap x86 byte array
  // Emits a 16-byte PSHUFB control mask used by generate_ghash_processBlocks
  // to byte-reverse each 16-byte input data block as it is loaded.
  // Memory layout of the constant (low address first) is
  //   0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
  // i.e. a full reversal of all 16 bytes of the register.
  address generate_ghash_byte_swap_mask() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask");
    address start = __ pc();
    __ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none );  // low 8 bytes of the mask
    __ emit_data64(0x0001020304050607, relocInfo::none );  // high 8 bytes of the mask
    return start;
  }
|
3662 |
|
  /* Single and multi-block ghash operations */
  // GHASH update over one or more 16-byte blocks, implemented with the
  // carry-less multiply instruction (PCLMULQDQ).
  //
  // Arguments:
  //   c_rarg0 - state   : 16-byte GHASH accumulator, read and updated in place
  //   c_rarg1 - subkeyH : 16-byte hash subkey H (reloaded every iteration)
  //   c_rarg2 - data    : input bytes, consumed in 16-byte blocks
  //   c_rarg3 - blocks  : number of 16-byte blocks to process
  //
  // NOTE(review): the block counter is decremented only after the first block
  // has been processed, so this stub assumes blocks >= 1 — confirm callers
  // guarantee a non-zero count.
  address generate_ghash_processBlocks() {
    __ align(CodeEntryAlignment);
    Label L_ghash_loop, L_exit;
    StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
    address start = __ pc();

    const Register state = c_rarg0;
    const Register subkeyH = c_rarg1;
    const Register data = c_rarg2;
    const Register blocks = c_rarg3;

#ifdef _WIN64
    const int XMM_REG_LAST = 10;  // highest xmm register clobbered below
#endif

    const XMMRegister xmm_temp0 = xmm0;
    const XMMRegister xmm_temp1 = xmm1;
    const XMMRegister xmm_temp2 = xmm2;
    const XMMRegister xmm_temp3 = xmm3;
    const XMMRegister xmm_temp4 = xmm4;
    const XMMRegister xmm_temp5 = xmm5;
    const XMMRegister xmm_temp6 = xmm6;
    const XMMRegister xmm_temp7 = xmm7;
    const XMMRegister xmm_temp8 = xmm8;
    const XMMRegister xmm_temp9 = xmm9;
    const XMMRegister xmm_temp10 = xmm10;

    __ enter();

#ifdef _WIN64
    // save the xmm registers which must be preserved 6-10
    // (xmm6-xmm15 are callee-saved in the Windows x64 ABI)
    __ subptr(rsp, -rsp_after_call_off * wordSize);
    for (int i = 6; i <= XMM_REG_LAST; i++) {
      __ movdqu(xmm_save(i), as_XMMRegister(i));
    }
#endif

    // xmm10 holds the long-swap shuffle mask for the whole loop.
    __ movdqu(xmm_temp10, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));

    // Load the current GHASH state and reorder it for the CLMUL math.
    __ movdqu(xmm_temp0, Address(state, 0));
    __ pshufb(xmm_temp0, xmm_temp10);


    __ BIND(L_ghash_loop);
    // Load the next 16-byte data block and byte-reverse it.
    __ movdqu(xmm_temp2, Address(data, 0));
    __ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr()));

    // Load the hash subkey H and apply the same reordering as the state.
    __ movdqu(xmm_temp1, Address(subkeyH, 0));
    __ pshufb(xmm_temp1, xmm_temp10);

    // state ^= data block (GF(2^128) addition is xor).
    __ pxor(xmm_temp0, xmm_temp2);

    //
    // Multiply with the hash key
    //
    // 128x128-bit carry-less multiply via four 64x64 PCLMULQDQ products:
    // operand a = xmm0 (state ^ data), operand b = xmm1 (subkey H).
    __ movdqu(xmm_temp3, xmm_temp0);
    __ pclmulqdq(xmm_temp3, xmm_temp1, 0);      // xmm3 holds a0*b0
    __ movdqu(xmm_temp4, xmm_temp0);
    __ pclmulqdq(xmm_temp4, xmm_temp1, 16);     // xmm4 holds a0*b1

    __ movdqu(xmm_temp5, xmm_temp0);
    __ pclmulqdq(xmm_temp5, xmm_temp1, 1);      // xmm5 holds a1*b0
    __ movdqu(xmm_temp6, xmm_temp0);
    __ pclmulqdq(xmm_temp6, xmm_temp1, 17);     // xmm6 holds a1*b1

    __ pxor(xmm_temp4, xmm_temp5);      // xmm4 holds a0*b1 + a1*b0

    // Split the 128-bit middle term and fold its halves into the low
    // (xmm3) and high (xmm6) halves of the 256-bit product.
    __ movdqu(xmm_temp5, xmm_temp4);    // copy xmm4 into xmm5
    __ psrldq(xmm_temp4, 8);            // shift xmm4 right by 64 bits
    __ pslldq(xmm_temp5, 8);            // shift xmm5 left by 64 bits
    __ pxor(xmm_temp3, xmm_temp5);
    __ pxor(xmm_temp6, xmm_temp4);      // Register pair <xmm6:xmm3> holds the result
                                        // of the carry-less multiplication of
                                        // xmm0 by xmm1.

    // We shift the result of the multiplication by one bit position
    // to the left to cope for the fact that the bits are reversed.
    // The carry out of each 32-bit lane is recovered with a 31-bit
    // right shift and OR-ed into the next lane up.
    __ movdqu(xmm_temp7, xmm_temp3);
    __ movdqu(xmm_temp8, xmm_temp6);
    __ pslld(xmm_temp3, 1);
    __ pslld(xmm_temp6, 1);
    __ psrld(xmm_temp7, 31);            // per-lane carry bits of the low half
    __ psrld(xmm_temp8, 31);            // per-lane carry bits of the high half
    __ movdqu(xmm_temp9, xmm_temp7);
    __ pslldq(xmm_temp8, 4);
    __ pslldq(xmm_temp7, 4);
    __ psrldq(xmm_temp9, 12);           // carry from the low half's top lane
    __ por(xmm_temp3, xmm_temp7);
    __ por(xmm_temp6, xmm_temp8);
    __ por(xmm_temp6, xmm_temp9);       // propagate that carry into the high half

    //
    // First phase of the reduction
    //
    // Move xmm3 into xmm7, xmm8, xmm9 in order to perform the shifts
    // independently.
    __ movdqu(xmm_temp7, xmm_temp3);
    __ movdqu(xmm_temp8, xmm_temp3);
    __ movdqu(xmm_temp9, xmm_temp3);
    __ pslld(xmm_temp7, 31);            // packed left shift, each dword << 31
    __ pslld(xmm_temp8, 30);            // packed left shift, each dword << 30
    __ pslld(xmm_temp9, 25);            // packed left shift, each dword << 25
    __ pxor(xmm_temp7, xmm_temp8);      // xor the shifted versions
    __ pxor(xmm_temp7, xmm_temp9);
    __ movdqu(xmm_temp8, xmm_temp7);    // xmm8 keeps the bits needed in phase two
    __ pslldq(xmm_temp7, 12);
    __ psrldq(xmm_temp8, 4);
    __ pxor(xmm_temp3, xmm_temp7);      // first phase of the reduction complete

    //
    // Second phase of the reduction
    //
    // Make 3 copies of xmm3 in xmm2, xmm4, xmm5 for doing these
    // shift operations.
    __ movdqu(xmm_temp2, xmm_temp3);
    __ movdqu(xmm_temp4, xmm_temp3);
    __ movdqu(xmm_temp5, xmm_temp3);
    __ psrld(xmm_temp2, 1);             // packed right shift, each dword >> 1
    __ psrld(xmm_temp4, 2);             // packed right shift, each dword >> 2
    __ psrld(xmm_temp5, 7);             // packed right shift, each dword >> 7
    __ pxor(xmm_temp2, xmm_temp4);      // xor the shifted versions
    __ pxor(xmm_temp2, xmm_temp5);
    __ pxor(xmm_temp2, xmm_temp8);      // fold in the bits saved from phase one
    __ pxor(xmm_temp3, xmm_temp2);
    __ pxor(xmm_temp6, xmm_temp3);      // the result is in xmm6

    // Loop control: one block consumed; continue with xmm6 as the new state.
    __ decrement(blocks);
    __ jcc(Assembler::zero, L_exit);
    __ movdqu(xmm_temp0, xmm_temp6);
    __ addptr(data, 16);
    __ jmp(L_ghash_loop);

    __ BIND(L_exit);
    __ pshufb(xmm_temp6, xmm_temp10);           // Byte swap 16-byte result
    __ movdqu(Address(state, 0), xmm_temp6);    // store the result

#ifdef _WIN64
    // restore xmm regs belonging to calling function
    for (int i = 6; i <= XMM_REG_LAST; i++) {
      __ movdqu(as_XMMRegister(i), xmm_save(i));
    }
#endif
    __ leave();
    __ ret(0);
    return start;
  }
|
3810 |
3642 /** |
3811 /** |
3643 * Arguments: |
3812 * Arguments: |
3644 * |
3813 * |
3645 * Inputs: |
3814 * Inputs: |
3646 * c_rarg0 - int crc |
3815 * c_rarg0 - int crc |